Skip to content

Commit

Permalink
Additional progress
Browse files Browse the repository at this point in the history
  • Loading branch information
JMicheli committed Dec 5, 2023
1 parent 42b121a commit b73699c
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 89 deletions.
11 changes: 6 additions & 5 deletions core/src/filesystem/image/thumbnail.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use tracing::{debug, error, trace};

use crate::{
config::StumpConfig,
db::entity::Media,
filesystem::{media, FileError},
prisma::media as prisma_media,
Expand All @@ -18,12 +19,12 @@ pub fn generate_thumbnail(
id: &str,
media_path: &str,
options: ImageProcessorOptions,
thumbnails_dir: PathBuf,
config: StumpConfig,
) -> Result<PathBuf, FileError> {
let (_, buf) = media::get_page(media_path, options.page.unwrap_or(1))?;
let (_, buf) = media::get_page(media_path, options.page.unwrap_or(1), config)?;
let ext = options.format.extension();

let thumbnail_path = thumbnails_dir.join(format!("{}.{}", &id, ext));
let thumbnail_path = config.get_thumbnails_dir().join(format!("{}.{}", &id, ext));
if !thumbnail_path.exists() {
// TODO: this will be more complicated once more specialized processors are added...
let image_buffer = if options.format == ImageFormat::Webp {
Expand All @@ -45,7 +46,7 @@ pub fn generate_thumbnail(
pub fn generate_thumbnails(
media: &[Media],
options: ImageProcessorOptions,
thumbnails_dir: PathBuf,
config: StumpConfig,
) -> Result<Vec<PathBuf>, FileError> {
trace!("Enter generate_thumbnails");

Expand All @@ -62,7 +63,7 @@ pub fn generate_thumbnails(
m.id.as_str(),
m.path.as_str(),
options.clone(),
thumbnails_dir,
config,
)
})
.filter_map(|res| {
Expand Down
13 changes: 11 additions & 2 deletions core/src/filesystem/media/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf};
use prisma_client_rust::chrono::{DateTime, FixedOffset, Utc};

use crate::{
config::StumpConfig,
db::entity::{LibraryOptions, Media, Series},
filesystem::{process, FileParts, PathUtils, SeriesJson},
CoreError, CoreResult,
Expand All @@ -12,14 +13,21 @@ pub struct MediaBuilder {
path: PathBuf,
series_id: String,
library_options: LibraryOptions,
config: StumpConfig,
}

impl MediaBuilder {
pub fn new(path: &Path, series_id: &str, library_options: LibraryOptions) -> Self {
/// Creates a `MediaBuilder` for the file at `path`.
///
/// `path` and `series_id` are copied into owned values; `library_options`
/// and `config` are moved into the builder unchanged.
pub fn new(
    path: &Path,
    series_id: &str,
    library_options: LibraryOptions,
    config: StumpConfig,
) -> Self {
    let path = path.to_path_buf();
    let series_id = series_id.to_string();

    Self {
        path,
        series_id,
        library_options,
        config,
    }
}

Expand All @@ -32,7 +40,8 @@ impl MediaBuilder {
}

pub fn build(self) -> CoreResult<Media> {
let mut processed_entry = process(&self.path, self.library_options.into())?;
let mut processed_entry =
process(&self.path, self.library_options.into(), self.config)?;

tracing::trace!(?processed_entry, "Processed entry");

Expand Down
25 changes: 19 additions & 6 deletions core/src/filesystem/media/epub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use super::process::{FileProcessor, FileProcessorOptions, ProcessedFile};
pub struct EpubProcessor;

impl FileProcessor for EpubProcessor {
fn get_sample_size(file: &str) -> Result<u64, FileError> {
fn get_sample_size(&self, file: &str) -> Result<u64, FileError> {
let mut epub_file = Self::open(file)?;

let mut sample_size = 0;
Expand Down Expand Up @@ -44,8 +44,8 @@ impl FileProcessor for EpubProcessor {
Ok(sample_size)
}

fn hash(path: &str) -> Option<String> {
let sample_result = EpubProcessor::get_sample_size(path);
fn hash(&self, path: &str) -> Option<String> {
let sample_result = self.get_sample_size(path);

if let Ok(sample) = sample_result {
match hash::generate(path, sample) {
Expand All @@ -60,7 +60,11 @@ impl FileProcessor for EpubProcessor {
}
}

fn process(path: &str, _: FileProcessorOptions) -> Result<ProcessedFile, FileError> {
fn process(
&self,
path: &str,
_: FileProcessorOptions,
) -> Result<ProcessedFile, FileError> {
debug!(?path, "processing epub");

let path_buf = PathBuf::from(path);
Expand All @@ -72,13 +76,17 @@ impl FileProcessor for EpubProcessor {

Ok(ProcessedFile {
path: path_buf,
hash: EpubProcessor::hash(path),
hash: self.hash(path),
metadata: Some(metadata),
pages,
})
}

fn get_page(path: &str, page: i32) -> Result<(ContentType, Vec<u8>), FileError> {
fn get_page(
&self,
path: &str,
page: i32,
) -> Result<(ContentType, Vec<u8>), FileError> {
if page == 1 {
// Assume this is the cover page
EpubProcessor::get_cover(path)
Expand All @@ -88,6 +96,7 @@ impl FileProcessor for EpubProcessor {
}

fn get_page_content_types(
&self,
path: &str,
pages: Vec<i32>,
) -> Result<HashMap<i32, ContentType>, FileError> {
Expand Down Expand Up @@ -129,6 +138,10 @@ impl FileProcessor for EpubProcessor {
}

impl EpubProcessor {
pub fn new() -> Self {
Self {}
}

/// Opens the EPUB document located at `path`.
///
/// # Errors
///
/// Returns `FileError::EpubOpenError` carrying the underlying error's
/// string representation when the document cannot be opened.
pub fn open(path: &str) -> Result<EpubDoc<BufReader<File>>, FileError> {
    match EpubDoc::new(path) {
        Ok(doc) => Ok(doc),
        Err(e) => Err(FileError::EpubOpenError(e.to_string())),
    }
}
Expand Down
50 changes: 33 additions & 17 deletions core/src/filesystem/media/pdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use pdf::file::FileOptions;
use pdfium_render::{prelude::Pdfium, render_config::PdfRenderConfig};

use crate::{
config,
db::entity::metadata::MediaMetadata,
filesystem::{
archive::create_zip_archive, error::FileError, hash, image::ImageFormat,
Expand All @@ -20,14 +19,17 @@ use crate::{
use super::{process::FileConverter, FileProcessor, FileProcessorOptions, ProcessedFile};

/// A file processor for PDF files.
pub struct PdfProcessor;
pub struct PdfProcessor {
/// Optional path to the PDFium library. When set, `renderer` attempts to
/// bind to the library at this path.
pdfium_path: Option<String>,
/// Directory under which `to_zip` creates a folder (named after the source
/// file's stem) to hold the unpacked contents.
cache_dir: PathBuf,
}

impl FileProcessor for PdfProcessor {
// It is REALLY annoying to work with PDFs, and there is no good way to consume
// each page as a vector of bytes efficiently. Since PDFs don't really have metadata,
// I wouldn't expect the file to change much after a scan. So, for now, this will
// just make the sample size approximately 1/10th of the file size.
fn get_sample_size(path: &str) -> Result<u64, FileError> {
fn get_sample_size(&self, path: &str) -> Result<u64, FileError> {
let file = std::fs::File::open(path)?;
let metadata = file.metadata()?;
let size = metadata.len();
Expand All @@ -42,8 +44,8 @@ impl FileProcessor for PdfProcessor {
Ok(size / 10)
}

fn hash(path: &str) -> Option<String> {
let sample_result = PdfProcessor::get_sample_size(path);
fn hash(&self, path: &str) -> Option<String> {
let sample_result = self.get_sample_size(path);

if let Ok(sample) = sample_result {
match hash::generate(path, sample) {
Expand All @@ -58,23 +60,31 @@ impl FileProcessor for PdfProcessor {
}
}

fn process(path: &str, _: FileProcessorOptions) -> Result<ProcessedFile, FileError> {
fn process(
&self,
path: &str,
_: FileProcessorOptions,
) -> Result<ProcessedFile, FileError> {
let file = FileOptions::cached().open(path)?;

let pages = file.pages().count() as i32;
let metadata = file.trailer.info_dict.map(MediaMetadata::from);

Ok(ProcessedFile {
path: PathBuf::from(path),
hash: PdfProcessor::hash(path),
hash: self.hash(path),
metadata,
pages,
})
}

// TODO: The decision to use PNG should be a configuration option
fn get_page(path: &str, page: i32) -> Result<(ContentType, Vec<u8>), FileError> {
let pdfium = PdfProcessor::renderer()?;
fn get_page(
&self,
path: &str,
page: i32,
) -> Result<(ContentType, Vec<u8>), FileError> {
let pdfium = self.renderer()?;

let document = pdfium.load_pdf_from_file(path, None)?;
let document_page =
Expand Down Expand Up @@ -111,6 +121,7 @@ impl FileProcessor for PdfProcessor {
}

fn get_page_content_types(
&self,
_: &str,
pages: Vec<i32>,
) -> Result<HashMap<i32, ContentType>, FileError> {
Expand All @@ -124,11 +135,16 @@ impl FileProcessor for PdfProcessor {
}

impl PdfProcessor {
/// Initializes a PDFium renderer. If a path to the PDFium library is not provided
pub fn renderer() -> Result<Pdfium, FileError> {
let pdfium_path = config::get_pdfium_path();
pub fn new(pdfium_path: Option<String>, cache_dir: PathBuf) -> Self {
Self {
pdfium_path,
cache_dir,
}
}

if let Some(path) = pdfium_path {
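/// Initializes a PDFium renderer. If a path to the PDFium library is provided,
/// binding to the library at that path is attempted first.
/// NOTE(review): the behavior when no path is provided was left undocumented
/// (the original sentence was unfinished) — confirm and complete.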
pub fn renderer(&self) -> Result<Pdfium, FileError> {
if let Some(path) = self.pdfium_path {
let bindings = Pdfium::bind_to_library(&path)
.or_else(|e| {
tracing::error!(provided_path = ?path, ?e, "Failed to bind to PDFium library at provided path");
Expand All @@ -148,11 +164,12 @@ impl PdfProcessor {

impl FileConverter for PdfProcessor {
fn to_zip(
&self,
path: &str,
delete_source: bool,
format: Option<ImageFormat>,
) -> Result<PathBuf, FileError> {
let pdfium = PdfProcessor::renderer()?;
let pdfium = self.renderer()?;

let document = pdfium.load_pdf_from_file(path, None)?;
let iter = document.pages().iter();
Expand Down Expand Up @@ -202,8 +219,7 @@ impl FileConverter for PdfProcessor {
extension,
} = path_buf.as_path().file_parts();

let cache_dir = config::get_cache_dir();
let unpacked_path = cache_dir.join(&file_stem);
let unpacked_path = self.cache_dir.join(&file_stem);

// create folder for the zip
std::fs::create_dir_all(&unpacked_path)?;
Expand Down Expand Up @@ -237,7 +253,7 @@ impl FileConverter for PdfProcessor {
// TODO: maybe check that this path isn't in a pre-defined list of important paths?
if let Err(err) = std::fs::remove_dir_all(&unpacked_path) {
tracing::error!(
error = ?err, ?cache_dir, ?unpacked_path, "Failed to delete unpacked contents in cache",
error = ?err, ?self.cache_dir, ?unpacked_path, "Failed to delete unpacked contents in cache",
);
}

Expand Down
Loading

0 comments on commit b73699c

Please sign in to comment.