diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dc8adc2..64aac4c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Raise exception if there are no videos in the playlists (#347) - Drop `--type` CLI argument and guess `--id` type (#361) +- Always reencode using our presets (even for high quality) and choose best format when downloading from Youtube (#356) ### Fixed diff --git a/scraper/src/youtube2zim/processing.py b/scraper/src/youtube2zim/processing.py index b600518a..66a5cd72 100644 --- a/scraper/src/youtube2zim/processing.py +++ b/scraper/src/youtube2zim/processing.py @@ -28,11 +28,11 @@ def process_thumbnail(thumbnail_path, preset): return True -def post_process_video(video_dir, video_id, preset, video_format, low_quality): +def post_process_video(video_dir, video_id, preset, video_format): """apply custom post-processing to downloaded video - resize thumbnail - - recompress video if incorrect video_format or low_quality requested""" + - recompress video""" # find downloaded video from video_dir files = [ @@ -52,10 +52,6 @@ def post_process_video(video_dir, video_id, preset, video_format, low_quality): ) src_path = files[0] - # don't reencode if not requesting low-quality and received wanted format - if not low_quality and src_path.suffix[1:] == video_format: - return - dst_path = src_path.with_name(f"video.{video_format}") logger.info(f"Reencode video to {dst_path}") success, process = reencode( diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index 805de89d..b21ce8ef 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -31,7 +31,12 @@ from zimscraperlib.image.probing import get_colors, is_hex_color from zimscraperlib.image.transformation import resize_image from zimscraperlib.inputs import compute_descriptions -from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow +from zimscraperlib.video.presets import ( + VideoMp4High, + VideoMp4Low, + VideoWebmHigh, + VideoWebmLow, +) from zimscraperlib.zim import Creator from zimscraperlib.zim.filesystem import validate_zimfile_creatable from zimscraperlib.zim.indexing import IndexData @@ -586,10 +591,6 @@ def extract_videos_list(self): self.videos_ids = [*all_videos.keys()] # unpacking so it's subscriptable def download_video_files(self, max_concurrency): - audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", "mp4")}[ - self.video_format - ] - # prepare options which are shared with every downloader options = { "cachedir": self.videos_dir, @@ -607,8 +608,7 @@ def download_video_files(self, max_concurrency): # "external_downloader_args": ["--max-tries=20", "--retry-wait=30"], "outtmpl": str(self.videos_dir.joinpath("%(id)s", "video.%(ext)s")), "preferredcodec": self.video_format, - "format": f"bestvideo*[ext={vidext}]+bestaudio[ext={audext}]/" - "bestvideo*+bestaudio/best", + "format": "bestvideo*+bestaudio/best", "y2z_videos_dir": self.videos_dir, } if self.all_subtitles: @@ -711,7 +711,15 @@ def upload_to_cache(self, key, video_path, encoder_version): def download_video(self, video_id, options): """download the video from cache/youtube and return True if successful""" - preset = {"mp4": VideoMp4Low}.get(self.video_format, VideoWebmLow)() + preset = { + "mp4": VideoMp4Low if self.low_quality else VideoMp4High, + "webm": VideoWebmLow if self.low_quality else VideoWebmHigh, + }.get(self.video_format) + if not preset: + raise Exception( + f"Impossible to find preset for {self.video_format} video format " + f"(low quality: {self.low_quality})" + ) options_copy = options.copy() video_location = options_copy["y2z_videos_dir"].joinpath(video_id) video_path = video_location.joinpath(f"video.{self.video_format}") @@ -746,7 +754,6 @@ def download_video(self, video_id, options): video_id, preset, self.video_format, - self.low_quality, ) self.add_file_to_zim( zim_path, video_path, callback=(delete_callback, video_path)