Skip to content

Commit

Permalink
Always reencode using our presets (even for high quality) and choose best format when downloading from YouTube
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Oct 31, 2024
1 parent 9a26ee3 commit f359842
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Raise exception if there are no videos in the playlists (#347)
- Drop `--type` CLI argument and guess `--id` type (#361)
- Always reencode using our presets (even for high quality) and choose best format when downloading from YouTube (#356)

### Fixed

Expand Down
8 changes: 2 additions & 6 deletions scraper/src/youtube2zim/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def process_thumbnail(thumbnail_path, preset):
return True


def post_process_video(video_dir, video_id, preset, video_format, low_quality):
def post_process_video(video_dir, video_id, preset, video_format):

Check warning on line 31 in scraper/src/youtube2zim/processing.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/processing.py#L31

Added line #L31 was not covered by tests
"""apply custom post-processing to downloaded video
- resize thumbnail
- recompress video if incorrect video_format or low_quality requested"""
- recompress video"""

# find downloaded video from video_dir
files = [
Expand All @@ -52,10 +52,6 @@ def post_process_video(video_dir, video_id, preset, video_format, low_quality):
)
src_path = files[0]

# don't reencode if not requesting low-quality and received wanted format
if not low_quality and src_path.suffix[1:] == video_format:
return

dst_path = src_path.with_name(f"video.{video_format}")
logger.info(f"Reencode video to {dst_path}")
success, process = reencode(
Expand Down
25 changes: 16 additions & 9 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
from zimscraperlib.image.probing import get_colors, is_hex_color
from zimscraperlib.image.transformation import resize_image
from zimscraperlib.inputs import compute_descriptions
from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow
from zimscraperlib.video.presets import (

Check warning on line 34 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L34

Added line #L34 was not covered by tests
VideoMp4High,
VideoMp4Low,
VideoWebmHigh,
VideoWebmLow,
)
from zimscraperlib.zim import Creator
from zimscraperlib.zim.filesystem import validate_zimfile_creatable
from zimscraperlib.zim.indexing import IndexData
Expand Down Expand Up @@ -586,10 +591,6 @@ def extract_videos_list(self):
self.videos_ids = [*all_videos.keys()] # unpacking so it's subscriptable

def download_video_files(self, max_concurrency):
audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", "mp4")}[
self.video_format
]

# prepare options which are shared with every downloader
options = {
"cachedir": self.videos_dir,
Expand All @@ -607,8 +608,7 @@ def download_video_files(self, max_concurrency):
# "external_downloader_args": ["--max-tries=20", "--retry-wait=30"],
"outtmpl": str(self.videos_dir.joinpath("%(id)s", "video.%(ext)s")),
"preferredcodec": self.video_format,
"format": f"bestvideo*[ext={vidext}]+bestaudio[ext={audext}]/"
"bestvideo*+bestaudio/best",
"format": "bestvideo*+bestaudio/best",
"y2z_videos_dir": self.videos_dir,
}
if self.all_subtitles:
Expand Down Expand Up @@ -711,7 +711,15 @@ def upload_to_cache(self, key, video_path, encoder_version):
def download_video(self, video_id, options):
"""download the video from cache/youtube and return True if successful"""

preset = {"mp4": VideoMp4Low}.get(self.video_format, VideoWebmLow)()
preset = {

Check warning on line 714 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L714

Added line #L714 was not covered by tests
"mp4": VideoMp4Low if self.low_quality else VideoMp4High,
"webm": VideoWebmLow if self.low_quality else VideoWebmHigh,
}.get(self.video_format)
if not preset:
raise Exception(

Check warning on line 719 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/youtube2zim/scraper.py#L719

Added line #L719 was not covered by tests
f"Impossible to find preset for {self.video_format} video format "
f"(low quality: {self.low_quality})"
)
options_copy = options.copy()
video_location = options_copy["y2z_videos_dir"].joinpath(video_id)
video_path = video_location.joinpath(f"video.{self.video_format}")
Expand Down Expand Up @@ -746,7 +754,6 @@ def download_video(self, video_id, options):
video_id,
preset,
self.video_format,
self.low_quality,
)
self.add_file_to_zim(
zim_path, video_path, callback=(delete_callback, video_path)
Expand Down

0 comments on commit f359842

Please sign in to comment.