diff --git a/.gitignore b/.gitignore index 39e4fda..e20758e 100644 --- a/.gitignore +++ b/.gitignore @@ -375,8 +375,9 @@ src/ted2zim/templates/assets/videojs-ogvjs.js src/ted2zim/templates/assets/polyfills.js src/ted2zim/templates/assets/webp-hero.bundle.js -# output dir +# output and tmp dir output +tmp # ignore all vscode, this is not standard configuration in this place -.vscode \ No newline at end of file +.vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c8e323..9c7407b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,15 +7,15 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/psf/black - rev: "23.12.0" + rev: "24.3.0" hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.3.3 hooks: - id: ruff - repo: https://github.com/RobertCraigie/pyright-python - rev: v1.1.341 + rev: v1.1.354 hooks: - id: pyright name: pyright (system) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a6c687..0f36b6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- New `--long-description` CLI argument to set the ZIM long description +- New `--disable-metadata-checks` CLI argument to disable the metadata checks which are automated since zimscraperlib 3.x + ### Changed - Changed default publisher metadata from 'Kiwix' to 'openZIM' +- Validate ZIM metadata as early as possible +- Migrate to zimscraperlib 3.3.2 (including **new VideoWebmLow encoder preset version 2**) +- Upgrade Python dependencies, including migration to Python 3.12 + +### Fixed + +- Fix language metadata computation (list, but not yet fully properly ordered) +- Fix computation of automatic description and long description ## [2.1.0] - 2024-01-08 diff --git a/Dockerfile b/Dockerfile index cb20508..45cedc2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM 
python:3.11-slim-bookworm +FROM python:3.12-slim-bookworm LABEL org.opencontainers.image.source https://github.com/openzim/ted # Install necessary packages @@ -14,7 +14,7 @@ COPY entrypoint.sh /usr/local/bin/entrypoint.sh ENTRYPOINT ["entrypoint.sh"] # Copy pyproject.toml and its dependencies -COPY pyproject.toml README.md hatch_build.py get_js_deps.sh /src/ +COPY pyproject.toml openzim.toml README.md /src/ COPY src/ted2zim/__about__.py /src/src/ted2zim/__about__.py # Install Python dependencies diff --git a/README.md b/README.md index ae613dc..3721336 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ TED (Technology, Entertainment, Design) is a global set of conferences under the This project is aimed at creating a sustainable solution to make TED accessible offline by creating ZIM files providing these videos in a similar manner like online. +`ted2zim` adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing). + +`ted2zim` has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/blob/main/docs/Policy.md) **v1.0.0**. ## Getting started :rocket: @@ -20,7 +23,7 @@ This project is aimed at creating a sustainable solution to make TED accessible Make sure that you have `python3`, `unzip`, `ffmpeg`, `wget` and `curl` installed on your system before running the scraper (otherwise you'll get a warning to install them). #### Setup the package -One can easily install the PyPI version but let's setup the source version. +One can easily install the PyPI version but let's setup the source version. First, clone this repository. diff --git a/get_js_deps.sh b/get_js_deps.sh deleted file mode 100755 index a6d2a06..0000000 --- a/get_js_deps.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh - -### -# download JS dependencies and place them in our templates/assets folder -# then launch our ogv.js script to fix dynamic loading links -### - -if ! 
command -v curl > /dev/null; then - echo "you need curl." - exit 1 -fi - -if ! command -v unzip > /dev/null; then - echo "you need unzip." - exit 1 -fi - -# Absolute path this script is in. -SCRIPT_PATH="$( cd "$(dirname "$0")" ; pwd -P )" -ASSETS_PATH="${SCRIPT_PATH}/src/ted2zim/templates/assets" - -echo "About to download JS assets to ${ASSETS_PATH}" - -echo "getting video.js" -curl -L -O https://github.com/videojs/video.js/releases/download/v7.8.1/video-js-7.8.1.zip -rm -rf $ASSETS_PATH/videojs -mkdir -p $ASSETS_PATH/videojs -unzip -o -d $ASSETS_PATH/videojs video-js-7.8.1.zip -rm -rf $ASSETS_PATH/videojs/alt $ASSETS_PATH/videojs/examples -rm -f video-js-7.8.1.zip - -echo "getting chosen.jquery.js" -curl -L -O https://github.com/harvesthq/chosen/releases/download/v1.8.7/chosen_v1.8.7.zip -rm -rf $ASSETS_PATH/chosen -mkdir -p $ASSETS_PATH/chosen -unzip -o -d $ASSETS_PATH/chosen chosen_v1.8.7.zip -rm -rf $ASSETS_PATH/chosen/docsupport $ASSETS_PATH/chosen/chosen.proto.* $ASSETS_PATH/chosen/*.html $ASSETS_PATH/chosen/*.md -rm -f chosen_v1.8.7.zip - -echo "getting jquery.min.js" -curl -L -o $ASSETS_PATH/jquery.min.js https://code.jquery.com/jquery-3.5.1.min.js - -echo "getting ogv.js" -curl -L -O https://github.com/brion/ogv.js/releases/download/1.8.9/ogvjs-1.8.9.zip -rm -rf $ASSETS_PATH/ogvjs -unzip -o ogvjs-1.8.9.zip -mv ogvjs-1.8.9 $ASSETS_PATH/ogvjs -rm -f ogvjs-1.8.9.zip -rm -f $ASSETS_PATH/ogvjs/COPYING $ASSETS_PATH/ogvjs/*.txt $ASSETS_PATH/ogvjs/README.md - -echo "getting videojs-ogvjs.js" -curl -L -O https://github.com/hartman/videojs-ogvjs/archive/v1.3.1.zip -rm -f $ASSETS_PATH/videojs-ogvjs.js -unzip -o v1.3.1.zip -mv videojs-ogvjs-1.3.1/dist/videojs-ogvjs.js $ASSETS_PATH/videojs-ogvjs.js -rm -rf videojs-ogvjs-1.3.1 -rm -f v1.3.1.zip - -echo "getting webp-hero" -curl -L -O https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/polyfills.js -rm -f $ASSETS_PATH/polyfills.js -mv polyfills.js $ASSETS_PATH/polyfills.js -curl -L -O 
https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/webp-hero.bundle.js -rm -f $ASSETS_PATH/webp-hero.bundle.js -mv webp-hero.bundle.js $ASSETS_PATH/webp-hero.bundle.js - -if command -v fix_ogvjs_dist > /dev/null; then - echo "fixing JS files" - fix_ogvjs_dist $ASSETS_PATH "assets" -else - echo "NOT fixing JS files (zimscraperlib not installed)" -fi diff --git a/hatch_build.py b/hatch_build.py deleted file mode 100644 index 977a6e0..0000000 --- a/hatch_build.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging -import subprocess -from pathlib import Path - -from hatchling.builders.hooks.plugin.interface import BuildHookInterface - -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger(__name__) - -# update list in constants.py as well -JS_DEPS = [ - "videojs", - "chosen", - "jquery.min.js", - "ogvjs", - "videojs-ogvjs.js", - "polyfills.js", - "webp-hero.bundle.js", -] - - -class GetJsDepsHook(BuildHookInterface): - def initialize(self, version, build_data): - if self.deps_already_installed(): - logger.info("JS dependencies are already installed, skipping it") - return - Path(self.root).joinpath("src/ted2zim/templates/assets") - subprocess.run( - str(Path(self.root).joinpath("get_js_deps.sh")), - check=True, - ) - return super().initialize(version, build_data) - - def deps_already_installed(self) -> bool: - for dep in JS_DEPS: - if ( - not Path(self.root) - .joinpath("src/ted2zim/templates/assets") - .joinpath(dep) - .exists() - ): - return False - return True diff --git a/openzim.toml b/openzim.toml new file mode 100644 index 0000000..8498872 --- /dev/null +++ b/openzim.toml @@ -0,0 +1,45 @@ +[files.assets.config] +target_dir="src/ted2zim/templates/assets" +execute_after=[ + "fix_ogvjs_dist .", +] + +[files.assets.actions."video.js"] +action="extract_all" +source="https://github.com/videojs/video.js/releases/download/v7.8.1/video-js-7.8.1.zip" +target_dir="videojs" +remove = ["alt","examples",] + +[files.assets.actions."chosen.jquery.js"] 
+action="extract_all" +source="https://github.com/harvesthq/chosen/releases/download/v1.8.7/chosen_v1.8.7.zip" +target_dir="chosen" +remove = ["docsupport","chosen.proto.*","*.html","*.md"] + +[files.assets.actions."jquery.min.js"] +action="get_file" +source="https://code.jquery.com/jquery-3.5.1.min.js" +target_file="jquery.min.js" + +[files.assets.actions."ogv.js"] +action="extract_items" +source="https://github.com/brion/ogv.js/releases/download/1.8.9/ogvjs-1.8.9.zip" +zip_paths=["ogvjs-1.8.9"] +target_paths=["ogvjs"] +remove = ["ogvjs/COPYING","ogvjs/*.txt","ogvjs/*.md",] + +[files.assets.actions."videojs-ogvjs.js"] +action="extract_items" +source="https://github.com/hartman/videojs-ogvjs/archive/v1.3.1.zip" +zip_paths=["videojs-ogvjs-1.3.1/dist/videojs-ogvjs.js"] +target_paths=["videojs-ogvjs.js"] + +[files.assets.actions."webp-hero.polyfills.js"] +action="get_file" +source="https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/polyfills.js" +target_file="polyfills.js" + +[files.assets.actions."webp-hero.bundle.js"] +action="get_file" +source="https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/webp-hero.bundle.js" +target_file="webp-hero.bundle.js" diff --git a/pyproject.toml b/pyproject.toml index d1ccaa4..d5cadf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,54 +1,59 @@ [build-system] -requires = ["hatchling"] +requires = ["hatchling", "hatch-openzim==0.2.0"] build-backend = "hatchling.build" [project] name = "ted2zim" -authors = [{ name = "Kiwix", email = "dev@kiwix.org" }] -keywords = ["kiwix", "zim", "ted", "openzim", "offline"] -requires-python = ">=3.11,<3.12" +requires-python = ">=3.12,<3.13" description = "Make ZIM file from TED Talks" readme = "README.md" -license = { text = "GPL-3.0-or-later" } -classifiers = [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", -] dependencies = [ - "python-dateutil==2.8.2", - # cannot use 3.x for 
now, see https://github.com/openzim/ted/issues/152 - "zimscraperlib==2.1.0", + "python-dateutil==2.9.0.post0", + "zimscraperlib==3.3.2", "requests==2.31.0", - "beautifulsoup4==4.9.3", - "Jinja2==3.1.2", + "beautifulsoup4==4.12.3", + "Jinja2==3.1.3", "kiwixstorage==0.8.3", "pif==0.8.2", - "python-slugify==8.0.1", + "python-slugify==8.0.4", "yt-dlp", # yt-dlp should be updated as frequently as possible ] -dynamic = ["version"] +dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.metadata.hooks.openzim-metadata] +kind = "scraper" +additional-keywords = ["ted"] + +[tool.hatch.build.hooks.openzim-build] +dependencies = [ "zimscraperlib==3.3.2"] # required for fix_ogvjs_dist [project.optional-dependencies] -scripts = ["invoke==2.2.0"] -lint = ["black==23.12.0", "ruff==0.1.8"] -check = ["pyright==1.1.341"] -test = ["pytest==7.4.3", "coverage==7.3.3"] +scripts = [ + "invoke==2.2.0", +] +lint = [ + "black==24.3.0", + "ruff==0.3.3", +] +check = [ + "pyright==1.1.354", +] +test = [ + "pytest==8.1.1", + "coverage==7.4.4", +] dev = [ - "pre-commit==3.6.0", - "debugpy==1.8.0", + "pre-commit==3.6.2", + "debugpy==1.8.1", "ted2zim[scripts]", "ted2zim[lint]", "ted2zim[test]", "ted2zim[check]", - # hatchling is a dev dependency only needed for hook development on developer machine - "hatchling==1.18.0", ] -[project.urls] -Homepage = "https://github.com/openzim/ted" -Donate = "https://www.kiwix.org/en/support-us/" - [project.scripts] ted2zim = "ted2zim.entrypoint:main" ted2zim-multi = "ted2zim.multi.entrypoint:main" @@ -59,11 +64,11 @@ path = "src/ted2zim/__about__.py" [tool.hatch.build] exclude = ["/.github"] -[tool.hatch.build.hooks.custom] -path = "hatch_build.py" - [tool.hatch.build.targets.wheel] packages = ["src/ted2zim"] +artifacts = [ + "src/ted2zim/templates/assets/**", +] [tool.hatch.envs.default] features = ["dev"] @@ -80,6 +85,7 @@ html = "inv coverage --html --args '{args}'" 
[tool.hatch.envs.lint] template = "lint" +python = "py312" skip-install = false features = ["scripts", "lint"] @@ -100,12 +106,14 @@ all = "inv checkall --args '{args}'" [tool.black] line-length = 88 -target-version = ['py311'] +target-version = ['py312'] [tool.ruff] -target-version = "py311" +target-version = "py312" line-length = 88 src = ["src"] + +[tool.ruff.lint] select = [ "A", # flake8-builtins # "ANN", # flake8-annotations @@ -188,17 +196,17 @@ unfixable = [ "F401", ] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["ted2zim"] -[tool.ruff.flake8-bugbear] +[tool.ruff.lint.flake8-bugbear] # add exceptions to B008 for fastapi. extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"] -[tool.ruff.flake8-tidy-imports] +[tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "all" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports "tests/**/*" = ["PLR2004", "S101", "TID252"] @@ -224,5 +232,6 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] include = ["src", "tests", "tasks.py"] exclude = [".env/**", ".venv/**"] extraPaths = ["src"] -pythonVersion = "3.11" +pythonVersion = "3.12" typeCheckingMode = "basic" +disableBytesTypePromotions = true diff --git a/src/ted2zim/entrypoint.py b/src/ted2zim/entrypoint.py index bfb553e..b1fd0ce 100755 --- a/src/ted2zim/entrypoint.py +++ b/src/ted2zim/entrypoint.py @@ -86,6 +86,11 @@ def main(): help="Custom description for your ZIM. 
Based on selection otherwise.", ) + parser.add_argument( + "--long-description", + help="Custom long description for your ZIM.", + ) + parser.add_argument("--creator", help="Name of content creator", default="TED") parser.add_argument( @@ -163,6 +168,13 @@ def main(): version=SCRAPER, ) + parser.add_argument( + "--disable-metadata-checks", + help="Disable validity checks of metadata according to openZIM conventions", + action="store_true", + default=False, + ) + args = parser.parse_args() set_debug(args.debug) logger = get_logger() diff --git a/src/ted2zim/multi/entrypoint.py b/src/ted2zim/multi/entrypoint.py index 7d4fb6d..4d239cb 100644 --- a/src/ted2zim/multi/entrypoint.py +++ b/src/ted2zim/multi/entrypoint.py @@ -23,7 +23,7 @@ def main(): parser.add_argument( "--playlists", - help="Comma seperated list of playlist IDs to scrape. Pass all to scrape all", + help="Comma separated list of playlist IDs to scrape. Pass all to scrape all", ) parser.add_argument( @@ -73,6 +73,13 @@ def main(): version=SCRAPER, ) + parser.add_argument( + "--disable-metadata-checks", + help="Disable validity checks of metadata according to openZIM conventions", + action="store_true", + default=False, + ) + args, extra_args = parser.parse_known_args() # prevent launching without any topic(s)/playlist(s) diff --git a/src/ted2zim/multi/scraper.py b/src/ted2zim/multi/scraper.py index 475fc66..3a57400 100644 --- a/src/ted2zim/multi/scraper.py +++ b/src/ted2zim/multi/scraper.py @@ -25,6 +25,7 @@ def __init__( extra_args, ): self.debug = False + self.disable_metadata_checks = False # save options as properties for key, value in options.items(): if key not in ["topics", "playlists"]: @@ -283,6 +284,9 @@ def run_indiv_zim_mode(self, item, mode): if self.debug: args += ["--debug"] + if self.disable_metadata_checks: + args += ["--disable-metadata-checks"] + logger.debug(nicer_args_join(args)) process = subprocess.run( args, @@ -312,6 +316,8 @@ def handle_single_zim(self, mode): args += 
self.extra_args if self.debug: args += ["--debug"] + if self.disable_metadata_checks: + args += ["--disable-metadata-checks"] return subprocess.run(args, check=False).returncode def fetch_metadata(self): diff --git a/src/ted2zim/processing.py b/src/ted2zim/processing.py index eee4222..36bc989 100644 --- a/src/ted2zim/processing.py +++ b/src/ted2zim/processing.py @@ -33,6 +33,15 @@ def post_process_video(video_dir, video_id, preset, video_format, low_quality): dst_path = src_path.parent.joinpath(f"video.{video_format}") logger.debug(f"Converting video {video_id}") - reencode( - src_path, dst_path, preset.to_ffmpeg_args(), delete_src=True, failsafe=False - ) + success, process = reencode( + src_path, + dst_path, + preset.to_ffmpeg_args(), + delete_src=True, + with_process=True, + failsafe=True, + ) # pyright: ignore[reportGeneralTypeIssues] + if not success: + if process: + logger.error(process.stdout) + raise Exception(f"Exception while re-encoding {src_path} for {video_id}") diff --git a/src/ted2zim/scraper.py b/src/ted2zim/scraper.py index 9a575d2..0ee3a70 100644 --- a/src/ted2zim/scraper.py +++ b/src/ted2zim/scraper.py @@ -20,8 +20,16 @@ from zimscraperlib.image.optimization import optimize_image from zimscraperlib.image.presets import WebpMedium from zimscraperlib.image.transformation import resize_image +from zimscraperlib.inputs import compute_descriptions from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow from zimscraperlib.zim import make_zim_file +from zimscraperlib.zim.metadata import ( + validate_description, + validate_language, + validate_longdescription, + validate_tags, + validate_title, +) from ted2zim.constants import ( ALL, @@ -55,6 +63,7 @@ def __init__( locale_name, title, description, + long_description, creator, publisher, tags, @@ -67,6 +76,7 @@ def __init__( subtitles_setting, tmp_dir, threads, + disable_metadata_checks, ): # video-encoding info self.video_format = video_format @@ -77,12 +87,64 @@ def __init__( self.languages = 
( [] if languages is None else [lang.strip() for lang in languages.split(",")] ) + + def get_iso_639_3_language(lang: str) -> str | None: + """Helper function to safely get ISO-639-3 code from input language""" + lang_info = get_language_details(lang, failsafe=True) + if lang_info: + return lang_info["iso-639-3"] + else: + logger.warning( + f"Failed to get iso-639-3 language info for {lang}. " + "This value will be missing in ZIM Language metadata." + ) + return None + + def sort_languages_hack(languages: set[str]) -> list[str]: + """This is a temporary hack to sort languages by importance in the ZIM + + For now, if eng is among the list, we assume it is the most important + language. Otherwise list is kept as-is + """ + return sorted( + languages, key=lambda x: -1 if x == "eng" else 0 + ) # sorted() returns the new list; list.sort() sorts in place and returns None + + if not self.languages: + self.zim_languages = "eng" + else: + self.zim_languages = ",".join( + sort_languages_hack( + { + lang + for lang in [ + get_iso_639_3_language(lang) for lang in self.languages + ] + if lang + } + ) + ) self.tags = [] if tags is None else [tag.strip() for tag in tags.split(",")] + self.tags = [*self.tags, "_category:ted", "ted", "_videos:yes"] self.title = title self.description = description + self.long_description = long_description self.creator = creator self.publisher = publisher self.name = name + self.disable_metadata_checks = disable_metadata_checks + + if not self.disable_metadata_checks: + # Validate ZIM metadata early so that we do not waste time doing operations + # for a scraper which will fail anyway in the end + validate_language("Language", self.zim_languages) + validate_tags("Tags", self.tags) + if self.title: + validate_title("Title", self.title) + if self.description: + validate_description("Description", self.description) + if self.long_description: + validate_longdescription("LongDescription", self.long_description) # directory setup self.output_dir = 
pathlib.Path(output_dir).expanduser().resolve() @@ -123,7 +185,6 @@ def __init__( self.source_languages = ( [] if not self.languages else self.to_ted_langcodes(self.languages) ) - self.zim_lang = None self.already_visited = [] # set and record locale for translations @@ -296,33 +357,29 @@ def extract_videos_from_topics(self, topic): return True def update_zim_metadata(self): - if not self.languages: - self.zim_lang = "eng" - elif len(self.source_languages) > 1: - self.zim_lang = "mul" - else: - lang_info = get_language_details(self.source_languages[0], failsafe=True) - if lang_info: - self.zim_lang = lang_info["iso-639-3"] - else: - self.zim_lang = "eng" - if self.playlist: if not self.title: self.title = self.playlist_title.strip() # pyright: ignore - if not self.description: - self.description = self.playlist_description.strip() # pyright: ignore + default_description = self.playlist_description.strip() # pyright: ignore elif len(self.topics) > 1: if not self.title: self.title = "TED Collection" - if not self.description: - self.description = "A selection of TED videos from several topics" + default_description = "A selection of TED videos from several topics" else: topic_str = self.topics[0].replace("+", " ") if not self.title: self.title = f"{topic_str.capitalize()} from TED" - if not self.description: - self.description = f"A selection of {topic_str} videos from TED" + default_description = f"A selection of {topic_str} videos from TED" + + # update description and long_description if not already set by user input, + # based on default_description potentially retrieved from playlist / topics + # compute_descriptions always returns valid description and long description + # when based on default_description + self.description, self.long_description = compute_descriptions( + default_description=default_description, + user_description=self.description, + user_long_description=self.long_description, + ) def get_display_name(self, lang_code, lang_name): """Display 
name for language""" @@ -932,6 +989,7 @@ def download_video_files(self, video): f"{org_video_file_path}", ) logger.debug("", exc_info=exc) + org_video_file_path.unlink(missing_ok=True) # Second try to download from youtube ID (used both when no video link AND # when video link download failed - we experience sometimes 403 errors on # video link, see #167) @@ -1166,14 +1224,16 @@ def run(self): fpath=self.output_dir.joinpath(self.fname), name=self.name, main_page="index", - favicon="favicon.png", + illustration="favicon.png", title=self.title, description=self.description, - language=self.zim_lang, # pyright: ignore[reportGeneralTypeIssues] + language=self.zim_languages, # pyright: ignore[reportArgumentType] + long_description=self.long_description, # pyright: ignore[reportArgumentType] creator=self.creator, publisher=self.publisher, - tags=[*self.tags, "_category:ted", "ted", "_videos:yes"], + tags=self.tags, scraper=SCRAPER, + disable_metadata_checks=self.disable_metadata_checks, ) if not self.keep_build_dir: logger.info("removing temp folder") diff --git a/src/ted2zim/utils.py b/src/ted2zim/utils.py index 51c2074..9d6fc89 100644 --- a/src/ted2zim/utils.py +++ b/src/ted2zim/utils.py @@ -19,9 +19,9 @@ def update_subtitles_list(video_id, language_list): """adds `link` to each language dict containing the subtitle url""" for language in language_list: - language[ - "link" - ] = f"https://www.ted.com/talks/subtitles/id/{video_id}/lang/{language['languageCode']}" + language["link"] = ( + f"https://www.ted.com/talks/subtitles/id/{video_id}/lang/{language['languageCode']}" + ) return language_list @@ -75,7 +75,6 @@ def request_url(url, json_data=None): class WebVTT: - """TED JSON subtitles to WebVTT""" def __init__(self, url):