Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #160 - Fail properly on downloads #164

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- fixed mobile navigation menu
- fixed video encoding in low quality in WebM
- added support for internationalization
- fixed failure handling: exit properly on too many failed downloads or critical failures

# 1.0.0

Expand Down
15 changes: 13 additions & 2 deletions openedx2zim/html_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
import lxml.html
from bs4 import BeautifulSoup

from .constants import DOWNLOADABLE_EXTENSIONS, AUDIO_FORMATS
from .constants import DOWNLOADABLE_EXTENSIONS, AUDIO_FORMATS, getLogger
from .utils import jinja, prepare_url, get_back_jumps, remove_autogenerated_tags


logger = getLogger()


class HtmlProcessor:
def __init__(self, scraper):
self.scraper = scraper
Expand Down Expand Up @@ -41,12 +44,20 @@ def download_and_get_filename(
return None, None
fresh_download = False
if not output_file.exists():
file_url = prepare_url(src, netloc, path_on_server)
if self.scraper.download_file(
prepare_url(src, netloc, path_on_server),
file_url,
output_file,
):
fresh_download = True
else:
self.scraper.failed_urls.add(file_url)
if len(self.scraper.failed_urls) >= self.scraper.failure_threshhold:
logger.error("Too many failures. Try running the scraper again")
logger.debug("The following URLs failed to download:")
for failed_url in self.scraper.failed_urls:
logger.debug(failed_url)
raise SystemExit(1)
return None, None
return filename, fresh_download

Expand Down
14 changes: 14 additions & 0 deletions openedx2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ def __init__(
self.wiki = None
self.forum = None

self.failed_urls = {}
self.failure_threshhold = 1

# set and record locale for translations
locale_details = get_language_details(locale_name)
self.instance_lang = locale_details["iso-639-1"]
Expand Down Expand Up @@ -660,6 +663,10 @@ def convert_video(self, src, dst):
preset = VideoWebmLow() if self.video_format == "webm" else VideoMp4Low()
elif src.suffix[1:] != self.video_format:
preset = VideoWebmHigh() if self.video_format == "webm" else VideoMp4High()
else:
if not src.resolve() == dst.resolve():
shutil.move(src, dst)
return True
return reencode(
src,
dst,
Expand Down Expand Up @@ -884,4 +891,11 @@ def run(self):
shutil.rmtree(self.build_dir, ignore_errors=True)
# shutdown the youtube downloader
self.yt_downloader.shutdown()
if self.failed_urls:
logger.error(
f"Content from {len(self.failed_urls)} URL(s) failed to download"
)
logger.debug("Failed URLs:")
for failed_url in self.failed_urls:
logger.debug(failed_url)
logger.info("Done everything")
11 changes: 10 additions & 1 deletion openedx2zim/xblocks_extractor/libcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from .base_xblock import BaseXblock
from ..utils import jinja, download_and_convert_subtitles, prepare_url, get_back_jumps
from ..constants import getLogger


logger = getLogger()


class Libcast(BaseXblock):
Expand Down Expand Up @@ -44,9 +48,14 @@ def download(self, instance_connection):
else:
video_path = self.output_path.joinpath("video.mp4")
if not video_path.exists():
self.scraper.download_file(
downloaded = self.scraper.download_file(
prepare_url(url, self.scraper.instance_url), video_path
)
if not downloaded:
logger.error(
f"Video for libcast block {self.xblock_json['student_view_url']} failed to download"
)
raise SystemExit(1)

def render(self):
return jinja(
Expand Down
10 changes: 8 additions & 2 deletions openedx2zim/xblocks_extractor/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,19 @@ def download(self, instance_connection):
else:
video_path = self.output_path.joinpath("video.mp4")
if not video_path.exists():
downloaded = None
if youtube:
self.scraper.download_file(url, video_path)
downloaded = self.scraper.download_file(url, video_path)
else:
self.scraper.download_file(
downloaded = self.scraper.download_file(
prepare_url(urllib.parse.unquote(url), self.scraper.instance_url),
video_path,
)
if not downloaded:
logger.error(
f"Video for video block {self.xblock_json['student_view_url']} failed to download"
)
raise SystemExit(1)
real_subtitle = download_and_convert_subtitles(
self.output_path, subs_lang, instance_connection
)
Expand Down