Skip to content

Commit

Permalink
improve error handling of http file downloads
Browse files (browse the repository at this point in the history)
(#10)
  • Loading branch information
mikf committed Mar 16, 2017
1 parent 80df2b3 commit 22910f9
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 40 deletions.
5 changes: 1 addition & 4 deletions gallery_dl/downloader/common.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2014-2016 Mike Fährmann
# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -13,9 +13,6 @@

class BasicDownloader():
"""Base class for downloader modules"""

max_tries = 5

def __init__(self):
self.downloading = False

Expand Down
88 changes: 52 additions & 36 deletions gallery_dl/downloader/http.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2014-2016 Mike Fährmann
# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -10,8 +10,10 @@

import time
import requests
import requests.exceptions as rexcepts
import mimetypes
from .common import BasicDownloader
from .. import config


class Downloader(BasicDownloader):
Expand All @@ -20,56 +22,70 @@ def __init__(self, output):
BasicDownloader.__init__(self)
self.session = requests.session()
self.out = output
self.max_tries = config.get(("retries",), 5)
self.timeout = config.get(("timeout",), None)

def download_impl(self, url, pathfmt):
tries = 0
msg = ""
while True:
tries += 1
if tries > 1:
self.out.error(pathfmt.path, msg, tries-1, self.max_tries)
if tries > self.max_tries:
return
time.sleep(1)

# try to connect to remote source
try:
response = self.session.get(url, stream=True, verify=True)
except requests.exceptions.ConnectionError as exptn:
tries += 1
self.out.error(pathfmt.path, exptn, tries, self.max_tries)
time.sleep(1)
if tries == self.max_tries:
return tries
response = self.session.get(
url, stream=True, timeout=self.timeout
)
except (rexcepts.ConnectionError, rexcepts.Timeout) as exception:
msg = exception
continue
except (rexcepts.RequestException, UnicodeError) as exception:
msg = exception
break

# reject error-status-codes
if response.status_code != requests.codes.ok:
tries += 1
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
response.status_code, response.reason),
tries, self.max_tries
if response.status_code != 200:
msg = 'HTTP status "{} {}"'.format(
response.status_code, response.reason
)
response.close()
if response.status_code == 404:
return self.max_tries
time.sleep(1)
if tries == self.max_tries:
return tries
break
continue

# everything ok -- proceed to download
break
if not pathfmt.has_extension:
# set 'extension' keyword from Content-Type header
mtype = response.headers.get("Content-Type", "image/jpeg")
exts = mimetypes.guess_all_extensions(mtype, strict=False)
exts.sort()
pathfmt.set_extension(exts[-1][1:])
if pathfmt.exists():
self.out.skip(pathfmt.path)
response.close()
return

if not pathfmt.has_extension:
# set 'extension' keyword from Content-Type header
mtype = response.headers.get("Content-Type", "image/jpeg")
extensions = mimetypes.guess_all_extensions(mtype, strict=False)
extensions.sort()
pathfmt.set_extension(extensions[-1][1:])
if pathfmt.exists():
self.out.skip(pathfmt.path)
response.close()
return
# everything ok -- proceed to download
self.out.start(pathfmt.path)
self.downloading = True
with pathfmt.open() as file:
try:
for data in response.iter_content(None):
file.write(data)
except rexcepts.RequestException as exception:
msg = exception
response.close()
continue
self.downloading = False
self.out.success(pathfmt.path, tries)
return

self.out.start(pathfmt.path)
self.downloading = True
with pathfmt.open() as file:
for data in response.iter_content(16384):
file.write(data)
self.downloading = False
self.out.success(pathfmt.path, tries)
# output for unrecoverable errors
self.out.error(pathfmt.path, msg, tries, 0)

def set_headers(self, headers):
"""Set headers for http requests"""
Expand Down

0 comments on commit 22910f9

Please sign in to comment.