Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fall back to downloading using the requests Python package (if installed) when urllib2 fails due to SSL error #2538

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ before_install:
# autopep8 1.3.4 is last one to support Python 2.6
- if [ "x$TRAVIS_PYTHON_VERSION" == 'x2.6' ]; then pip install 'autopep8<1.3.5'; else pip install autopep8; fi
# optional Python packages for EasyBuild
- pip install GC3Pie pycodestyle python-graph-dot python-hglib PyYAML
- pip install GC3Pie pycodestyle python-graph-dot python-hglib PyYAML requests
# git config is required to make actual git commits (cfr. tests for GitRepository)
- git config --global user.name "Travis CI"
- git config --global user.email "[email protected]"
Expand Down
40 changes: 33 additions & 7 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@
from easybuild.tools.config import build_option
from easybuild.tools import run

try:
import requests
HAVE_REQUESTS = True
except ImportError:
HAVE_REQUESTS = False

_log = fancylogger.getLogger('filetools', fname=False)

Expand Down Expand Up @@ -460,26 +465,47 @@ def download_file(filename, url, path, forced=False):
attempt_cnt = 0

# use custom HTTP header
url_req = urllib2.Request(url, headers={'User-Agent': 'EasyBuild', "Accept" : "*/*"})
headers = {'User-Agent': 'EasyBuild', 'Accept': '*/*'}
# for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
url_req = urllib2.Request(url, headers=headers)
used_urllib = urllib2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bartoldeman I'd prefer using a boolean here (use_urllib2) and handling HTTPError via the import above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot from requests.exceptions import HTTPError before it is actually used (because at the first download attempt HTTPError is still urllib.HTTPError, that is why
I changed to used_urllib. If I do both:

use_urllib2 = True
HTTPError = requests.HTTPError

that's a bit double...

Perhaps I should just write a urlopen() emulation using requests (including raising the urllib2.HTTPError exception manually), which might be the cleanest then.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I overlooked that you first need HTTPError from urllib2...


while not downloaded and attempt_cnt < max_attempts:
try:
# urllib2 does the right thing for http proxy setups, urllib does not!
url_fd = urllib2.urlopen(url_req, timeout=timeout)
_log.debug('response code for given url %s: %s' % (url, url_fd.getcode()))
if used_urllib is urllib2:
# urllib2 does the right thing for http proxy setups, urllib does not!
url_fd = urllib2.urlopen(url_req, timeout=timeout)
status_code = url_fd.getcode()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bartoldeman add .code here rather than below?

status_code = url_fd.getcode().code

or maybe with

if hasattr(status_code, 'code'):
    status_code = status_code.code

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can that work? .code is a field of the exception err. for urllib2, and getcode() returns an integer.

One can use

if hasattr(err, 'code'):
    status_code = err.code

but I prefer to use use_urllib2 then.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm not sure what I was smoking here, I was clearly mixing things up. Thanks for clarifying.

else:
response = requests.get(url, headers=headers, stream=True, timeout=timeout)
status_code = response.status_code
response.raise_for_status()
url_fd = response.raw
url_fd.decode_content = True
_log.debug('response code for given url %s: %s' % (url, status_code))
write_file(path, url_fd.read(), forced=forced, backup=True)
_log.info("Downloaded file %s from url %s to %s" % (filename, url, path))
downloaded = True
url_fd.close()
except urllib2.HTTPError as err:
if 400 <= err.code <= 499:
_log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, err.code))
except used_urllib.HTTPError as err:
if used_urllib is urllib2:
status_code = err.code
if 400 <= status_code <= 499:
_log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code))
break
else:
_log.warning("HTTPError occurred while trying to download %s to %s: %s" % (url, path, err))
attempt_cnt += 1
except IOError as err:
_log.warning("IOError occurred while trying to download %s to %s: %s" % (url, path, err))
error_re = re.compile(r"<urlopen error \[Errno 1\] _ssl.c:.*: error:.*:"
"SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure>")
if error_re.match(str(err)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This construct is used to switch to trying with requests, but it's a bit messy, especially with the inline import statements...
I think we need a use_requests = True or maybe broken_urrlib = True here which can be checked above?

# for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
use_requests = False
while not downloaded and attempt_cnt < max_attempts:
    if use_requests:
        if not HAVE_REQUESTS:
           raise EasyBuildError("...")
        # do requests...
    else:
        # do what we do now
        try:
            ...
        except IOError as err:
            if error_re.match(str(err)):
                use_requests = True

if not HAVE_REQUESTS:
raise EasyBuildError("SSL issues with urllib2. If you are using RHEL/CentOS 6.x please "
"install the python-requests and pyOpenSSL RPM packages and try again.")
_log.info("Downloading using requests package instead of urllib2")
used_urllib = requests
attempt_cnt += 1
except Exception, err:
raise EasyBuildError("Unexpected error occurred when trying to download %s to %s: %s", url, path, err)
Expand Down
38 changes: 37 additions & 1 deletion test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ class FileToolsTest(EnhancedTestCase):
('0_foo+0x0x#-$__', 'EB_0_underscore_foo_plus_0x0x_hash__minus__dollar__underscore__underscore_'),
]

def setUp(self):
"""Test setup."""
super(FileToolsTest, self).setUp()

self.orig_filetools_urllib2_urlopen = ft.urllib2.urlopen

def tearDown(self):
"""Cleanup."""
super(FileToolsTest, self).tearDown()

ft.urllib2.urlopen = self.orig_filetools_urllib2_urlopen

def test_extract_cmd(self):
"""Test various extract commands."""
tests = [
Expand Down Expand Up @@ -317,7 +329,7 @@ def test_download_file(self):
opts = init_config(args=['--download-timeout=5.3'])
init_config(build_options={'download_timeout': opts.download_timeout})
target_location = os.path.join(self.test_prefix, 'jenkins_robots.txt')
url = 'https://jenkins1.ugent.be/robots.txt'
url = 'https://raw.githubusercontent.com/easybuilders/easybuild-framework/master/README.rst'
try:
urllib2.urlopen(url)
res = ft.download_file(fn, url, target_location)
Expand Down Expand Up @@ -349,6 +361,30 @@ def test_download_file(self):
self.assertTrue(os.path.exists(target_location))
self.assertTrue(os.path.samefile(path, target_location))

def test_download_file_requests_fallback(self):
"""Test fallback to requests in download_file function."""
url = 'https://raw.githubusercontent.com/easybuilders/easybuild-framework/master/README.rst'
fn = 'README.rst'
target = os.path.join(self.test_prefix, fn)

# replace urllib2.urlopen with function that raises SSL error
def fake_urllib2_open(*args, **kwargs):
error_msg = "<urlopen error [Errno 1] _ssl.c:510: error:12345:"
error_msg += "SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure>"
raise IOError(error_msg)

ft.urllib2.urlopen = fake_urllib2_open

# if requests is available, file is downloaded
if ft.HAVE_REQUESTS:
res = ft.download_file(fn, url, target)
self.assertTrue(res and os.path.exists(res))
self.assertTrue("https://easybuilders.github.io/easybuild" in ft.read_file(res))

# without requests being available, error is raised
ft.HAVE_REQUESTS = False
self.assertErrorRegex(EasyBuildError, "SSL issues with urllib2", ft.download_file, fn, url, target)

def test_mkdir(self):
"""Test mkdir function."""

Expand Down