From b465ef6b035a01b7f88974b1e1e1326483781261 Mon Sep 17 00:00:00 2001 From: Josh Kelley Date: Sun, 23 Aug 2015 17:39:33 -0400 Subject: [PATCH 1/3] Add Python 3.4 to the list of tested environments --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 7c78e47..f6c7f3d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py33 +envlist = py26,py27,py33,py34 [testenv] deps=nose From f8f9436a67f80d0ad1ba2530dc219ebf6aa4d6e1 Mon Sep 17 00:00:00 2001 From: Josh Kelley Date: Sun, 23 Aug 2015 17:39:56 -0400 Subject: [PATCH 2/3] Handle Unicode decoding errors while reading files In Python 3, opening a file in text mode and reading it may throw UnicodeDecodeErrors. This adds handling for this and reports any such errors as `unicode_decode_error` messages in Dodgy's results. This means that running Dodgy under Python 3 will be pickier than running it under Python 2, since Python 2 doesn't really care about encodings. This probably isn't ideal, but it at least keeps Dodgy from crashing on an entire project tree if one file has a bad encoding or has its file type mis-detected. 
--- dodgy/checks.py | 7 +++++-- tests/test_checks.py | 8 ++++++++ tests/testdata/bad_utf8.txt | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/testdata/bad_utf8.txt diff --git a/dodgy/checks.py b/dodgy/checks.py index 93474e9..918590c 100644 --- a/dodgy/checks.py +++ b/dodgy/checks.py @@ -67,8 +67,11 @@ def check_line(line, check_list): def check_file(filepath): - with open(filepath) as to_check: - return check_file_contents(to_check.read()) + try: + with open(filepath) as to_check: + return check_file_contents(to_check.read()) + except UnicodeDecodeError as e: + return [(0, 'unicode_decode_error', str(e))] def check_file_contents(file_contents): diff --git a/tests/test_checks.py b/tests/test_checks.py index 79b3be8..dc800d8 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -1,4 +1,5 @@ import os +import sys from unittest import TestCase from dodgy.checks import check_file @@ -48,3 +49,10 @@ def test_ssh_privatekey(self): def test_ssh_publickey(self): self._do_test('ssh_public_key.pub', 'ssh_rsa_public_key') + + def test_bad_unicode(self): + """Test that we handle errors during Python 3's required Unicode + decoding.""" + if sys.version_info > (3, 0): + self._do_test('bad_utf8.txt', 'unicode_decode_error') + diff --git a/tests/testdata/bad_utf8.txt b/tests/testdata/bad_utf8.txt new file mode 100644 index 0000000..06c0962 --- /dev/null +++ b/tests/testdata/bad_utf8.txt @@ -0,0 +1 @@ +Ã( \ No newline at end of file From 6282992bc5619555b83b6b5e6dd9681fab010707 Mon Sep 17 00:00:00 2001 From: Josh Kelley Date: Sun, 23 Aug 2015 17:47:23 -0400 Subject: [PATCH 3/3] Skip compressed files This prevents spurious UnicodeDecodeErrors in Python 3. Adding handling for compressed files would not be hard (using gzip, bz2, and optionally lzma libraries), but there's probably little benefit, since compressed files in a project tree are likely either from an upstream source or have an uncompressed version available for testing. 
--- dodgy/run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dodgy/run.py b/dodgy/run.py index 55fb332..f284600 100644 --- a/dodgy/run.py +++ b/dodgy/run.py @@ -40,6 +40,9 @@ def run_checks(directory, ignore_paths=None): mimetype = mimetypes.guess_type(filepath) if mimetype[0] is None or not mimetype[0].startswith('text/'): continue + # Also skip anything with an encoding (e.g., a gzipped CSS). + if mimetype[1]: + continue for msg_parts in check_file(filepath): warnings.append({