From b465ef6b035a01b7f88974b1e1e1326483781261 Mon Sep 17 00:00:00 2001
From: Josh Kelley <joshkel@gmail.com>
Date: Sun, 23 Aug 2015 17:39:33 -0400
Subject: [PATCH 1/3] Add Python 3.4 to the list of tested environments

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 7c78e47..f6c7f3d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py26,py27,py33
+envlist = py26,py27,py33,py34
 
 [testenv]
 deps=nose

From f8f9436a67f80d0ad1ba2530dc219ebf6aa4d6e1 Mon Sep 17 00:00:00 2001
From: Josh Kelley <joshkel@gmail.com>
Date: Sun, 23 Aug 2015 17:39:56 -0400
Subject: [PATCH 2/3] Handle Unicode decoding errors while reading files

In Python 3, opening a file in text mode and reading it may raise a
UnicodeDecodeError.  This change catches that exception and reports any
such errors as `unicode_decode_error` messages in Dodgy's results.

This means that running Dodgy under Python 3 will be pickier than
running it under Python 2, since Python 2 doesn't really care about
encodings.  This probably isn't ideal, but it at least keeps Dodgy from
crashing on an entire project tree if one file has a bad encoding or has
its file type mis-detected.
---
 dodgy/checks.py             | 7 +++++--
 tests/test_checks.py        | 8 ++++++++
 tests/testdata/bad_utf8.txt | 1 +
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 tests/testdata/bad_utf8.txt

diff --git a/dodgy/checks.py b/dodgy/checks.py
index 93474e9..918590c 100644
--- a/dodgy/checks.py
+++ b/dodgy/checks.py
@@ -67,8 +67,11 @@ def check_line(line, check_list):
 
 
 def check_file(filepath):
-    with open(filepath) as to_check:
-        return check_file_contents(to_check.read())
+    try:
+        with open(filepath) as to_check:
+            return check_file_contents(to_check.read())
+    except UnicodeDecodeError as e:
+        return [(0, 'unicode_decode_error', str(e))]
 
 
 def check_file_contents(file_contents):
diff --git a/tests/test_checks.py b/tests/test_checks.py
index 79b3be8..dc800d8 100644
--- a/tests/test_checks.py
+++ b/tests/test_checks.py
@@ -1,4 +1,5 @@
 import os
+import sys
 from unittest import TestCase
 from dodgy.checks import check_file
 
@@ -48,3 +49,10 @@ def test_ssh_privatekey(self):
 
     def test_ssh_publickey(self):
         self._do_test('ssh_public_key.pub', 'ssh_rsa_public_key')
+
+    def test_bad_unicode(self):
+        """Test that we handle errors during Python 3's required Unicode
+        decoding."""
+        if sys.version_info > (3, 0):
+            self._do_test('bad_utf8.txt', 'unicode_decode_error')
+
diff --git a/tests/testdata/bad_utf8.txt b/tests/testdata/bad_utf8.txt
new file mode 100644
index 0000000..06c0962
--- /dev/null
+++ b/tests/testdata/bad_utf8.txt
@@ -0,0 +1 @@
+锘棵(
\ No newline at end of file

From 6282992bc5619555b83b6b5e6dd9681fab010707 Mon Sep 17 00:00:00 2001
From: Josh Kelley <joshkel@gmail.com>
Date: Sun, 23 Aug 2015 17:47:23 -0400
Subject: [PATCH 3/3] Skip compressed files

This prevents spurious UnicodeDecodeErrors in Python 3.

Adding handling for compressed files would not be hard (using gzip, bz2,
and optionally lzma libraries), but there's probably little benefit,
since compressed files in a project tree are likely either from an
upstream source or have an uncompressed version available for testing.
---
 dodgy/run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dodgy/run.py b/dodgy/run.py
index 55fb332..f284600 100644
--- a/dodgy/run.py
+++ b/dodgy/run.py
@@ -40,6 +40,9 @@ def run_checks(directory, ignore_paths=None):
         mimetype = mimetypes.guess_type(filepath)
         if mimetype[0] is None or not mimetype[0].startswith('text/'):
             continue
+        # Also skip anything with an encoding (e.g., a gzipped CSS).
+        if mimetype[1]:
+            continue
 
         for msg_parts in check_file(filepath):
             warnings.append({