Skip to content

Commit

Permalink
Rework encode/decode of headers in Request
Browse files Browse the repository at this point in the history
This change changes the encoding/decoding of headers in Request and
username/password in Authentication: now all keys and values have binary
type (because of an implementation detail in the Python 3 standard
library, we actually need to re-encode the headers back to bytes in
Python 3).

Documentation and comments are also improved to clarify the encoding
situation. Also add test cases for non-ASCII characters in auth headers.
  • Loading branch information
Hexcles committed Oct 9, 2018
1 parent d79ae19 commit 8b09ece
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 27 deletions.
2 changes: 1 addition & 1 deletion tools/wptserve/tests/functional/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def request(self, path, query=None, method="GET", headers=None, body=None, auth=
req.add_data(body)

if auth is not None:
req.add_header("Authorization", b"Basic %s" % base64.b64encode(("%s:%s" % auth).encode("utf-8")))
req.add_header("Authorization", b"Basic %s" % base64.b64encode((b"%s:%s" % auth)))

return urlopen(req)

Expand Down
1 change: 0 additions & 1 deletion tools/wptserve/tests/functional/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def test_sub_uuid(self):
def test_sub_var(self):
resp = self.request("/sub_var.sub.txt")
port = self.server.port
print(port, type(port))
expected = b"localhost %d A %d B localhost C" % (port, port)
self.assertEqual(resp.read().rstrip(), expected)

Expand Down
10 changes: 8 additions & 2 deletions tools/wptserve/tests/functional/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,16 @@ class TestAuth(TestUsingServer):
def test_auth(self):
@wptserve.handlers.handler
def handler(request, response):
return " ".join((request.auth.username, request.auth.password))
return b" ".join((request.auth.username, request.auth.password))

route = ("GET", "/test/test_auth", handler)
self.server.router.register(*route)
resp = self.request(route[1], auth=("test", "PASS"))

resp = self.request(route[1], auth=(b"test", b"PASS"))
self.assertEqual(200, resp.getcode())
self.assertEqual([b"test", b"PASS"], resp.read().split(b" "))

encoded_text = u"どうも".encode("shift-jis")
resp = self.request(route[1], auth=(encoded_text, encoded_text))
self.assertEqual(200, resp.getcode())
self.assertEqual([encoded_text, encoded_text], resp.read().split(b" "))
5 changes: 3 additions & 2 deletions tools/wptserve/wptserve/pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,9 @@ def config_replacement(match):
escape_func = {"html": lambda x:escape(x, quote=True),
"none": lambda x:x}[escape_type]

#Should possibly support escaping for other contexts e.g. script
#TODO: read the encoding of the response
# Should possibly support escaping for other contexts e.g. script
# TODO: read the encoding of the response
# cgi.escape() only takes text strings in Python 3.
if isinstance(value, binary_type):
value = value.decode("utf-8")
elif isinstance(value, int):
Expand Down
4 changes: 4 additions & 0 deletions tools/wptserve/wptserve/ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

class RangeParser(object):
def __call__(self, header, file_size):
try:
header = header.decode("ascii")
except UnicodeDecodeError:
raise HTTPException(400, "Non-ASCII range header value")
prefix = "bytes="
if not header.startswith(prefix):
raise HTTPException(416, message="Unrecognised range type %s" % (header,))
Expand Down
60 changes: 39 additions & 21 deletions tools/wptserve/wptserve/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def POST(self):
self.raw_input.seek(0)
fs = cgi.FieldStorage(fp=self.raw_input,
environ={"REQUEST_METHOD": self.method},
headers=self.headers,
headers=self.raw_headers,
keep_blank_values=True)
self._POST = MultiDict.from_field_storage(fs)
self.raw_input.seek(pos)
Expand All @@ -318,8 +318,8 @@ def POST(self):
def cookies(self):
if self._cookies is None:
parser = BaseCookie()
cookie_headers = self.headers.get("cookie", u"")
parser.load(cookie_headers.encode("utf-8"))
cookie_headers = self.headers.get("cookie", b"")
parser.load(cookie_headers)
cookies = Cookies()
for key, value in parser.iteritems():
cookies[key] = CookieValue(value)
Expand Down Expand Up @@ -355,21 +355,34 @@ def __init__(self, request_handler):
super(H2Request, self).__init__(request_handler)


def maybedecode(s):
if isinstance(s, text_type):
return s
def _maybe_encode(s):
"""Encodes a text-type string into binary data using iso-8859-1.
Returns `str` in Python 2 and `bytes` in Python 3. The function is a no-op
if the argument already has a binary type.
"""
if isinstance(s, binary_type):
return s.decode("ascii")
return s

# Python 3 assumes iso-8859-1 when parsing headers, which will garble text
# with non ASCII characters. We try to encode the text back to binary.
# https://github.com/python/cpython/blob/273fc220b25933e443c82af6888eb1871d032fb8/Lib/http/client.py#L213
if isinstance(s, text_type):
return s.encode("iso-8859-1")

raise TypeError("Unexpected value in RequestHeaders: %r" % s)


class RequestHeaders(dict):
"""Dictionary-like API for accessing request headers."""
"""Read-only dictionary-like API for accessing request headers.
Unlike BaseHTTPRequestHandler.headers, this class always returns all
headers with the same name (separated by commas). And it ensures all keys
(i.e. names of headers) and values have binary type.
"""
def __init__(self, items):
for header in items.keys():
key = header.lower()
key = _maybe_encode(header).lower()
# get all headers with the same name
values = items.getallmatchingheaders(header)
if len(values) > 1:
Expand All @@ -379,17 +392,17 @@ def __init__(self, items):
for value in values:
# getallmatchingheaders returns raw header lines, so
# split to get name, value
multiples.append(maybedecode(value).split(':', 1)[1].strip())
multiples.append(_maybe_encode(value).split(b':', 1)[1].strip())
headers = multiples
else:
headers = [maybedecode(items[header])]
dict.__setitem__(self, maybedecode(key), headers)
headers = [_maybe_encode(items[header])]
dict.__setitem__(self, key, headers)


def __getitem__(self, key):
"""Get all headers of a certain (case-insensitive) name. If there is
more than one, the values are returned comma separated"""
key = maybedecode(key)
key = _maybe_encode(key)
values = dict.__getitem__(self, key.lower())
if len(values) == 1:
return values[0]
Expand All @@ -415,7 +428,7 @@ def get(self, key, default=None):
def get_list(self, key, default=missing):
"""Get all the header values for a particular field name as
a list"""
key = maybedecode(key)
key = _maybe_encode(key)
try:
return dict.__getitem__(self, key.lower())
except KeyError:
Expand All @@ -425,7 +438,7 @@ def get_list(self, key, default=missing):
raise

def __contains__(self, key):
key = maybedecode(key)
key = _maybe_encode(key)
return dict.__contains__(self, key.lower())

def iteritems(self):
Expand Down Expand Up @@ -604,23 +617,28 @@ class Authentication(object):
The password supplied in the HTTP Authorization
header, or None
Both attributes are binary strings (`str` in Py2, `bytes` in Py3), since
RFC7617 Section 2.1 does not specify the encoding for username & passsword
(as long it's compatible with ASCII). UTF-8 should be a relatively safe
choice if callers need to decode them as most browsers use it.
"""
def __init__(self, headers):
self.username = None
self.password = None

auth_schemes = {"Basic": self.decode_basic}
auth_schemes = {b"Basic": self.decode_basic}

if "authorization" in headers:
header = headers.get("authorization")
assert isinstance(header, text_type)
auth_type, data = header.split(" ", 1)
assert isinstance(header, binary_type)
auth_type, data = header.split(b" ", 1)
if auth_type in auth_schemes:
self.username, self.password = auth_schemes[auth_type](data)
else:
raise HTTPException(400, "Unsupported authentication scheme %s" % auth_type)

def decode_basic(self, data):
assert isinstance(data, text_type)
decoded_data = base64.decodestring(data.encode("utf-8"))
return decoded_data.decode("utf-8").split(":", 1)
assert isinstance(data, binary_type)
decoded_data = base64.b64decode(data)
return decoded_data.split(b":", 1)

0 comments on commit 8b09ece

Please sign in to comment.