Skip to content

Commit

Permalink
Rework encode/decode of headers in Request
Browse files Browse the repository at this point in the history
This change changes the encoding/decoding of headers in Request: now
keys are always decoded (i.e. of text type) while values have binary
type (ideally we would like raw binary data, but since the Python 3
standard library always decodes headers, we actually have to re-encode
them in Python 3).

Documentation and comments are also improved to clarify the encoding
situation.
  • Loading branch information
Hexcles authored and Ms2ger committed Oct 4, 2018
1 parent acf944b commit bd7e883
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 22 deletions.
2 changes: 1 addition & 1 deletion tools/wptserve/tests/functional/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ class TestAuth(TestUsingServer):
def test_auth(self):
@wptserve.handlers.handler
def handler(request, response):
return " ".join((request.auth.username, request.auth.password))
return b" ".join((request.auth.username, request.auth.password))

route = ("GET", "/test/test_auth", handler)
self.server.router.register(*route)
Expand Down
4 changes: 4 additions & 0 deletions tools/wptserve/wptserve/ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

class RangeParser(object):
def __call__(self, header, file_size):
try:
header = header.decode("ascii")
except UnicodeDecodeError:
raise HTTPException(400, "Non-ASCII range header value")
prefix = "bytes="
if not header.startswith(prefix):
raise HTTPException(416, message="Unrecognised range type %s" % (header,))
Expand Down
60 changes: 39 additions & 21 deletions tools/wptserve/wptserve/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def POST(self):
self.raw_input.seek(0)
fs = cgi.FieldStorage(fp=self.raw_input,
environ={"REQUEST_METHOD": self.method},
headers=self.headers,
headers=self.raw_headers,
keep_blank_values=True)
self._POST = MultiDict.from_field_storage(fs)
self.raw_input.seek(pos)
Expand All @@ -318,8 +318,8 @@ def POST(self):
def cookies(self):
if self._cookies is None:
parser = BaseCookie()
cookie_headers = self.headers.get("cookie", u"")
parser.load(cookie_headers.encode("utf-8"))
cookie_headers = self.headers.get("cookie", b"")
parser.load(cookie_headers)
cookies = Cookies()
for key, value in parser.iteritems():
cookies[key] = CookieValue(value)
Expand Down Expand Up @@ -355,21 +355,34 @@ def __init__(self, request_handler):
super(H2Request, self).__init__(request_handler)


def maybedecode(s):
if isinstance(s, text_type):
return s
def _maybe_encode(s):
"""Encodes a text-type string into binary data using iso-8859-1.
Returns `str` in Python 2 and `bytes` in Python 3. The function is a no-op
if the argument already has a binary type.
"""
if isinstance(s, binary_type):
return s.decode("ascii")
return s

# Python 3 assumes iso-8859-1 when parsing headers, which will garble text
# with non ASCII characters. We try to encode the text back to binary.
# https://github.com/python/cpython/blob/273fc220b25933e443c82af6888eb1871d032fb8/Lib/http/client.py#L213
if isinstance(s, text_type):
return s.encode("iso-8859-1")

raise TypeError("Unexpected value in RequestHeaders: %r" % s)


class RequestHeaders(dict):
"""Dictionary-like API for accessing request headers."""
"""Read-only dictionary-like API for accessing request headers.
Unlike BaseHTTPRequestHandler.headers, this class always returns all
headers with the same name (separated by commas). Besides, this class
ensures all keys (i.e. names of headers) and values have binary type.
"""
def __init__(self, items):
for header in items.keys():
key = header.lower()
key = _maybe_encode(header).lower()
# get all headers with the same name
values = items.getallmatchingheaders(header)
if len(values) > 1:
Expand All @@ -379,17 +392,17 @@ def __init__(self, items):
for value in values:
# getallmatchingheaders returns raw header lines, so
# split to get name, value
multiples.append(maybedecode(value).split(':', 1)[1].strip())
multiples.append(_maybe_encode(value).split(b':', 1)[1].strip())
headers = multiples
else:
headers = [maybedecode(items[header])]
dict.__setitem__(self, maybedecode(key), headers)
headers = [_maybe_encode(items[header])]
dict.__setitem__(self, key, headers)


def __getitem__(self, key):
"""Get all headers of a certain (case-insensitive) name. If there is
more than one, the values are returned comma separated"""
key = maybedecode(key)
key = _maybe_encode(key)
values = dict.__getitem__(self, key.lower())
if len(values) == 1:
return values[0]
Expand All @@ -415,7 +428,7 @@ def get(self, key, default=None):
def get_list(self, key, default=missing):
"""Get all the header values for a particular field name as
a list"""
key = maybedecode(key)
key = _maybe_encode(key)
try:
return dict.__getitem__(self, key.lower())
except KeyError:
Expand All @@ -425,7 +438,7 @@ def get_list(self, key, default=missing):
raise

def __contains__(self, key):
key = maybedecode(key)
key = _maybe_encode(key)
return dict.__contains__(self, key.lower())

def iteritems(self):
Expand Down Expand Up @@ -604,23 +617,28 @@ class Authentication(object):
The password supplied in the HTTP Authorization
header, or None
Both attributes are binary strings (`str` in Py2, `bytes` in Py3), since
RFC7617 Section 2.1 does not specify the encoding for username & passsword
(as long it's compatible with ASCII). UTF-8 should be a relatively safe
choice if callers need to decode them as most browsers use it.
"""
def __init__(self, headers):
self.username = None
self.password = None

auth_schemes = {"Basic": self.decode_basic}
auth_schemes = {b"Basic": self.decode_basic}

if "authorization" in headers:
header = headers.get("authorization")
assert isinstance(header, text_type)
auth_type, data = header.split(" ", 1)
assert isinstance(header, binary_type)
auth_type, data = header.split(b" ", 1)
if auth_type in auth_schemes:
self.username, self.password = auth_schemes[auth_type](data)
else:
raise HTTPException(400, "Unsupported authentication scheme %s" % auth_type)

def decode_basic(self, data):
assert isinstance(data, text_type)
decoded_data = base64.decodestring(data.encode("utf-8"))
return decoded_data.decode("utf-8").split(":", 1)
assert isinstance(data, binary_type)
decoded_data = base64.b64decode(data)
return decoded_data.split(b":", 1)

0 comments on commit bd7e883

Please sign in to comment.