Skip to content

Commit

Permalink
fix: Decode field names and filenames correctly
Browse files Browse the repository at this point in the history
The HTML5 specification defines that "field names and filenames for file fields [...] must be escaped by replacing any 0x0A (LF) bytes with the byte sequence %0A, 0x0D (CR) with %0D and 0x22 (") with %22. The user agent must not perform any other escapes." and tests show that modern browsers actually do that. This is different from traditional header quoting (which involves backslash-escaping quotes and backslashes).

fixes #60
  • Loading branch information
defnull committed Oct 18, 2024
1 parent 1969677 commit 3ec3991
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 8 deletions.
68 changes: 66 additions & 2 deletions multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@
from collections.abc import MutableMapping as DictMixin
import tempfile
import functools
import warnings

try:
    # Prefer the standard library decorator when the running Python has it.
    from warnings import deprecated
except ImportError:
    from functools import wraps

    def deprecated(reason):
        """ Minimal stand-in for :func:`warnings.deprecated`.

        Returns a decorator. The decorated callable emits a
        ``DeprecationWarning`` carrying *reason* on every call and then
        delegates to the original callable. Both the original and the
        wrapper expose *reason* as their ``__deprecated__`` attribute.
        """
        def decorate(target):
            @wraps(target)
            def proxy(*args, **kwargs):
                # stacklevel=2 attributes the warning to the caller of the
                # deprecated callable, not to this wrapper.
                warnings.warn(reason, category=DeprecationWarning, stacklevel=2)
                return target(*args, **kwargs)

            target.__deprecated__ = reason
            proxy.__deprecated__ = reason
            return proxy

        return decorate


##############################################################################
Expand Down Expand Up @@ -147,14 +162,26 @@ def __get__(self, obj, cls):
_re_option = re.compile(_option) # key=value part of a Content-Type-like header


@deprecated("Use content_disposition_quote() instead")
def header_quote(val):
    """ (Deprecated) Quote a header option value, but only if necessary.

    A value in which ``_re_special`` finds no match is returned unchanged.
    Any other value gets its backslashes and double quotes
    backslash-escaped and is wrapped in double quotes.

    Note: This is NOT the way modern browsers quote field names or
    filenames in Content-Disposition headers.
    See :func:`content_disposition_quote`
    """
    if _re_special.search(val):
        # Backslashes must be doubled first, otherwise the backslashes added
        # for the quotes would be escaped a second time.
        escaped = val.replace("\\", "\\\\").replace('"', '\\"')
        return '"' + escaped + '"'

    return val


@deprecated("Use content_disposition_unquote() instead")
def header_unquote(val, filename=False):
""" (Deprecated) Unquote header option values.
Note: This is NOT the way modern browsers quote field names or filenames
in Content-Disposition headers. See :func:`content_disposition_unquote`
"""
if val[0] == val[-1] == '"':
val = val[1:-1]

Expand All @@ -167,7 +194,44 @@ def header_unquote(val, filename=False):
return val


def parse_options_header(header, options=None):
def content_disposition_quote(val):
    """ Quote a field name or filename for a Content-Disposition header in
        the style of modern browsers (see WHATWG HTML5 specification).

        Only CR, LF and the double quote are percent-escaped; everything
        else is kept verbatim. The result is always wrapped in double quotes.
    """
    escapes = (("\r", "%0D"), ("\n", "%0A"), ('"', "%22"))
    for char, replacement in escapes:
        val = val.replace(char, replacement)
    return "".join(('"', val, '"'))


def content_disposition_unquote(val, filename=False):
    """ Unquote field names or filenames from Content-Disposition headers.

        Legacy quoting mechanisms are detected to some degree and also
        supported, but there are rare ambiguous edge cases where we have to
        guess. If in doubt, this function assumes a modern browser and
        follows the WHATWG HTML5 specification.

        :param val: Raw option value, with or without surrounding double
            quotes. An empty string is returned unchanged.
        :param filename: If true, a quoted value that looks like a full
            Windows or UNC path is reduced to its last path component.
        :return: The decoded value.
    """

    # Edge case: If the value contains two backslashes but no quote, we cannot
    # know for sure if this is legacy or modern style. We keep both backslashes
    # in this case (modern style).

    # Slices instead of indexing, so an empty value does not raise IndexError.
    quoted = val[:1] == '"' == val[-1:]
    if quoted:
        val = val[1:-1]

    if quoted and '\\"' in val:  # Legacy backslash-escaped quoted strings
        val = val.replace("\\\\", "\\").replace('\\"', '"')
    elif "%" in val:  # Modern (HTML5) limited percent-encoding
        val = val.replace("%0D", "\r").replace("%0A", "\n").replace("%22", '"')

    # ie6/windows bug: full path instead of just filename
    if quoted and filename and (val[1:3] == ":\\" or val[:2] == "\\\\"):
        val = val.rpartition("\\")[-1]

    return val


def parse_options_header(header, options=None, _unquote=content_disposition_unquote):
""" Parse Content-Disposition (or similar) headers into a primary value
and an options-dict. """
value, sep, tail = header.partition(";")
if not sep:
return header.lower().strip(), {}
Expand All @@ -176,7 +240,7 @@ def parse_options_header(header, options=None):
for match in _re_option.finditer(tail):
key, val = match.groups()
key = key.lower()
options[key] = header_unquote(val, key == "filename")
options[key] = _unquote(val, key == "filename")

return value.lower(), options

Expand Down
21 changes: 17 additions & 4 deletions test/test_header_utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
# -*- coding: utf-8 -*-
import unittest
import multipart
import pytest

class TestHeaderParser(unittest.TestCase):

def test_token_unquote(self):
unquote = multipart.header_unquote
with pytest.deprecated_call():
unquote = multipart.header_unquote
self.assertEqual('foo', unquote('"foo"'))
self.assertEqual('foo"bar', unquote('"foo\\"bar"'))
self.assertEqual('ie.exe', unquote('"\\\\network\\ie.exe"', True))
self.assertEqual('ie.exe', unquote('"c:\\wondows\\ie.exe"', True))

unquote = multipart.content_disposition_unquote
self.assertEqual('foo', unquote('"foo"'))
self.assertEqual('foo"bar', unquote('"foo\\"bar"'))
self.assertEqual('ie.exe', unquote('"\\\\network\\ie.exe"', True))
self.assertEqual('ie.exe', unquote('"c:\\wondows\\ie.exe"', True))

def test_token_quote(self):
quote = multipart.header_quote
self.assertEqual(quote('foo'), 'foo')
self.assertEqual(quote('foo"bar'), '"foo\\"bar"')
with pytest.deprecated_call():
quote = multipart.header_quote
self.assertEqual(quote('foo'), 'foo')
self.assertEqual(quote('foo"bar'), '"foo\\"bar"')

quote = multipart.content_disposition_quote
self.assertEqual(quote('foo'), '"foo"')
self.assertEqual(quote('foo"bar'), '"foo%22bar"')

def test_options_parser(self):
parse = multipart.parse_options_header
Expand Down
26 changes: 25 additions & 1 deletion test/test_push_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ def compact_events(self):
yield current, b''.join(data)

def get_segment(self, index_or_name):
allnames = []
for i, (segment, body) in enumerate(self.compact_events()):
allnames.append(segment.name)
if index_or_name == i or index_or_name == segment.name:
return segment, body
self.fail(f"Segment not found: {index_or_name}")
self.fail(f"Segment {index_or_name!r} not found in {allnames!r}")


class TestPushParser(PushTestBase):
Expand Down Expand Up @@ -769,3 +771,25 @@ def test_werkzeug_examples(self):
self.assertEqual(segment.filename, None)
self.assertEqual(segment.content_type, None)
self.assertEqual(body.decode(segment.charset or 'utf8'), forms[field])



class TestRealWorldExamples(PushTestBase):
    def test_special_characters(self):
        """ A name/filename packed with special characters must survive
            parsing unchanged for every recorded request body. """
        teststring = 'test \\ \\\\ ; ö " = ;'
        expected_payload = teststring.encode("utf8")

        # (boundary, raw request body) pairs; the first is labelled
        # firefox_131 and the second chrome_129 in the original test.
        recorded = (
            ('---------------------------3697486332756351920303607403',
             b'-----------------------------3697486332756351920303607403\r\nContent-Disposition: form-data; name="test \\ \\\\ ; \xc3\xb6 %22 = ;"; filename="test \\ \\\\ ; \xc3\xb6 %22 = ;"\r\nContent-Type: application/octet-stream\r\n\r\ntest \\ \\\\ ; \xc3\xb6 " = ;\r\n-----------------------------3697486332756351920303607403--\r\n'),
            ("----WebKitFormBoundary9duA54BXJUGUymtb",
             b'------WebKitFormBoundary9duA54BXJUGUymtb\r\nContent-Disposition: form-data; name="test \\ \\\\ ; \xc3\xb6 %22 = ;"; filename="test \\ \\\\ ; \xc3\xb6 %22 = ;"\r\nContent-Type: application/octet-stream\r\n\r\ntest \\ \\\\ ; \xc3\xb6 " = ;\r\n------WebKitFormBoundary9duA54BXJUGUymtb--\r\n'),
        )

        for delimiter, raw in recorded:
            print(repr(delimiter))
            print(repr(raw))
            self.reset(boundary=delimiter, strict=True, header_charset='utf8')
            self.parse(raw)
            segment, payload = self.get_segment(teststring)
            self.assertEqual(segment.name, teststring)
            self.assertEqual(segment.filename, teststring)
            self.assertEqual(payload, expected_payload)

2 changes: 1 addition & 1 deletion test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def write_header(self, header, value, **opts):
line = to_bytes(header) + b': ' + to_bytes(value)
for opt, val in opts.items():
if val is not None:
line += b"; " + to_bytes(opt) + b'=' + to_bytes(multipart.header_quote(val))
line += b"; " + to_bytes(opt) + b'=' + to_bytes(multipart.content_disposition_quote(val))
self.write(line + b'\r\n')

def write_field(self, name, data, filename=None, content_type=None):
Expand Down

0 comments on commit 3ec3991

Please sign in to comment.