From 4ae8cc5b139ccaceacf6bc89e19567a839c75971 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 17 Apr 2024 13:00:25 +0300 Subject: [PATCH] gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369) Only treat '\n', '\r' and '\r\n' as line separators in re-folding the email messages. Preserve control characters '\v', '\f', '\x1c', '\x1d' and '\x1e' and Unicode line separators '\x85', '\u2028' and '\u2029' as is. --- Lib/email/policy.py | 5 +- Lib/test/test_email/test_generator.py | 49 +++++++++++++++++++ ...-03-29-15-14-51.gh-issue-117313.ks_ONu.rst | 4 ++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 8816c84ed175a70..46b7de5bb6d8ae8 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -21,7 +21,7 @@ 'HTTP', ] -linesep_splitter = re.compile(r'\n|\r') +linesep_splitter = re.compile(r'\n|\r\n?') @_extend_docstrings class EmailPolicy(Policy): @@ -205,7 +205,8 @@ def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) maxlen = self.max_line_length if self.max_line_length else sys.maxsize - lines = value.splitlines() + # We can't use splitlines here because it splits on more than \r and \n. + lines = linesep_splitter.split(value) refold = (self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index 89e7edeb63a8926..3ebcb684d006d08 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -140,6 +140,39 @@ def test_flatten_linesep_overrides_policy(self): g.flatten(msg, linesep='\n') self.assertEqual(s.getvalue(), self.typ(expected)) + def test_flatten_linesep(self): + source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n' + msg = self.msgmaker(self.typ(source)) + self.assertEqual(msg['Subject'], 'one two three four') + + expected = 'Subject: one\n two\n three\n four\n\ntest body\n' + s = self.ioclass() + g = self.genclass(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + + expected = 'Subject: one two three four\n\ntest body\n' + s = self.ioclass() + g = self.genclass(s, policy=self.policy.clone(refold_source='all')) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + + def test_flatten_control_linesep(self): + source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n' + msg = self.msgmaker(self.typ(source)) + self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six') + + expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n' + s = self.ioclass() + g = self.genclass(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + + s = self.ioclass() + g = self.genclass(s, policy=self.policy.clone(refold_source='all')) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + def test_set_mangle_from_via_policy(self): source = textwrap.dedent("""\ Subject: test that @@ -224,6 +257,22 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): ioclass = io.StringIO typ = str + def test_flatten_unicode_linesep(self): + source = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n' + msg = self.msgmaker(self.typ(source)) + self.assertEqual(msg['Subject'], 'one\x85 two\u2028 three\u2029 four') + + expected = 'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n' + s = self.ioclass() + g = self.genclass(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + + s = self.ioclass() + g = self.genclass(s, policy=self.policy.clone(refold_source='all')) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + class TestBytesGenerator(TestGeneratorBase, TestEmailBase): diff --git a/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst b/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst new file mode 100644 index 000000000000000..e67576ee574f922 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-29-15-14-51.gh-issue-117313.ks_ONu.rst @@ -0,0 +1,4 @@ +Only treat ``'\n'``, ``'\r'`` and ``'\r\n'`` as line separators in +re-folding the :mod:`email` messages. Preserve control characters ``'\v'``, +``'\f'``, ``'\x1c'``, ``'\x1d'`` and ``'\x1e'`` and Unicode line separators +``'\x85'``, ``'\u2028'`` and ``'\u2029'`` as is.