From 7cebe782c1f5f4966d77ff9fd5367fb2eccfee64 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 17 Feb 2024 15:48:58 +0100 Subject: [PATCH 01/39] pyexpat: Implement methods pyexpat.xmlparser.(Get|Set)ReparseDeferralEnabled --- Modules/clinic/pyexpat.c.h | 49 +++++++++++++++++++++++++++++++++++++- Modules/pyexpat.c | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index a5b93e68598204..343cb91b975038 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -8,6 +8,53 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, +"SetReparseDeferralEnabled($self, enabled, /)\n" +"--\n" +"\n" +"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0."); + +#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \ + {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled); + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + int enabled; + + enabled = PyObject_IsTrue(arg); + if (enabled < 0) { + goto exit; + } + return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled); + +exit: + return return_value; +} + +PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__, +"GetReparseDeferralEnabled($self, /)\n" +"--\n" +"\n" +"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0."); + +#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \ + {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self); + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored)) +{ + return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self); +} + PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__, "Parse($self, data, isfinal=False, /)\n" "--\n" @@ -498,4 +545,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=48c4296e43777df4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=892e48e41f9b6e4b input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 62cd262a7885e9..6e7050446756f6 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -7,6 +7,7 @@ #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_traceback.h" // _PyTraceback_Add() +#include #include // offsetof() #include "expat.h" #include "pyexpat.h" @@ -81,6 +82,12 @@ typedef struct { /* NULL if not enabled */ int buffer_size; /* Size of buffer, in XML_Char units */ int buffer_used; /* Buffer units in use */ + bool reparse_deferral_enabled; /* Whether to defer reparsing of + unfinished XML tokens; a de-facto cache of + what Expat has the authority on, for lack + of a getter API function + "XML_GetReparseDeferralEnabled" in Expat + 2.6.0 */ PyObject *intern; /* Dictionary to intern strings */ PyObject **handlers; } xmlparseobject; @@ -703,6 +710,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv) #define MAX_CHUNK_SIZE (1 << 20) +/*[clinic input] +pyexpat.xmlparser.SetReparseDeferralEnabled + + enabled: bool + / + +Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled) +/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/ +{ +#if XML_COMBINED_VERSION >= 20600 + XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE); + self->reparse_deferral_enabled = (bool)enabled; +#endif + Py_RETURN_NONE; +} + +/*[clinic input] +pyexpat.xmlparser.GetReparseDeferralEnabled + +Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self) +/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/ +{ + return PyBool_FromLong(self->reparse_deferral_enabled); +} + /*[clinic input] pyexpat.xmlparser.Parse @@ -1063,6 +1104,8 @@ static struct PyMethodDef xmlparse_methods[] = { #if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif + PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF + PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -1158,6 +1201,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, self->ns_prefixes = 0; self->handlers = NULL; self->intern = Py_XNewRef(intern); +#if XML_COMBINED_VERSION >= 20600 + self->reparse_deferral_enabled = true; +#else + self->reparse_deferral_enabled = false; +#endif /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, From c70fbae639bd8e9d938b47dc31db9b3d20b4a711 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 17 Feb 2024 15:19:26 +0100 Subject: [PATCH 02/39] etree: Implement method xml.etree.ElementTree.XMLParser.flush (Python version) --- Lib/xml/etree/ElementTree.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index a37fead41b750e..c7baf6cb96783e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1726,6 +1726,17 @@ def close(self): del self.parser, self._parser del self.target, self._target + def flush(self): + if not self.parser.GetReparseDeferralEnabled(): + return + + self.parser.SetReparseDeferralEnabled(False) + try: + self.parser.Parse(b"", False) + except self._error as v: + self._raiseerror(v) + finally: + self.parser.SetReparseDeferralEnabled(True) # -------------------------------------------------------------------- # C14N 2.0 From dfca8190cf07ba78267f1b01ce39e3a90c3e8d48 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 7 Feb 2024 13:34:54 +0100 Subject: [PATCH 03/39] pyexpat: Make SetReparseDeferralEnabled available via PyExpat_CAPI --- Include/pyexpat.h | 2 ++ Misc/sbom.spdx.json | 4 ++-- Modules/expat/pyexpatns.h | 1 + Modules/pyexpat.c | 5 +++++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 07020b5dc964cb..8b44c1ce119f0f 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -50,6 +50,8 @@ struct PyExpat_CAPI void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); /* might be none for expat < 2.1.0 */ int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); + /* might be none for expat < 2.6.0 */ + XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); /* always add new stuff to the end! */ }; diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index e28eaea81d6aae..27e6742292ac6d 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -132,11 +132,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "baa44fe4581895d42e8d5e83d8ce6a69b1c34dbe" + "checksumValue": "f50c899172acd93fc539007bfb43315b83d407e4" }, { "algorithm": "SHA256", - "checksumValue": "33a7b9ac8bf4571e23272cdf644c6f9808bd44c66b149e3c41ab3870d1888609" + "checksumValue": "d571b8258cfaa067a20adef553e5fcedd6671ca4a8841483496de031bd904567" } ], "fileName": "Modules/expat/pyexpatns.h" diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index d45d9b6c457159..8ee03ef0792815 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -108,6 +108,7 @@ #define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler #define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing #define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler +#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled #define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet #define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler #define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 6e7050446756f6..f04f96bc2f7601 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -2067,6 +2067,11 @@ pyexpat_exec(PyObject *mod) #else capi->SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20600 + capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled; +#else + capi->SetReparseDeferralEnabled = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, From 4baab67114662da2fea8db4e7257a5ebcf3edb9d Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 18 Feb 2024 21:12:40 +0100 Subject: [PATCH 04/39] etree: Implement method xml.etree.ElementTree.XMLParser.flush (C version) --- Modules/_elementtree.c | 29 +++++++++++++++++++++++++++++ Modules/clinic/_elementtree.c.h | 19 ++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 54451081211654..6f37fb60e2c822 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3894,6 +3894,34 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self) } } +/*[clinic input] +_elementtree.XMLParser.flush + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self) +/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/ +{ + if (!_check_xmlparser(self)) { + return NULL; + } + + elementtreestate *st = self->state; + + if (EXPAT(st, SetReparseDeferralEnabled) == NULL) { + Py_RETURN_NONE; + } + + EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE); + + PyObject *res = expat_parse(st, self, "", 0, XML_FALSE); + + EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_TRUE); + + return res; +} + /*[clinic input] _elementtree.XMLParser.feed @@ -4288,6 +4316,7 @@ static PyType_Spec treebuilder_spec = { static PyMethodDef xmlparser_methods[] = { _ELEMENTTREE_XMLPARSER_FEED_METHODDEF _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF + _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF {NULL, NULL} diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index 9622591a1aa855..10b2dd1c15f7fd 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -1169,6 +1169,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored) return _elementtree_XMLParser_close_impl(self); } +PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__, +"flush($self, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \ + {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__}, + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self); + +static PyObject * +_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _elementtree_XMLParser_flush_impl(self); +} + PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__, "feed($self, data, /)\n" "--\n" @@ -1219,4 +1236,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args, exit: return return_value; } -/*[clinic end generated code: output=218ec9e6a889f796 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=aed9f53eeb0404e0 input=a9049054013a1b77]*/ From 79289427abf321899b17f68ca07c2bcdd9c45800 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 17 Feb 2024 15:10:13 +0100 Subject: [PATCH 05/39] etree: Implement method xml.etree.ElementTree.XMLPullParser.flush --- Lib/xml/etree/ElementTree.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c7baf6cb96783e..ac690744c6fbd9 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1320,6 +1320,11 @@ def read_events(self): else: yield event + def flush(self): + if self._parser is None: + raise ValueError("flush() called after end of stream") + self._parser.flush() + def XML(text, parser=None): """Parse XML document from string constant. From e5e403306c65199f53a02e33a2ac8e28eb45d7a1 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 17 Feb 2024 15:04:15 +0100 Subject: [PATCH 06/39] etree: Use XMLPullParser.flush to fix XMLPullParserTest for Expat 2.6.0 --- Lib/test/test_xml_etree.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c535d631bb646f..7d2a1c007f43d8 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -121,10 +121,6 @@ """ -fails_with_expat_2_6_0 = (unittest.expectedFailure - if pyexpat.version_info >= (2, 6, 0) else - lambda test: test) - def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -1468,6 +1464,7 @@ def _feed(self, parser, data, chunk_size=None): else: for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + parser.flush() def assert_events(self, parser, expected, max_events=None): self.assertEqual( @@ -1506,11 +1503,9 @@ def test_simple_xml(self, chunk_size=None): self.assert_event_tags(parser, [('end', 'root')]) self.assertIsNone(parser.close()) - @fails_with_expat_2_6_0 def test_simple_xml_chunk_1(self): self.test_simple_xml(chunk_size=1) - @fails_with_expat_2_6_0 def test_simple_xml_chunk_5(self): self.test_simple_xml(chunk_size=5) From bc6e1a767e77bb3384f8c4e3f7994e1b2a9e4045 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 18 Feb 2024 02:16:32 +0100 Subject: [PATCH 07/39] sax: Implement method xml.sax.expatreader.ExpatParser.flush --- Lib/xml/sax/expatreader.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index b9ad52692db8dd..70b4ec6c82087c 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -214,6 +214,22 @@ def feed(self, data, isFinal=False): # FIXME: when to invoke error()? self._err_handler.fatalError(exc) + def flush(self): + if self._parser is None: + return + + if not self._parser.GetReparseDeferralEnabled(): + return + + self._parser.SetReparseDeferralEnabled(False) + try: + self._parser.Parse(b"", False) + except expat.error as e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + self._err_handler.fatalError(exc) + finally: + self._parser.SetReparseDeferralEnabled(True) + def _close_source(self): source = self._source try: From b737f0311d0ff22dbcd21839390e8dee7a4eb00e Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 18 Feb 2024 02:33:44 +0100 Subject: [PATCH 08/39] sax: Test method xml.sax.expatreader.ExpatParser.flush --- Lib/test/test_sax.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index eda4e6a46df437..92443907924bcc 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -19,6 +19,7 @@ from io import BytesIO, StringIO import codecs import os.path +import pyexpat import shutil import sys from urllib.error import URLError @@ -1214,6 +1215,30 @@ def test_expat_incremental_reset(self): self.assertEqual(result.getvalue(), start + b"text") + def test_expat_incremental_reparse_deferral(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + # This artificial chunking triggers reparse deferral with Expat >=2.6.0 + parser.feed("") + + if pyexpat.version_info >= (2, 6, 0): + self.assertEqual(result.getvalue(), start) + else: + self.assertEqual(result.getvalue(), start + b"") + + parser.flush() # no-op for Expat <2.6.0 + + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + # ===== Locator support def test_expat_locator_noinfo(self): From 850e46dc856efb7fdc33a417a545af8db6a74cb9 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 18 Feb 2024 03:14:45 +0100 Subject: [PATCH 09/39] Document new CVE-2023-52425 Expat API (reparse deferral) --- .../2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst diff --git a/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst new file mode 100644 index 00000000000000..9fd92f9f744bd6 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst @@ -0,0 +1,8 @@ +Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding +five new methods: + +* ``pyexpat.xmlparser.GetReparseDeferralEnabled`` +* ``pyexpat.xmlparser.SetReparseDeferralEnabled`` +* ``xml.etree.ElementTree.XMLParser.flush`` +* ``xml.etree.ElementTree.XMLPullParser.flush`` +* ``xml.sax.expatreader.ExpatParser.flush`` From 70020247bc4243d090eee0a201bbd3369f2943fd Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 21 Feb 2024 16:20:38 +0100 Subject: [PATCH 10/39] _elementtree.c: Document how we know that reparse deferral is enabled --- Modules/_elementtree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 6f37fb60e2c822..63a7aa9ac61b8f 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3913,6 +3913,12 @@ _elementtree_XMLParser_flush_impl(XMLParserObject *self) Py_RETURN_NONE; } + // NOTE: The Expat parser in the C implementation of ElementTree is not + // exposed to the outside; as a result we known that reparse deferall + // is currently enabled, or we would not even have access to function + // XML_SetReparseDeferralEnabled in the first place (which we checked + // for, a few lines up). + EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE); PyObject *res = expat_parse(st, self, "", 0, XML_FALSE); From 2132dfecb7184525a04bc2857d10702a0ae2d4e7 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 21 Feb 2024 16:22:43 +0100 Subject: [PATCH 11/39] sax: Fix xml.sax.expatreader.ExpatParser.flush .. for the case with the following call order: 0. Reparse deferral is enabled (naturally or via ._parser.SetReparseDeferralEnabled) 1. Data is pushed via .feed 2. Reparse deferral is disabled via ._parser.SetReparseDeferralEnabled 3. Data is flushed via .flush With this change, now .flush does call ._parser.Parse, as it should. Suggested-by: Snild Dolkow --- Lib/xml/sax/expatreader.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 70b4ec6c82087c..c01be3a669fea9 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -218,17 +218,18 @@ def flush(self): if self._parser is None: return - if not self._parser.GetReparseDeferralEnabled(): - return + was_enabled = self._parser.GetReparseDeferralEnabled() - self._parser.SetReparseDeferralEnabled(False) + if was_enabled: + self._parser.SetReparseDeferralEnabled(False) try: self._parser.Parse(b"", False) except expat.error as e: exc = SAXParseException(expat.ErrorString(e.code), e, self) self._err_handler.fatalError(exc) finally: - self._parser.SetReparseDeferralEnabled(True) + if was_enabled: + self._parser.SetReparseDeferralEnabled(True) def _close_source(self): source = self._source From 3d02dfe7f2c0efc77c93b0584234137b62e03e4d Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 21 Feb 2024 16:31:56 +0100 Subject: [PATCH 12/39] etree: Fix xml.etree.ElementTree.XMLParser.flush (Python version) .. for the case with the following call order: 0. Reparse deferral is enabled (naturally or via .parser.SetReparseDeferralEnabled) 1. Data is pushed via .feed 2. Reparse deferral is disabled via .parser.SetReparseDeferralEnabled 3. Data is flushed via .flush With this change, now .flush does call .parser.Parse, as it should. Suggested-by: Snild Dolkow --- Lib/xml/etree/ElementTree.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ac690744c6fbd9..8fbbc0e9b62035 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1732,16 +1732,17 @@ def close(self): del self.target, self._target def flush(self): - if not self.parser.GetReparseDeferralEnabled(): - return + was_enabled = self.parser.GetReparseDeferralEnabled() - self.parser.SetReparseDeferralEnabled(False) + if was_enabled: + self.parser.SetReparseDeferralEnabled(False) try: self.parser.Parse(b"", False) except self._error as v: self._raiseerror(v) finally: - self.parser.SetReparseDeferralEnabled(True) + if was_enabled: + self.parser.SetReparseDeferralEnabled(True) # -------------------------------------------------------------------- # C14N 2.0 From fdd2facda97521607b26ec34bedd899f876e5f19 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Wed, 21 Feb 2024 17:21:32 +0100 Subject: [PATCH 13/39] etree: Fix typo "deferall" --- Modules/_elementtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 63a7aa9ac61b8f..edd2f88a4881c3 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3914,7 +3914,7 @@ _elementtree_XMLParser_flush_impl(XMLParserObject *self) } // NOTE: The Expat parser in the C implementation of ElementTree is not - // exposed to the outside; as a result we known that reparse deferall + // exposed to the outside; as a result we known that reparse deferral // is currently enabled, or we would not even have access to function // XML_SetReparseDeferralEnabled in the first place (which we checked // for, a few lines up). From dbbd98c2f56020685a49236c2a2fd22be9e31465 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 14:29:36 +0100 Subject: [PATCH 14/39] pyexpat: Cover (Get|Set)ReparseDeferralEnabled by tests --- Lib/test/test_pyexpat.py | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index d941a1a8f9ebc6..f30abceab3bd70 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -755,5 +755,83 @@ def resolve_entity(context, base, system_id, public_id): self.assertEqual(handler_call_args, [("bar", "baz")]) +class ReparseDeferralTest(unittest.TestCase): + def test_getter_initial_value(self): + if expat.version_info >= (2, 6, 0): + expected = True + else: + expected = False + + parser = expat.ParserCreate() + actual = parser.GetReparseDeferralEnabled() + + self.assertEqual(actual, expected) + + def test_getter_setter_round_trip(self): + parser = expat.ParserCreate() + was_enabled = parser.GetReparseDeferralEnabled() + + # We'll flip the value back and forth, and we expect .. + if expat.version_info >= (2, 6, 0): + # that flipping worked both ways for >=2.6.0 + expected_1 = not was_enabled + expected_2 = was_enabled + else: + # that flipping did not have any effect for <2.6.0 + expected_1 = False + expected_2 = False + + parser.SetReparseDeferralEnabled(expected_1) + actual_1 = parser.GetReparseDeferralEnabled() + + self.assertEqual(actual_1, expected_1) + + parser.SetReparseDeferralEnabled(expected_2) + actual_2 = parser.GetReparseDeferralEnabled() + + self.assertEqual(actual_2, expected_2) + + def test_reparse_deferral_enabled(self): + if expat.version_info < (2, 6, 0): + return + + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element + self.assertTrue(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: no. + self.assertEqual(started, []) + + parser.Parse(b'', True) + + self.assertEqual(started, ['doc']) + + def test_reparse_deferral_disabled(self): + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element + if expat.version_info >= (2, 6, 0): + parser.SetReparseDeferralEnabled(False) + self.assertFalse(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: yes. + self.assertEqual(started, ['doc']) + + if __name__ == "__main__": unittest.main() From 3b6ea39d4adf837d6491174c1e7b8f55843bf227 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 16:04:57 +0100 Subject: [PATCH 15/39] sax: Extend xml.sax.expatreader.ExpatParser.flush test coverage --- Lib/test/test_sax.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 92443907924bcc..f2659bd80e8189 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1215,23 +1215,49 @@ def test_expat_incremental_reset(self): self.assertEqual(result.getvalue(), start + b"text") - def test_expat_incremental_reparse_deferral(self): + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + return + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) - # This artificial chunking triggers reparse deferral with Expat >=2.6.0 - parser.feed("") + for chunk in (""): + parser.feed(chunk) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + def test_flush_reparse_deferral_disabled(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) if pyexpat.version_info >= (2, 6, 0): - self.assertEqual(result.getvalue(), start) - else: - self.assertEqual(result.getvalue(), start + b"") + parser._parser.SetReparseDeferralEnabled(False) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) - parser.flush() # no-op for Expat <2.6.0 + parser.flush() + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) self.assertEqual(result.getvalue(), start + b"") parser.feed("") From 5c1cfb7088df853f780b687a2ca28c48dde0e7eb Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 16:19:34 +0100 Subject: [PATCH 16/39] Doc/whatsnew/3.13.rst: Mention new Expat reparse deferral API --- Doc/whatsnew/3.13.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 7c6a2af28758be..a3c3159fa599c3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -174,6 +174,17 @@ Other Language Changes (Contributed by Victor Stinner in :gh:`114570`.) +* Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) + by adding five new methods: + + * :meth:`pyexpat.xmlparser.GetReparseDeferralEnabled` + * :meth:`pyexpat.xmlparser.SetReparseDeferralEnabled` + * :meth:`xml.etree.ElementTree.XMLParser.flush` + * :meth:`xml.etree.ElementTree.XMLPullParser.flush` + * :meth:`xml.sax.expatreader.ExpatParser.flush` + + (Contributed by Sebastian Pipping in :gh:`115623`.) + New Modules =========== From a9c666e0e6230510f6a1236119e44f5cbbcfcd29 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 16:48:10 +0100 Subject: [PATCH 17/39] pyexpat: Document methods pyexpat.xmlparser.(Get|Set)ReparseDeferralEnabled --- Doc/library/pyexpat.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 935e872480efda..db80e58c975c5b 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -196,6 +196,33 @@ XMLParser Objects :exc:`ExpatError` to be raised with the :attr:`code` attribute set to ``errors.codes[errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING]``. +.. warning:: + + Calling ``SetReparseDeferralEnabled(False)`` has security implications, + as detailed below; please make sure to understand these consequences + prior to using the ``SetReparseDeferralEnabled`` method. + +.. method:: xmlparser.SetReparseDeferralEnabled(enabled) + + Expat 2.6.0 introduced a security mechanism called "reparse deferral" + where instead of causing denial of service through quadratic runtime + from reparsing large tokens, reparsing of unfinished tokens is now delayed + by default until a sufficient amount of input is reached. + Due to this delay, registered handlers may — depending of the sizing of + input chunks pushed to Expat — no longer be called right after pushing new + input to the parser. Where immediate feedback and taking over responsiblity + of protecting against denial of service from large tokens are both wanted, + calling ``SetReparseDeferralEnabled(False)`` disables reparse deferral + for the current Expat parser instance, temporarily or altogether. + Calling ``SetReparseDeferralEnabled(True)`` allows re-enabling reparse + deferral. + +.. method:: xmlparser.GetReparseDeferralEnabled() + + Returns whether reparse deferral is currently enabled for the given + Expat parser instance. + + :class:`xmlparser` objects have the following attributes: From 35099e30ec2d741319d794d96b8615c03f99fb2a Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 17:16:17 +0100 Subject: [PATCH 18/39] etree: Document method xml.etree.ElementTree.XMLParser.flush --- Doc/library/xml.etree.elementtree.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 75a7915c15240d..1b83b7e1e54cc8 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -1387,6 +1387,16 @@ XMLParser Objects Feeds data to the parser. *data* is encoded data. + + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + :meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method for each opening tag, its ``end(tag)`` method for each closing tag, and data is processed by method ``data(data)``. For further supported callback From 1496e8307d8a9ebf7ced30fbf4360d15738eb60b Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 17:19:04 +0100 Subject: [PATCH 19/39] etree: Document method xml.etree.ElementTree.XMLPullParser.flush --- Doc/library/xml.etree.elementtree.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 1b83b7e1e54cc8..b173eb2717f395 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -1458,6 +1458,15 @@ XMLPullParser Objects Feed the given bytes data to the parser. + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + .. method:: close() Signal the parser that the data stream is terminated. Unlike From a6927ffcc4e4a44c70a7731c60e4e5929b456140 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 17:38:04 +0100 Subject: [PATCH 20/39] etree: Make docs point to xml.etree.ElementTree.XMLPullParser.flush --- Doc/library/xml.etree.elementtree.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index b173eb2717f395..c4a04ca0cfd9ca 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -166,6 +166,11 @@ data but would still like to have incremental parsing capabilities, take a look at :func:`iterparse`. It can be useful when you're reading a large XML document and don't want to hold it wholly in memory. +Where _immediate_ feedback through events is wanted, calling method +:meth:`XMLPullParser.flush` can help reduce delay; +please make sure to study the related security notes. + + Finding interesting elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From c5b215945b2012cab6db3cccf84b28091653f800 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 19:05:27 +0100 Subject: [PATCH 21/39] pyexpat: Move security warning into SetReparseDeferralEnabled docs --- Doc/library/pyexpat.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index db80e58c975c5b..79008b968beff2 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -196,14 +196,14 @@ XMLParser Objects :exc:`ExpatError` to be raised with the :attr:`code` attribute set to ``errors.codes[errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING]``. +.. method:: xmlparser.SetReparseDeferralEnabled(enabled) + .. warning:: Calling ``SetReparseDeferralEnabled(False)`` has security implications, as detailed below; please make sure to understand these consequences prior to using the ``SetReparseDeferralEnabled`` method. -.. method:: xmlparser.SetReparseDeferralEnabled(enabled) - Expat 2.6.0 introduced a security mechanism called "reparse deferral" where instead of causing denial of service through quadratic runtime from reparsing large tokens, reparsing of unfinished tokens is now delayed From 082bcc17b2a2e93024f2b9937731b695597e1d33 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 19:11:36 +0100 Subject: [PATCH 22/39] pyexpat|etree: Mark new Expat API as added in 3.13 in docs --- Doc/library/pyexpat.rst | 4 ++++ Doc/library/xml.etree.elementtree.rst | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 79008b968beff2..d9309bedca483a 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -217,11 +217,15 @@ XMLParser Objects Calling ``SetReparseDeferralEnabled(True)`` allows re-enabling reparse deferral. + .. versionadded:: 3.13 + .. method:: xmlparser.GetReparseDeferralEnabled() Returns whether reparse deferral is currently enabled for the given Expat parser instance. + .. versionadded:: 3.13 + :class:`xmlparser` objects have the following attributes: diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index c4a04ca0cfd9ca..7fb4bab763f2b5 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -1402,6 +1402,9 @@ XMLParser Objects Disabling reparse deferral has security consequences; please see :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + .. versionadded:: 3.13 + + :meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method for each opening tag, its ``end(tag)`` method for each closing tag, and data is processed by method ``data(data)``. For further supported callback @@ -1472,6 +1475,8 @@ XMLPullParser Objects Disabling reparse deferral has security consequences; please see :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + .. versionadded:: 3.13 + .. method:: close() Signal the parser that the data stream is terminated. Unlike From f0577e76ece43cff6af3795ba43d8f6897d21a53 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 19:26:12 +0100 Subject: [PATCH 23/39] pyexpat|sax: Do not be silent about tests skipped for Expat <2.6.0 --- Lib/test/test_pyexpat.py | 2 +- Lib/test/test_sax.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index f30abceab3bd70..d7f655a7a8caa6 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -793,7 +793,7 @@ def test_getter_setter_round_trip(self): def test_reparse_deferral_enabled(self): if expat.version_info < (2, 6, 0): - return + self.skipTest(f'Expat {expat.version_info} does not support reparse deferral') started = [] diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index f2659bd80e8189..97e96668f85c8a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1217,7 +1217,7 @@ def test_expat_incremental_reset(self): def test_flush_reparse_deferral_enabled(self): if pyexpat.version_info < (2, 6, 0): - return + self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') result = BytesIO() xmlgen = XMLGenerator(result) From f58990800aec6f16850b639b0aadf234526d7cf4 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 19:35:26 +0100 Subject: [PATCH 24/39] pyexpat: Simplify test ReparseDeferralTest.test_getter_setter_round_trip Suggested by Serhiy Storchaka --- Lib/test/test_pyexpat.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index d7f655a7a8caa6..981010d2ce7266 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -769,27 +769,17 @@ def test_getter_initial_value(self): def test_getter_setter_round_trip(self): parser = expat.ParserCreate() - was_enabled = parser.GetReparseDeferralEnabled() + enabled = (expat.version_info >= (2, 6, 0)) - # We'll flip the value back and forth, and we expect .. - if expat.version_info >= (2, 6, 0): - # that flipping worked both ways for >=2.6.0 - expected_1 = not was_enabled - expected_2 = was_enabled - else: - # that flipping did not have any effect for <2.6.0 - expected_1 = False - expected_2 = False + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) - parser.SetReparseDeferralEnabled(expected_1) - actual_1 = parser.GetReparseDeferralEnabled() + parser.SetReparseDeferralEnabled(False) - self.assertEqual(actual_1, expected_1) + self.assertIs(parser.GetReparseDeferralEnabled(), False) - parser.SetReparseDeferralEnabled(expected_2) - actual_2 = parser.GetReparseDeferralEnabled() + parser.SetReparseDeferralEnabled(True) - self.assertEqual(actual_2, expected_2) + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) def test_reparse_deferral_enabled(self): if expat.version_info < (2, 6, 0): From 4915045ad6a598ca86e053bcf772d8f15927ecef Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 19:43:04 +0100 Subject: [PATCH 25/39] Promote xml.parsers.expat.xmlparser instead of pyexpat.xmlparser Suggested by Serhiy Storchaka --- Doc/whatsnew/3.13.rst | 4 ++-- .../Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a3c3159fa599c3..efa7ad3b3522e6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -177,10 +177,10 @@ Other Language Changes * Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods: - * :meth:`pyexpat.xmlparser.GetReparseDeferralEnabled` - * :meth:`pyexpat.xmlparser.SetReparseDeferralEnabled` * :meth:`xml.etree.ElementTree.XMLParser.flush` * :meth:`xml.etree.ElementTree.XMLPullParser.flush` + * :meth:`xml.parsers.expat.xmlparser.GetReparseDeferralEnabled` + * :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` * :meth:`xml.sax.expatreader.ExpatParser.flush` (Contributed by Sebastian Pipping in :gh:`115623`.) diff --git a/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst index 9fd92f9f744bd6..97b23936928d91 100644 --- a/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst +++ b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst @@ -1,8 +1,8 @@ Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods: -* ``pyexpat.xmlparser.GetReparseDeferralEnabled`` -* ``pyexpat.xmlparser.SetReparseDeferralEnabled`` * ``xml.etree.ElementTree.XMLParser.flush`` * ``xml.etree.ElementTree.XMLPullParser.flush`` +* ``xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`` +* ``xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`` * ``xml.sax.expatreader.ExpatParser.flush`` From d0ed243a5537b7ce4080794766bd56dcfb3ba512 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 20:52:49 +0100 Subject: [PATCH 26/39] etree: Cover method xml.etree.ElementTree.XMLPullParser.flush .. and xml.etree.ElementTree.XMLParser.flush indirectly as well. --- Lib/test/test_xml_etree.py | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 7d2a1c007f43d8..ae1fbd7f8cfa2c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1706,6 +1706,49 @@ def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') + + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + self.assert_event_tags(parser, []) # i.e. no elements started + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) + + def test_flush_reparse_deferral_disabled(self): + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + parser._parser._parser.SetReparseDeferralEnabled(False) + + self.assert_event_tags(parser, []) # i.e. no elements started + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) # # xinclude tests (samples from appendix C of the xinclude specification) From 62e4fd729b0cb3cba02069251c0236441cae1255 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 20:58:14 +0100 Subject: [PATCH 27/39] pyexpat: Cut whitespace from ReparseDeferralTest.test_getter_setter_round_trip .. as requested by Serhiy Storchaka --- Lib/test/test_pyexpat.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 981010d2ce7266..7653e17296d0da 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -772,13 +772,9 @@ def test_getter_setter_round_trip(self): enabled = (expat.version_info >= (2, 6, 0)) self.assertIs(parser.GetReparseDeferralEnabled(), enabled) - parser.SetReparseDeferralEnabled(False) - self.assertIs(parser.GetReparseDeferralEnabled(), False) - parser.SetReparseDeferralEnabled(True) - self.assertIs(parser.GetReparseDeferralEnabled(), enabled) def test_reparse_deferral_enabled(self): From b0058d570a45ea054706db6f6283454dbde18fc9 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:02:23 +0100 Subject: [PATCH 28/39] pyexpat: Drop ReparseDeferralTest.test_getter_initial_value .. as the functionality is already covered by ReparseDeferralTest.test_getter_setter_round_trip . Sugggested by Serhiy Storchaka --- Lib/test/test_pyexpat.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 7653e17296d0da..067ac401c07043 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -756,17 +756,6 @@ def resolve_entity(context, base, system_id, public_id): class ReparseDeferralTest(unittest.TestCase): - def test_getter_initial_value(self): - if expat.version_info >= (2, 6, 0): - expected = True - else: - expected = False - - parser = expat.ParserCreate() - actual = parser.GetReparseDeferralEnabled() - - self.assertEqual(actual, expected) - def test_getter_setter_round_trip(self): parser = expat.ParserCreate() enabled = (expat.version_info >= (2, 6, 0)) From 1f70c093344109b7d443cfb351636893aa8610e6 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:16:59 +0100 Subject: [PATCH 29/39] pyexpat: Break a long line for PEP 8 --- Lib/test/test_pyexpat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 067ac401c07043..1d56ccd71cf962 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -768,7 +768,8 @@ def test_getter_setter_round_trip(self): def test_reparse_deferral_enabled(self): if expat.version_info < (2, 6, 0): - self.skipTest(f'Expat {expat.version_info} does not support reparse deferral') + self.skipTest(f'Expat {expat.version_info} does not ' + 'support reparse deferral') started = [] From a77de0ff9b4f0b3a2e826d022c2ee8be68a6aa7e Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:25:19 +0100 Subject: [PATCH 30/39] Doc/whatsnew/3.13.rst: Do not create a link into undocumented class .. for a chance at a green CI --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index efa7ad3b3522e6..db262fe986c9e6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -181,7 +181,7 @@ Other Language Changes * :meth:`xml.etree.ElementTree.XMLPullParser.flush` * :meth:`xml.parsers.expat.xmlparser.GetReparseDeferralEnabled` * :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` - * :meth:`xml.sax.expatreader.ExpatParser.flush` + * :meth:`!xml.sax.expatreader.ExpatParser.flush` (Contributed by Sebastian Pipping in :gh:`115623`.) From 2f07457763611d194b64a40a0c7eaff91c6f8db8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:35:55 +0100 Subject: [PATCH 31/39] etree: Make XMLPullParserTest._feed only flush when needed Requested by Serhiy Storchaka --- Lib/test/test_xml_etree.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index ae1fbd7f8cfa2c..4ea3f2f6be2ee6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1458,13 +1458,14 @@ def test_attlist_default(self): class XMLPullParserTest(unittest.TestCase): - def _feed(self, parser, data, chunk_size=None): + def _feed(self, parser, data, chunk_size=None, flush=False): if chunk_size is None: parser.feed(data) else: for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) - parser.flush() + if flush: + parser.flush() def assert_events(self, parser, expected, max_events=None): self.assertEqual( @@ -1482,32 +1483,32 @@ def assert_event_tags(self, parser, expected, max_events=None): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - def test_simple_xml(self, chunk_size=None): + def test_simple_xml(self, chunk_size=None, flush=False): parser = ET.XMLPullParser() self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, []) self._feed(parser, "\n text\n", chunk_size) + self._feed(parser, ">\n", chunk_size, flush) self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "texttail\n", chunk_size, flush) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, [ ('end', 'element'), ('end', 'empty-element'), ]) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, [('end', 'root')]) self.assertIsNone(parser.close()) def test_simple_xml_chunk_1(self): - self.test_simple_xml(chunk_size=1) + self.test_simple_xml(chunk_size=1, flush=True) def test_simple_xml_chunk_5(self): - self.test_simple_xml(chunk_size=5) + self.test_simple_xml(chunk_size=5, flush=True) def test_simple_xml_chunk_22(self): self.test_simple_xml(chunk_size=22) From 4b49de9b9affda3806cf4080e842159c249572d6 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:47:32 +0100 Subject: [PATCH 32/39] etree: Fix XMLPullParserTest.test_flush_[..] for C version --- Lib/test/test_xml_etree.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 4ea3f2f6be2ee6..2ca33ca218fcc1 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1712,17 +1712,20 @@ def test_flush_reparse_deferral_enabled(self): self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') parser = ET.XMLPullParser(events=('start', 'end')) + is_python = hasattr(parser._parser, '_parser') # rather than C for chunk in (""): parser.feed(chunk) self.assert_event_tags(parser, []) # i.e. no elements started - self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + if is_python: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) parser.flush() self.assert_event_tags(parser, [('start', 'doc')]) - self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + if is_python: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) parser.feed("") parser.close() @@ -1731,6 +1734,10 @@ def test_flush_reparse_deferral_enabled(self): def test_flush_reparse_deferral_disabled(self): parser = ET.XMLPullParser(events=('start', 'end')) + is_python = hasattr(parser._parser, '_parser') # rather than C + + if not is_python: + self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled methods not available in C') for chunk in (""): parser.feed(chunk) From 3c960a65f418ef7cc35fd5b2f4d15b2a01c8dec9 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:48:59 +0100 Subject: [PATCH 33/39] etree: Break a long line for PEP 8 --- Lib/test/test_xml_etree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 2ca33ca218fcc1..89e69b8ed53cdd 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1709,7 +1709,8 @@ def test_unknown_event(self): def test_flush_reparse_deferral_enabled(self): if pyexpat.version_info < (2, 6, 0): - self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') + self.skipTest(f'Expat {pyexpat.version_info} does not ' + 'support reparse deferral') parser = ET.XMLPullParser(events=('start', 'end')) is_python = hasattr(parser._parser, '_parser') # rather than C From 4855bb9e8192a8ad033724c1c0ce7c42bd27edbe Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 21:55:51 +0100 Subject: [PATCH 34/39] etree: Make test_flush_reparse_deferral_disabled less exclusive The test only needs to exclude Expat >=2.6.0 with etree C version; all else is possible, was overly exclusive by mistake. --- Lib/test/test_xml_etree.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 89e69b8ed53cdd..c6ea39841d23de 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1737,22 +1737,24 @@ def test_flush_reparse_deferral_disabled(self): parser = ET.XMLPullParser(events=('start', 'end')) is_python = hasattr(parser._parser, '_parser') # rather than C - if not is_python: - self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled methods not available in C') - for chunk in (""): parser.feed(chunk) if pyexpat.version_info >= (2, 6, 0): + if not is_python: + self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' + 'methods not available in C') parser._parser._parser.SetReparseDeferralEnabled(False) self.assert_event_tags(parser, []) # i.e. no elements started - self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + if is_python: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) parser.flush() self.assert_event_tags(parser, [('start', 'doc')]) - self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + if is_python: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) parser.feed("") parser.close() From b6a84b24a64bd43c1ca967f051b426ed35c962ae Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 22:03:16 +0100 Subject: [PATCH 35/39] etree|sax: Simplify .flush implementations Suggested by Serhiy Storchaka --- Lib/xml/etree/ElementTree.py | 7 ++----- Lib/xml/sax/expatreader.py | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 8fbbc0e9b62035..9e15d34d22aa6c 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1733,16 +1733,13 @@ def close(self): def flush(self): was_enabled = self.parser.GetReparseDeferralEnabled() - - if was_enabled: - self.parser.SetReparseDeferralEnabled(False) try: + self.parser.SetReparseDeferralEnabled(False) self.parser.Parse(b"", False) except self._error as v: self._raiseerror(v) finally: - if was_enabled: - self.parser.SetReparseDeferralEnabled(True) + self.parser.SetReparseDeferralEnabled(was_enabled) # -------------------------------------------------------------------- # C14N 2.0 diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index c01be3a669fea9..ba3c1e98517429 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -219,17 +219,14 @@ def flush(self): return was_enabled = self._parser.GetReparseDeferralEnabled() - - if was_enabled: - self._parser.SetReparseDeferralEnabled(False) try: + self._parser.SetReparseDeferralEnabled(False) self._parser.Parse(b"", False) except expat.error as e: exc = SAXParseException(expat.ErrorString(e.code), e, self) self._err_handler.fatalError(exc) finally: - if was_enabled: - self._parser.SetReparseDeferralEnabled(True) + self._parser.SetReparseDeferralEnabled(was_enabled) def _close_source(self): source = self._source From 0faa19eea5594eaea913e58af65650d2c9577c3f Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 23:21:26 +0100 Subject: [PATCH 36/39] etree: Resolve "is_python" in favor of "ET is pyET" --- Lib/test/test_xml_etree.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c6ea39841d23de..14df482ba6c207 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1713,19 +1713,18 @@ def test_flush_reparse_deferral_enabled(self): 'support reparse deferral') parser = ET.XMLPullParser(events=('start', 'end')) - is_python = hasattr(parser._parser, '_parser') # rather than C for chunk in (""): parser.feed(chunk) self.assert_event_tags(parser, []) # i.e. no elements started - if is_python: + if ET is pyET: self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) parser.flush() self.assert_event_tags(parser, [('start', 'doc')]) - if is_python: + if ET is pyET: self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) parser.feed("") @@ -1735,25 +1734,24 @@ def test_flush_reparse_deferral_enabled(self): def test_flush_reparse_deferral_disabled(self): parser = ET.XMLPullParser(events=('start', 'end')) - is_python = hasattr(parser._parser, '_parser') # rather than C for chunk in (""): parser.feed(chunk) if pyexpat.version_info >= (2, 6, 0): - if not is_python: + if not ET is pyET: self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' 'methods not available in C') parser._parser._parser.SetReparseDeferralEnabled(False) self.assert_event_tags(parser, []) # i.e. no elements started - if is_python: + if ET is pyET: self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) parser.flush() self.assert_event_tags(parser, [('start', 'doc')]) - if is_python: + if ET is pyET: self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) parser.feed("") From 40743a6eabee46a377420c85aebf9291a9ea6919 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 23:31:43 +0100 Subject: [PATCH 37/39] etree: Fix emphasis syntax for "immediate" in docs --- Doc/library/xml.etree.elementtree.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 7fb4bab763f2b5..19c7af452e2b71 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -166,7 +166,7 @@ data but would still like to have incremental parsing capabilities, take a look at :func:`iterparse`. It can be useful when you're reading a large XML document and don't want to hold it wholly in memory. -Where _immediate_ feedback through events is wanted, calling method +Where *immediate* feedback through events is wanted, calling method :meth:`XMLPullParser.flush` can help reduce delay; please make sure to study the related security notes. From a4732993d4efdd892403735235a7082c7a4309fb Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 23:32:47 +0100 Subject: [PATCH 38/39] pypexpat: Replace "none" with "NULL" to be correct Suggested by Serhiy Storchaka --- Include/pyexpat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 8b44c1ce119f0f..9824d099c3df7d 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -48,9 +48,9 @@ struct PyExpat_CAPI enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding); int (*DefaultUnknownEncodingHandler)( void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); - /* might be none for expat < 2.1.0 */ + /* might be NULL for expat < 2.1.0 */ int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); - /* might be none for expat < 2.6.0 */ + /* might be NULL for expat < 2.6.0 */ XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); /* always add new stuff to the end! */ }; From a6baa0bcd89d1c968602fb64d39d162345332309 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Feb 2024 23:36:22 +0100 Subject: [PATCH 39/39] pyexpat: Indent warning about xmlparser.SetReparseDeferralEnabled Suggested by Serhiy Storchaka --- Doc/library/pyexpat.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index d9309bedca483a..ae142d9a339fa9 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -198,11 +198,11 @@ XMLParser Objects .. method:: xmlparser.SetReparseDeferralEnabled(enabled) -.. warning:: + .. warning:: - Calling ``SetReparseDeferralEnabled(False)`` has security implications, - as detailed below; please make sure to understand these consequences - prior to using the ``SetReparseDeferralEnabled`` method. + Calling ``SetReparseDeferralEnabled(False)`` has security implications, + as detailed below; please make sure to understand these consequences + prior to using the ``SetReparseDeferralEnabled`` method. Expat 2.6.0 introduced a security mechanism called "reparse deferral" where instead of causing denial of service through quadratic runtime