From 49e554dbafc87245c1364ae00ad064a96f5cb995 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 21 Nov 2022 20:42:18 +0000 Subject: [PATCH 001/112] gh-98629: Fixes sys._git and sys.version creation on Windows (GH-99664) --- .../next/Windows/2022-11-21-19-50-18.gh-issue-98629.tMmB_B.rst | 1 + PCbuild/pythoncore.vcxproj | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Windows/2022-11-21-19-50-18.gh-issue-98629.tMmB_B.rst diff --git a/Misc/NEWS.d/next/Windows/2022-11-21-19-50-18.gh-issue-98629.tMmB_B.rst b/Misc/NEWS.d/next/Windows/2022-11-21-19-50-18.gh-issue-98629.tMmB_B.rst new file mode 100644 index 00000000000000..46cbf998eb2001 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-11-21-19-50-18.gh-issue-98629.tMmB_B.rst @@ -0,0 +1 @@ +Fix initialization of :data:`sys.version` and ``sys._git`` on Windows diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index fd5c3175ce423b..3c3ff406bdf0ae 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -595,7 +595,7 @@ - + GITVERSION="$(GitVersion)";GITTAG="$(GitTag)";GITBRANCH="$(GitBranch)";%(PreprocessorDefinitions) From 2781ec9b0e41a62cecc189c22dfc849f9a56927c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 21 Nov 2022 21:44:17 +0100 Subject: [PATCH 002/112] gh-99659: Use correct exceptions in sqlite3 bigmem tests (#99660) The tests in question were added in 0eec6276fdcd by Serhiy. Apparently, sqlite3 changed exceptions raised in those cases in the mean time but the tests never ran because they require a high `-M` setting in the test runner. --- Lib/test/test_sqlite3/test_types.py | 8 ++++---- .../Tests/2022-11-21-19-21-30.gh-issue-99659.4gP0nm.rst | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2022-11-21-19-21-30.gh-issue-99659.4gP0nm.rst diff --git a/Lib/test/test_sqlite3/test_types.py b/Lib/test/test_sqlite3/test_types.py index 62318823510d40..5e0ff353cbbd6b 100644 --- a/Lib/test/test_sqlite3/test_types.py +++ b/Lib/test/test_sqlite3/test_types.py @@ -106,9 +106,9 @@ def test_string_with_surrogates(self): @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') @support.bigmemtest(size=2**31, memuse=4, dry_run=False) def test_too_large_string(self, maxsize): - with self.assertRaises(sqlite.InterfaceError): + with self.assertRaises(sqlite.DataError): self.cur.execute("insert into test(s) values (?)", ('x'*(2**31-1),)) - with self.assertRaises(OverflowError): + with self.assertRaises(sqlite.DataError): self.cur.execute("insert into test(s) values (?)", ('x'*(2**31),)) self.cur.execute("select 1 from test") row = self.cur.fetchone() @@ -117,9 +117,9 @@ def test_too_large_string(self, maxsize): @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') @support.bigmemtest(size=2**31, memuse=3, dry_run=False) def test_too_large_blob(self, maxsize): - with self.assertRaises(sqlite.InterfaceError): + with self.assertRaises(sqlite.DataError): self.cur.execute("insert into test(s) values (?)", (b'x'*(2**31-1),)) - with self.assertRaises(OverflowError): + with self.assertRaises(sqlite.DataError): self.cur.execute("insert into test(s) values (?)", (b'x'*(2**31),)) self.cur.execute("select 1 from test") row = self.cur.fetchone() diff --git a/Misc/NEWS.d/next/Tests/2022-11-21-19-21-30.gh-issue-99659.4gP0nm.rst b/Misc/NEWS.d/next/Tests/2022-11-21-19-21-30.gh-issue-99659.4gP0nm.rst new file mode 100644 index 00000000000000..3db1ec12b5202e --- /dev/null +++ 
b/Misc/NEWS.d/next/Tests/2022-11-21-19-21-30.gh-issue-99659.4gP0nm.rst @@ -0,0 +1,3 @@ +Optional big memory tests in ``test_sqlite3`` now catch the correct +:exc:`sqlite.DataError` exception type in case of too large strings and/or +blobs passed. From 1bf983ce7eb8bfd17dc18102b61dfbdafe0deda2 Mon Sep 17 00:00:00 2001 From: GabrielAnguita <60579349+GabrielAnguita@users.noreply.github.com> Date: Tue, 22 Nov 2022 01:02:55 -0300 Subject: [PATCH 003/112] gh-99662: fix typo in typing.TypeVarTuple docs (#99672) --- Doc/library/typing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 0ec4499f94f5a7..94c9cb11f02d6d 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -1339,7 +1339,7 @@ These are not used in annotations. They are building blocks for creating generic ``Unpack[Ts]``.) Type variable tuples must *always* be unpacked. This helps distinguish type - variable types from normal type variables:: + variable tuples from normal type variables:: x: Ts # Not valid x: tuple[Ts] # Not valid From 4d82f628c44490d6fbc3f6998d2473d1304d891f Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 22 Nov 2022 08:25:43 +0100 Subject: [PATCH 004/112] gh-47146: Soft-deprecate structmember.h, expose its contents via Python.h (GH-99014) The ``structmember.h`` header is deprecated, though it continues to be available and there are no plans to remove it. There are no deprecation warnings. Old code can stay unchanged (unless the extra include and non-namespaced macros bother you greatly). Specifically, no uses in CPython are updated -- that would just be unnecessary churn. The ``structmember.h`` header is deprecated, though it continues to be available and there are no plans to remove it. Its contents are now available just by including ``Python.h``, with a ``Py`` prefix added if it was missing: - `PyMemberDef`, `PyMember_GetOne` and`PyMember_SetOne` - Type macros like `Py_T_INT`, `Py_T_DOUBLE`, etc. (previously ``T_INT``, ``T_DOUBLE``, etc.) - The flags `Py_READONLY` (previously ``READONLY``) and `Py_AUDIT_READ` (previously all uppercase) Several items are not exposed from ``Python.h``: - `T_OBJECT` (use `Py_T_OBJECT_EX`) - `T_NONE` (previously undocumented, and pretty quirky) - The macro ``WRITE_RESTRICTED`` which does nothing. - The macros ``RESTRICTED`` and ``READ_RESTRICTED``, equivalents of `Py_AUDIT_READ`. - In some configurations, ```` is not included from ``Python.h``. It should be included manually when using ``offsetof()``. The deprecated header continues to provide its original contents under the original names. Your old code can stay unchanged, unless the extra include and non-namespaced macros bother you greatly. There is discussion on the issue to rename `T_PYSSIZET` to `PY_T_SSIZE` or similar. I chose not to do that -- users will probably copy/paste that with any spelling, and not renaming it makes migration docs simpler. 
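As a quick before/after sketch of what the rename means for an extension module (the MyObject struct and its "number" field are invented for illustration, not taken from this patch):

    /* Old spelling, still available through the deprecated header:
     *     #include "structmember.h"
     *     {"number", T_INT, offsetof(MyObject, number), READONLY, NULL},
     */

    /* New spelling: Python.h is enough, plus <stddef.h> for offsetof(). */
    #include <Python.h>
    #include <stddef.h>                 /* offsetof() */

    typedef struct {
        PyObject_HEAD
        int number;
    } MyObject;

    static PyMemberDef my_members[] = {
        {"number", Py_T_INT, offsetof(MyObject, number), Py_READONLY,
         PyDoc_STR("a read-only int member")},
        {NULL}                          /* sentinel */
    };

The table is hooked up exactly as before, for example through a Py_tp_members slot or the tp_members field.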
Co-Authored-By: Alexander Belopolsky Co-Authored-By: Matthias Braun --- Doc/c-api/structures.rst | 245 +++++++++++++----- Doc/data/stable_abi.dat | 2 + Doc/extending/newtypes.rst | 33 +-- Doc/extending/newtypes_tutorial.rst | 15 +- Doc/includes/custom2.c | 8 +- Doc/includes/custom3.c | 4 +- Doc/includes/custom4.c | 4 +- Doc/whatsnew/3.12.rst | 31 +++ Include/descrobject.h | 55 ++++ Include/structmember.h | 95 +++---- Lib/test/test_capi/test_structmembers.py | 91 +++++-- ...2-11-02-16-51-24.gh-issue-47146.dsYDtI.rst | 5 + Misc/stable_abi.toml | 49 +++- Modules/Setup.stdlib.in | 2 +- Modules/_testcapi/parts.h | 1 + Modules/_testcapi/structmember.c | 217 ++++++++++++++++ Modules/_testcapimodule.c | 151 +---------- PCbuild/_testcapi.vcxproj | 1 + PCbuild/_testcapi.vcxproj.filters | 3 + 19 files changed, 667 insertions(+), 345 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-11-02-16-51-24.gh-issue-47146.dsYDtI.rst create mode 100644 Modules/_testcapi/structmember.c diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 5a20f07214fd51..827d624fc99edb 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -385,86 +385,67 @@ Accessing attributes of extension types .. c:type:: PyMemberDef Structure which describes an attribute of a type which corresponds to a C - struct member. Its fields are: + struct member. Its fields are, in order: - .. c:member:: const char* PyMemberDef.name + .. c:member:: const char* name - Name of the member + Name of the member. + A NULL value marks the end of a ``PyMemberDef[]`` array. - .. c:member:: int PyMemberDef.type - - The type of the member in the C struct. + The string should be static, no copy is made of it. .. c:member:: Py_ssize_t PyMemberDef.offset The offset in bytes that the member is located on the type’s object struct. - .. c:member:: int PyMemberDef.flags - - Flag bits indicating if the field should be read-only or writable. - - .. c:member:: const char* PyMemberDef.doc - - Points to the contents of the docstring. - - :c:member:`PyMemberDef.type` can be one of many ``T_`` macros corresponding to various C - types. When the member is accessed in Python, it will be converted to the - equivalent Python type. - - =============== ================== - Macro name C type - =============== ================== - T_SHORT short - T_INT int - T_LONG long - T_FLOAT float - T_DOUBLE double - T_STRING const char \* - T_OBJECT PyObject \* - T_OBJECT_EX PyObject \* - T_CHAR char - T_BYTE char - T_UBYTE unsigned char - T_UINT unsigned int - T_USHORT unsigned short - T_ULONG unsigned long - T_BOOL char - T_LONGLONG long long - T_ULONGLONG unsigned long long - T_PYSSIZET Py_ssize_t - =============== ================== - - :c:macro:`T_OBJECT` and :c:macro:`T_OBJECT_EX` differ in that - :c:macro:`T_OBJECT` returns ``None`` if the member is ``NULL`` and - :c:macro:`T_OBJECT_EX` raises an :exc:`AttributeError`. Try to use - :c:macro:`T_OBJECT_EX` over :c:macro:`T_OBJECT` because :c:macro:`T_OBJECT_EX` - handles use of the :keyword:`del` statement on that attribute more correctly - than :c:macro:`T_OBJECT`. - - :c:member:`PyMemberDef.flags` can be ``0`` for write and read access or :c:macro:`READONLY` for - read-only access. Using :c:macro:`T_STRING` for :attr:`type` implies - :c:macro:`READONLY`. :c:macro:`T_STRING` data is interpreted as UTF-8. - Only :c:macro:`T_OBJECT` and :c:macro:`T_OBJECT_EX` - members can be deleted. (They are set to ``NULL``). + .. c:member:: int type + + The type of the member in the C struct. 
+ See :ref:`PyMemberDef-types` for the possible values. + + .. c:member:: int flags + + Zero or more of the :ref:`PyMemberDef-flags`, combined using bitwise OR. + + .. c:member:: const char* doc + + The docstring, or NULL. + The string should be static, no copy is made of it. + Typically, it is defined using :c:macro:`PyDoc_STR`. + + By default (when :c:member:`flags` is ``0``), members allow + both read and write access. + Use the :c:macro:`Py_READONLY` flag for read-only access. + Certain types, like :c:macro:`Py_T_STRING`, imply :c:macro:`Py_READONLY`. + Only :c:macro:`Py_T_OBJECT_EX` (and legacy :c:macro:`T_OBJECT`) members can + be deleted. .. _pymemberdef-offsets: - Heap allocated types (created using :c:func:`PyType_FromSpec` or similar), - ``PyMemberDef`` may contain definitions for the special member - ``__vectorcalloffset__``, corresponding to + For heap-allocated types (created using :c:func:`PyType_FromSpec` or similar), + ``PyMemberDef`` may contain a definition for the special member + ``"__vectorcalloffset__"``, corresponding to :c:member:`~PyTypeObject.tp_vectorcall_offset` in type objects. - These must be defined with ``T_PYSSIZET`` and ``READONLY``, for example:: + These must be defined with ``Py_T_PYSSIZET`` and ``Py_READONLY``, for example:: static PyMemberDef spam_type_members[] = { - {"__vectorcalloffset__", T_PYSSIZET, offsetof(Spam_object, vectorcall), READONLY}, + {"__vectorcalloffset__", Py_T_PYSSIZET, + offsetof(Spam_object, vectorcall), Py_READONLY}, {NULL} /* Sentinel */ }; + (You may need to ``#include `` for :c:func:`!offsetof`.) + The legacy offsets :c:member:`~PyTypeObject.tp_dictoffset` and - :c:member:`~PyTypeObject.tp_weaklistoffset` are still supported, but extensions are - strongly encouraged to use ``Py_TPFLAGS_MANAGED_DICT`` and - ``Py_TPFLAGS_MANAGED_WEAKREF`` instead. + :c:member:`~PyTypeObject.tp_weaklistoffset` can be defined similarly using + ``"__dictoffset__"`` and ``"__weaklistoffset__"`` members, but extensions + are strongly encouraged to use :const:`Py_TPFLAGS_MANAGED_DICT` and + :const:`Py_TPFLAGS_MANAGED_WEAKREF` instead. + .. versionchanged:: 3.12 + + ``PyMemberDef`` is always available. + Previously, it required including ``"structmember.h"``. .. c:function:: PyObject* PyMember_GetOne(const char *obj_addr, struct PyMemberDef *m) @@ -472,6 +453,10 @@ Accessing attributes of extension types attribute is described by ``PyMemberDef`` *m*. Returns ``NULL`` on error. + .. versionchanged:: 3.12 + + ``PyMember_GetOne`` is always available. + Previously, it required including ``"structmember.h"``. .. c:function:: int PyMember_SetOne(char *obj_addr, struct PyMemberDef *m, PyObject *o) @@ -479,6 +464,144 @@ Accessing attributes of extension types The attribute to set is described by ``PyMemberDef`` *m*. Returns ``0`` if successful and a negative value on failure. + .. versionchanged:: 3.12 + + ``PyMember_SetOne`` is always available. + Previously, it required including ``"structmember.h"``. + +.. _PyMemberDef-flags: + +Member flags +^^^^^^^^^^^^ + +The following flags can be used with :c:member:`PyMemberDef.flags`: + +.. c:macro:: Py_READONLY + + Not writable. + +.. c:macro:: Py_AUDIT_READ + + Emit an ``object.__getattr__`` :ref:`audit event ` + before reading. + +.. index:: + single: READ_RESTRICTED + single: WRITE_RESTRICTED + single: RESTRICTED + +.. versionchanged:: 3.10 + + The :const:`!RESTRICTED`, :const:`!READ_RESTRICTED` and + :const:`!WRITE_RESTRICTED` macros available with + ``#include "structmember.h"`` are deprecated. 
+ :const:`!READ_RESTRICTED` and :const:`!RESTRICTED` are equivalent to + :const:`Py_AUDIT_READ`; :const:`!WRITE_RESTRICTED` does nothing. + +.. index:: + single: READONLY + +.. versionchanged:: 3.12 + + The :const:`!READONLY` macro was renamed to :const:`Py_READONLY`. + The :const:`!PY_AUDIT_READ` macro was renamed with the ``Py_`` prefix. + The new names are now always available. + Previously, these required ``#include "structmember.h"``. + The header is still available and it provides the old names. + +.. _PyMemberDef-types: + +Member types +^^^^^^^^^^^^ + +:c:member:`PyMemberDef.type` can be one of the following macros corresponding +to various C types. +When the member is accessed in Python, it will be converted to the +equivalent Python type. +When it is set from Python, it will be converted back to the C type. +If that is not possible, an exception such as :exc:`TypeError` or +:exc:`ValueError` is raised. + +Unless marked (D), attributes defined this way cannot be deleted +using e.g. :keyword:`del` or :py:func:`delattr`. + +================================ ============================= ====================== +Macro name C type Python type +================================ ============================= ====================== +.. c:macro:: Py_T_BYTE :c:expr:`char` :py:class:`int` +.. c:macro:: Py_T_SHORT :c:expr:`short` :py:class:`int` +.. c:macro:: Py_T_INT :c:expr:`int` :py:class:`int` +.. c:macro:: Py_T_LONG :c:expr:`long` :py:class:`int` +.. c:macro:: Py_T_LONGLONG :c:expr:`long long` :py:class:`int` +.. c:macro:: Py_T_UBYTE :c:expr:`unsigned char` :py:class:`int` +.. c:macro:: Py_T_UINT :c:expr:`unsigned int` :py:class:`int` +.. c:macro:: Py_T_USHORT :c:expr:`unsigned short` :py:class:`int` +.. c:macro:: Py_T_ULONG :c:expr:`unsigned long` :py:class:`int` +.. c:macro:: Py_T_ULONGLONG :c:expr:`unsigned long long` :py:class:`int` +.. c:macro:: Py_T_PYSSIZET :c:expr:`Py_ssize_t` :py:class:`int` +.. c:macro:: Py_T_FLOAT :c:expr:`float` :py:class:`float` +.. c:macro:: Py_T_DOUBLE :c:expr:`double` :py:class:`float` +.. c:macro:: Py_T_BOOL :c:expr:`char` :py:class:`bool` + (written as 0 or 1) +.. c:macro:: Py_T_STRING :c:expr:`const char *` (*) :py:class:`str` (RO) +.. c:macro:: Py_T_STRING_INPLACE :c:expr:`const char[]` (*) :py:class:`str` (RO) +.. c:macro:: Py_T_CHAR :c:expr:`char` (0-127) :py:class:`str` (**) +.. c:macro:: Py_T_OBJECT_EX :c:expr:`PyObject *` :py:class:`object` (D) +================================ ============================= ====================== + + (*): Zero-terminated, UTF8-encoded C string. + With :c:macro:`!Py_T_STRING` the C representation is a pointer; + with :c:macro:`!Py_T_STRING_INLINE` the string is stored directly + in the structure. + + (**): String of length 1. Only ASCII is accepted. + + (RO): Implies :c:macro:`Py_READONLY`. + + (D): Can be deleted, in which case the pointer is set to ``NULL``. + Reading a ``NULL`` pointer raises :py:exc:`AttributeError`. + +.. index:: + single: T_BYTE + single: T_SHORT + single: T_INT + single: T_LONG + single: T_LONGLONG + single: T_UBYTE + single: T_USHORT + single: T_UINT + single: T_ULONG + single: T_ULONGULONG + single: T_PYSSIZET + single: T_FLOAT + single: T_DOUBLE + single: T_BOOL + single: T_CHAR + single: T_STRING + single: T_STRING_INPLACE + single: T_OBJECT_EX + single: structmember.h + +.. versionadded:: 3.12 + + In previous versions, the macros were only available with + ``#include "structmember.h"`` and were named without the ``Py_`` prefix + (e.g. as ``T_INT``). 
+ The header is still available and contains the old names, along with + the following deprecated types: + + .. c:macro:: T_OBJECT + + Like ``Py_T_OBJECT_EX``, but ``NULL`` is converted to ``None``. + This results in surprising behavior in Python: deleting the attribute + effectively sets it to ``None``. + + .. c:macro:: T_NONE + + Always ``None``. Must be used with :c:macro:`Py_READONLY`. + +Defining Getters and Setters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. c:type:: PyGetSetDef diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index db8fc15d93d15a..53895bbced8408 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -386,6 +386,8 @@ function,PyMem_Malloc,3.2,, function,PyMem_Realloc,3.2,, type,PyMemberDef,3.2,,full-abi var,PyMemberDescr_Type,3.2,, +function,PyMember_GetOne,3.2,, +function,PyMember_SetOne,3.2,, function,PyMemoryView_FromBuffer,3.11,, function,PyMemoryView_FromMemory,3.7,, function,PyMemoryView_FromObject,3.2,, diff --git a/Doc/extending/newtypes.rst b/Doc/extending/newtypes.rst index 3de849ade78888..80a1387db200c2 100644 --- a/Doc/extending/newtypes.rst +++ b/Doc/extending/newtypes.rst @@ -286,36 +286,11 @@ be read-only or read-write. The structures in the table are defined as:: For each entry in the table, a :term:`descriptor` will be constructed and added to the type which will be able to extract a value from the instance structure. The -:attr:`type` field should contain one of the type codes defined in the -:file:`structmember.h` header; the value will be used to determine how to +:attr:`type` field should contain a type code like :c:macro:`Py_T_INT` or +:c:macro:`Py_T_DOUBLE`; the value will be used to determine how to convert Python values to and from C values. The :attr:`flags` field is used to -store flags which control how the attribute can be accessed. - -The following flag constants are defined in :file:`structmember.h`; they may be -combined using bitwise-OR. - -+---------------------------+----------------------------------------------+ -| Constant | Meaning | -+===========================+==============================================+ -| :const:`READONLY` | Never writable. | -+---------------------------+----------------------------------------------+ -| :const:`PY_AUDIT_READ` | Emit an ``object.__getattr__`` | -| | :ref:`audit events ` before | -| | reading. | -+---------------------------+----------------------------------------------+ - -.. versionchanged:: 3.10 - :const:`RESTRICTED`, :const:`READ_RESTRICTED` and :const:`WRITE_RESTRICTED` - are deprecated. However, :const:`READ_RESTRICTED` is an alias for - :const:`PY_AUDIT_READ`, so fields that specify either :const:`RESTRICTED` - or :const:`READ_RESTRICTED` will also raise an audit event. - -.. index:: - single: READONLY - single: READ_RESTRICTED - single: WRITE_RESTRICTED - single: RESTRICTED - single: PY_AUDIT_READ +store flags which control how the attribute can be accessed: you can set it to +:c:macro:`Py_READONLY` to prevent Python code from setting it. An interesting advantage of using the :c:member:`~PyTypeObject.tp_members` table to build descriptors that are used at runtime is that any attribute defined this way can diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index 5d4a3f06dd5402..54de3fd42437d9 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -239,13 +239,6 @@ adds these capabilities: This version of the module has a number of changes. 
-We've added an extra include:: - - #include - -This include provides declarations that we use to handle attributes, as -described a bit later. - The :class:`Custom` type now has three data attributes in its C struct, *first*, *last*, and *number*. The *first* and *last* variables are Python strings containing first and last names. The *number* attribute is a C integer. @@ -436,11 +429,11 @@ We want to expose our instance variables as attributes. There are a number of ways to do that. The simplest way is to define member definitions:: static PyMemberDef Custom_members[] = { - {"first", T_OBJECT_EX, offsetof(CustomObject, first), 0, + {"first", Py_T_OBJECT_EX, offsetof(CustomObject, first), 0, "first name"}, - {"last", T_OBJECT_EX, offsetof(CustomObject, last), 0, + {"last", Py_T_OBJECT_EX, offsetof(CustomObject, last), 0, "last name"}, - {"number", T_INT, offsetof(CustomObject, number), 0, + {"number", Py_T_INT, offsetof(CustomObject, number), 0, "custom number"}, {NULL} /* Sentinel */ }; @@ -609,7 +602,7 @@ above. In this case, we aren't using a closure, so we just pass ``NULL``. We also remove the member definitions for these attributes:: static PyMemberDef Custom_members[] = { - {"number", T_INT, offsetof(CustomObject, number), 0, + {"number", Py_T_INT, offsetof(CustomObject, number), 0, "custom number"}, {NULL} /* Sentinel */ }; diff --git a/Doc/includes/custom2.c b/Doc/includes/custom2.c index aee9e1bb7f2d74..6638b9fbc1d751 100644 --- a/Doc/includes/custom2.c +++ b/Doc/includes/custom2.c @@ -1,6 +1,6 @@ #define PY_SSIZE_T_CLEAN #include -#include "structmember.h" +#include /* for offsetof() */ typedef struct { PyObject_HEAD @@ -63,11 +63,11 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) } static PyMemberDef Custom_members[] = { - {"first", T_OBJECT_EX, offsetof(CustomObject, first), 0, + {"first", Py_T_OBJECT_EX, offsetof(CustomObject, first), 0, "first name"}, - {"last", T_OBJECT_EX, offsetof(CustomObject, last), 0, + {"last", Py_T_OBJECT_EX, offsetof(CustomObject, last), 0, "last name"}, - {"number", T_INT, offsetof(CustomObject, number), 0, + {"number", Py_T_INT, offsetof(CustomObject, number), 0, "custom number"}, {NULL} /* Sentinel */ }; diff --git a/Doc/includes/custom3.c b/Doc/includes/custom3.c index 8d88bc24511829..0faf2bd4be172a 100644 --- a/Doc/includes/custom3.c +++ b/Doc/includes/custom3.c @@ -1,6 +1,6 @@ #define PY_SSIZE_T_CLEAN #include -#include "structmember.h" +#include /* for offsetof() */ typedef struct { PyObject_HEAD @@ -63,7 +63,7 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) } static PyMemberDef Custom_members[] = { - {"number", T_INT, offsetof(CustomObject, number), 0, + {"number", Py_T_INT, offsetof(CustomObject, number), 0, "custom number"}, {NULL} /* Sentinel */ }; diff --git a/Doc/includes/custom4.c b/Doc/includes/custom4.c index ad240ae6a8df7b..b725bc0b6fae3a 100644 --- a/Doc/includes/custom4.c +++ b/Doc/includes/custom4.c @@ -1,6 +1,6 @@ #define PY_SSIZE_T_CLEAN #include -#include "structmember.h" +#include /* for offsetof() */ typedef struct { PyObject_HEAD @@ -79,7 +79,7 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) } static PyMemberDef Custom_members[] = { - {"number", T_INT, offsetof(CustomObject, number), 0, + {"number", Py_T_INT, offsetof(CustomObject, number), 0, "custom number"}, {NULL} /* Sentinel */ }; diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index f8786c15f6f47f..8e9a4f04a89056 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -849,6 +849,37 @@ 
Deprecated * Creating :c:data:`immutable types ` with mutable bases is deprecated and will be disabled in Python 3.14. +* The ``structmember.h`` header is deprecated, though it continues to be + available and there are no plans to remove it. + + Its contents are now available just by including ``Python.h``, + with a ``Py`` prefix added if it was missing: + + - :c:struct:`PyMemberDef`, :c:func:`PyMember_GetOne` and + :c:func:`PyMember_SetOne` + - Type macros like :c:macro:`Py_T_INT`, :c:macro:`Py_T_DOUBLE`, etc. + (previously ``T_INT``, ``T_DOUBLE``, etc.) + - The flags :c:macro:`Py_READONLY` (previously ``READONLY``) and + :c:macro:`Py_AUDIT_READ` (previously all uppercase) + + Several items are not exposed from ``Python.h``: + + - :c:macro:`T_OBJECT` (use :c:macro:`Py_T_OBJECT_EX`) + - :c:macro:`T_NONE` (previously undocumented, and pretty quirky) + - The macro ``WRITE_RESTRICTED`` which does nothing. + - The macros ``RESTRICTED`` and ``READ_RESTRICTED``, equivalents of + :c:macro:`Py_AUDIT_READ`. + - In some configurations, ```` is not included from ``Python.h``. + It should be included manually when using ``offsetof()``. + + The deprecated header continues to provide its original + contents under the original names. + Your old code can stay unchanged, unless the extra include and non-namespaced + macros bother you greatly. + + (Contributed in :gh:`47146` by Petr Viktorin, based on + earlier work by Alexander Belopolsky and Matthias Braun.) + Removed ------- diff --git a/Include/descrobject.h b/Include/descrobject.h index 77f221df07714f..0a420b865dfd1b 100644 --- a/Include/descrobject.h +++ b/Include/descrobject.h @@ -32,6 +32,61 @@ PyAPI_FUNC(PyObject *) PyDescr_NewGetSet(PyTypeObject *, PyGetSetDef *); PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *); PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *); + +/* An array of PyMemberDef structures defines the name, type and offset + of selected members of a C structure. These can be read by + PyMember_GetOne() and set by PyMember_SetOne() (except if their READONLY + flag is set). The array must be terminated with an entry whose name + pointer is NULL. */ +struct PyMemberDef { + const char *name; + int type; + Py_ssize_t offset; + int flags; + const char *doc; +}; + +// These constants used to be in structmember.h, not prefixed by Py_. +// (structmember.h now has aliases to the new names.) + +/* Types */ +#define Py_T_SHORT 0 +#define Py_T_INT 1 +#define Py_T_LONG 2 +#define Py_T_FLOAT 3 +#define Py_T_DOUBLE 4 +#define Py_T_STRING 5 +#define _Py_T_OBJECT 6 // Deprecated, use Py_T_OBJECT_EX instead +/* the ordering here is weird for binary compatibility */ +#define Py_T_CHAR 7 /* 1-character string */ +#define Py_T_BYTE 8 /* 8-bit signed int */ +/* unsigned variants: */ +#define Py_T_UBYTE 9 +#define Py_T_USHORT 10 +#define Py_T_UINT 11 +#define Py_T_ULONG 12 + +/* Added by Jack: strings contained in the structure */ +#define Py_T_STRING_INPLACE 13 + +/* Added by Lillo: bools contained in the structure (assumed char) */ +#define Py_T_BOOL 14 + +#define Py_T_OBJECT_EX 16 +#define Py_T_LONGLONG 17 +#define Py_T_ULONGLONG 18 + +#define Py_T_PYSSIZET 19 /* Py_ssize_t */ +#define _Py_T_NONE 20 // Deprecated. Value is always None. + +/* Flags */ +#define Py_READONLY 1 +#define Py_AUDIT_READ 2 // Added in 3.10, harmless no-op before that +#define _Py_WRITE_RESTRICTED 4 // Deprecated, no-op. Do not reuse the value. 
+ +PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, PyMemberDef *); +PyAPI_FUNC(int) PyMember_SetOne(char *, PyMemberDef *, PyObject *); + #ifndef Py_LIMITED_API # define Py_CPYTHON_DESCROBJECT_H # include "cpython/descrobject.h" diff --git a/Include/structmember.h b/Include/structmember.h index 65a777d5f52117..f6e8fd829892f4 100644 --- a/Include/structmember.h +++ b/Include/structmember.h @@ -5,69 +5,50 @@ extern "C" { #endif -/* Interface to map C struct members to Python object attributes */ - -#include /* For offsetof */ - -/* An array of PyMemberDef structures defines the name, type and offset - of selected members of a C structure. These can be read by - PyMember_GetOne() and set by PyMember_SetOne() (except if their READONLY - flag is set). The array must be terminated with an entry whose name - pointer is NULL. */ - -struct PyMemberDef { - const char *name; - int type; - Py_ssize_t offset; - int flags; - const char *doc; -}; +/* Interface to map C struct members to Python object attributes + * + * This header is deprecated: new code should not use stuff from here. + * New definitions are in descrobject.h. + * + * However, there's nothing wrong with old code continuing to use it, + * and there's not much mainenance overhead in maintaining a few aliases. + * So, don't be too eager to convert old code. + * + * It uses names not prefixed with Py_. + * It is also *not* included from Python.h and must be included individually. + */ + +#include /* For offsetof (not always provided by Python.h) */ /* Types */ -#define T_SHORT 0 -#define T_INT 1 -#define T_LONG 2 -#define T_FLOAT 3 -#define T_DOUBLE 4 -#define T_STRING 5 -#define T_OBJECT 6 -/* XXX the ordering here is weird for binary compatibility */ -#define T_CHAR 7 /* 1-character string */ -#define T_BYTE 8 /* 8-bit signed int */ -/* unsigned variants: */ -#define T_UBYTE 9 -#define T_USHORT 10 -#define T_UINT 11 -#define T_ULONG 12 - -/* Added by Jack: strings contained in the structure */ -#define T_STRING_INPLACE 13 - -/* Added by Lillo: bools contained in the structure (assumed char) */ -#define T_BOOL 14 - -#define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError - when the value is NULL, instead of - converting to None. 
*/ -#define T_LONGLONG 17 -#define T_ULONGLONG 18 - -#define T_PYSSIZET 19 /* Py_ssize_t */ -#define T_NONE 20 /* Value is always None */ - +#define T_SHORT Py_T_SHORT +#define T_INT Py_T_INT +#define T_LONG Py_T_LONG +#define T_FLOAT Py_T_FLOAT +#define T_DOUBLE Py_T_DOUBLE +#define T_STRING Py_T_STRING +#define T_OBJECT _Py_T_OBJECT +#define T_CHAR Py_T_CHAR +#define T_BYTE Py_T_BYTE +#define T_UBYTE Py_T_UBYTE +#define T_USHORT Py_T_USHORT +#define T_UINT Py_T_UINT +#define T_ULONG Py_T_ULONG +#define T_STRING_INPLACE Py_T_STRING_INPLACE +#define T_BOOL Py_T_BOOL +#define T_OBJECT_EX Py_T_OBJECT_EX +#define T_LONGLONG Py_T_LONGLONG +#define T_ULONGLONG Py_T_ULONGLONG +#define T_PYSSIZET Py_T_PYSSIZET +#define T_NONE _Py_T_NONE /* Flags */ -#define READONLY 1 -#define READ_RESTRICTED 2 -#define PY_WRITE_RESTRICTED 4 +#define READONLY Py_READONLY +#define PY_AUDIT_READ Py_AUDIT_READ +#define READ_RESTRICTED Py_AUDIT_READ +#define PY_WRITE_RESTRICTED _Py_WRITE_RESTRICTED #define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED) -#define PY_AUDIT_READ READ_RESTRICTED - -/* Current API, use this */ -PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, PyMemberDef *); -PyAPI_FUNC(int) PyMember_SetOne(char *, PyMemberDef *, PyObject *); - #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_structmembers.py b/Lib/test/test_capi/test_structmembers.py index 07d2f623f7156e..2cf46b203478dc 100644 --- a/Lib/test/test_capi/test_structmembers.py +++ b/Lib/test/test_capi/test_structmembers.py @@ -4,32 +4,42 @@ # Skip this test if the _testcapi module isn't available. import_helper.import_module('_testcapi') -from _testcapi import _test_structmembersType, \ - CHAR_MAX, CHAR_MIN, UCHAR_MAX, \ - SHRT_MAX, SHRT_MIN, USHRT_MAX, \ - INT_MAX, INT_MIN, UINT_MAX, \ - LONG_MAX, LONG_MIN, ULONG_MAX, \ - LLONG_MAX, LLONG_MIN, ULLONG_MAX, \ - PY_SSIZE_T_MAX, PY_SSIZE_T_MIN - -ts=_test_structmembersType(False, # T_BOOL - 1, # T_BYTE - 2, # T_UBYTE - 3, # T_SHORT - 4, # T_USHORT - 5, # T_INT - 6, # T_UINT - 7, # T_LONG - 8, # T_ULONG - 23, # T_PYSSIZET - 9.99999,# T_FLOAT - 10.1010101010, # T_DOUBLE - "hi" # T_STRING_INPLACE - ) - -class ReadWriteTests(unittest.TestCase): +from _testcapi import (_test_structmembersType_OldAPI, + _test_structmembersType_NewAPI, + CHAR_MAX, CHAR_MIN, UCHAR_MAX, + SHRT_MAX, SHRT_MIN, USHRT_MAX, + INT_MAX, INT_MIN, UINT_MAX, + LONG_MAX, LONG_MIN, ULONG_MAX, + LLONG_MAX, LLONG_MIN, ULLONG_MAX, + PY_SSIZE_T_MAX, PY_SSIZE_T_MIN, + ) + +# There are two classes: one using and another using +# `Py_`-prefixed API. 
They should behave the same in Python + +def _make_test_object(cls): + return cls(False, # T_BOOL + 1, # T_BYTE + 2, # T_UBYTE + 3, # T_SHORT + 4, # T_USHORT + 5, # T_INT + 6, # T_UINT + 7, # T_LONG + 8, # T_ULONG + 23, # T_PYSSIZET + 9.99999,# T_FLOAT + 10.1010101010, # T_DOUBLE + "hi", # T_STRING_INPLACE + ) + + +class ReadWriteTests: + def setUp(self): + self.ts = _make_test_object(self.cls) def test_bool(self): + ts = self.ts ts.T_BOOL = True self.assertEqual(ts.T_BOOL, True) ts.T_BOOL = False @@ -37,6 +47,7 @@ def test_bool(self): self.assertRaises(TypeError, setattr, ts, 'T_BOOL', 1) def test_byte(self): + ts = self.ts ts.T_BYTE = CHAR_MAX self.assertEqual(ts.T_BYTE, CHAR_MAX) ts.T_BYTE = CHAR_MIN @@ -45,6 +56,7 @@ def test_byte(self): self.assertEqual(ts.T_UBYTE, UCHAR_MAX) def test_short(self): + ts = self.ts ts.T_SHORT = SHRT_MAX self.assertEqual(ts.T_SHORT, SHRT_MAX) ts.T_SHORT = SHRT_MIN @@ -53,6 +65,7 @@ def test_short(self): self.assertEqual(ts.T_USHORT, USHRT_MAX) def test_int(self): + ts = self.ts ts.T_INT = INT_MAX self.assertEqual(ts.T_INT, INT_MAX) ts.T_INT = INT_MIN @@ -61,6 +74,7 @@ def test_int(self): self.assertEqual(ts.T_UINT, UINT_MAX) def test_long(self): + ts = self.ts ts.T_LONG = LONG_MAX self.assertEqual(ts.T_LONG, LONG_MAX) ts.T_LONG = LONG_MIN @@ -69,13 +83,17 @@ def test_long(self): self.assertEqual(ts.T_ULONG, ULONG_MAX) def test_py_ssize_t(self): + ts = self.ts ts.T_PYSSIZET = PY_SSIZE_T_MAX self.assertEqual(ts.T_PYSSIZET, PY_SSIZE_T_MAX) ts.T_PYSSIZET = PY_SSIZE_T_MIN self.assertEqual(ts.T_PYSSIZET, PY_SSIZE_T_MIN) - @unittest.skipUnless(hasattr(ts, "T_LONGLONG"), "long long not present") def test_longlong(self): + ts = self.ts + if not hasattr(ts, "T_LONGLONG"): + self.skipTest("long long not present") + ts.T_LONGLONG = LLONG_MAX self.assertEqual(ts.T_LONGLONG, LLONG_MAX) ts.T_LONGLONG = LLONG_MIN @@ -91,6 +109,7 @@ def test_longlong(self): self.assertEqual(ts.T_ULONGLONG, 4) def test_bad_assignments(self): + ts = self.ts integer_attributes = [ 'T_BOOL', 'T_BYTE', 'T_UBYTE', @@ -109,37 +128,57 @@ def test_bad_assignments(self): self.assertRaises(TypeError, setattr, ts, attr, nonint) def test_inplace_string(self): + ts = self.ts self.assertEqual(ts.T_STRING_INPLACE, "hi") self.assertRaises(TypeError, setattr, ts, "T_STRING_INPLACE", "s") self.assertRaises(TypeError, delattr, ts, "T_STRING_INPLACE") +class ReadWriteTests_OldAPI(ReadWriteTests, unittest.TestCase): + cls = _test_structmembersType_OldAPI + +class ReadWriteTests_NewAPI(ReadWriteTests, unittest.TestCase): + cls = _test_structmembersType_NewAPI -class TestWarnings(unittest.TestCase): +class TestWarnings: + def setUp(self): + self.ts = _make_test_object(self.cls) def test_byte_max(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_BYTE = CHAR_MAX+1 def test_byte_min(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_BYTE = CHAR_MIN-1 def test_ubyte_max(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_UBYTE = UCHAR_MAX+1 def test_short_max(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_SHORT = SHRT_MAX+1 def test_short_min(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_SHORT = SHRT_MIN-1 def test_ushort_max(self): + ts = self.ts with warnings_helper.check_warnings(('', RuntimeWarning)): ts.T_USHORT = USHRT_MAX+1 +class TestWarnings_OldAPI(TestWarnings, unittest.TestCase): + cls = _test_structmembersType_OldAPI + 
+class TestWarnings_NewAPI(TestWarnings, unittest.TestCase): + cls = _test_structmembersType_NewAPI + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2022-11-02-16-51-24.gh-issue-47146.dsYDtI.rst b/Misc/NEWS.d/next/C API/2022-11-02-16-51-24.gh-issue-47146.dsYDtI.rst new file mode 100644 index 00000000000000..0f419427925dc9 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-11-02-16-51-24.gh-issue-47146.dsYDtI.rst @@ -0,0 +1,5 @@ +The ``structmember.h`` header is deprecated. Its non-deprecated contents are +now available just by including ``Python.h``, with a ``Py_`` prefix added if +it was missing. (Deprecated contents are :c:macro:`T_OBJECT`, +:c:macro:`T_NONE`, and no-op flags.) Patch by Petr Viktorin, based on +earlier work by Alexander Belopolsky and Matthias Braun. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 0ba0f51b2de451..aa12bcc85cebc7 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -94,7 +94,7 @@ added = '3.2' struct_abi_kind = 'full-abi' [struct.PyMemberDef] - added = '3.2' + added = '3.2' # Before 3.12, PyMemberDef required #include "structmember.h" struct_abi_kind = 'full-abi' [struct.PyGetSetDef] added = '3.2' @@ -1777,11 +1777,9 @@ added = '3.2' abi_only = true [function.PyMember_GetOne] - added = '3.2' - abi_only = true + added = '3.2' # Before 3.12, available in "structmember.h" [function.PyMember_SetOne] - added = '3.2' - abi_only = true + added = '3.2' # Before 3.12, available in "structmember.h" # TLS api is deprecated; superseded by TSS API @@ -2303,3 +2301,44 @@ added = '3.12' [typedef.releasebufferproc] added = '3.12' + +[const.Py_T_BYTE] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_SHORT] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_INT] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_LONG] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_LONGLONG] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_UBYTE] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_UINT] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_USHORT] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_ULONG] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_ULONGLONG] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_PYSSIZET] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_FLOAT] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_DOUBLE] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_BOOL] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_STRING] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_STRING_INPLACE] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_CHAR] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_T_OBJECT_EX] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_READONLY] + added = '3.12' # Before 3.12, available in "structmember.h" w/o Py_ prefix +[const.Py_AUDIT_READ] + added = '3.12' # Before 3.12, 
available in "structmember.h" diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 7307d37bb49295..d64752e8ca9609 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -169,7 +169,7 @@ @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c -@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/unicode.c _testcapi/getargs.c _testcapi/pytime.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c +@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/unicode.c _testcapi/getargs.c _testcapi/pytime.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/structmember.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c # Some testing modules MUST be built as shared libraries. diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h index e25314a7caacfd..7ba3c4ebff8cde 100644 --- a/Modules/_testcapi/parts.h +++ b/Modules/_testcapi/parts.h @@ -35,6 +35,7 @@ int _PyTestCapi_Init_Mem(PyObject *module); int _PyTestCapi_Init_Watchers(PyObject *module); int _PyTestCapi_Init_Long(PyObject *module); int _PyTestCapi_Init_Float(PyObject *module); +int _PyTestCapi_Init_Structmember(PyObject *module); #ifdef LIMITED_API_AVAILABLE int _PyTestCapi_Init_VectorcallLimited(PyObject *module); diff --git a/Modules/_testcapi/structmember.c b/Modules/_testcapi/structmember.c new file mode 100644 index 00000000000000..0fb872a4328d60 --- /dev/null +++ b/Modules/_testcapi/structmember.c @@ -0,0 +1,217 @@ +#define PY_SSIZE_T_CLEAN +#include "parts.h" +#include // for offsetof() + + +// This defines two classes that contain all the simple member types, one +// using "new" Py_-prefixed API, and the other using "old" . +// They should behave identically in Python. 
+ +typedef struct { + char bool_member; + char byte_member; + unsigned char ubyte_member; + short short_member; + unsigned short ushort_member; + int int_member; + unsigned int uint_member; + long long_member; + unsigned long ulong_member; + Py_ssize_t pyssizet_member; + float float_member; + double double_member; + char inplace_member[6]; + long long longlong_member; + unsigned long long ulonglong_member; +} all_structmembers; + +typedef struct { + PyObject_HEAD + all_structmembers structmembers; +} test_structmembers; + + +static struct PyMemberDef test_members_newapi[] = { + {"T_BOOL", Py_T_BOOL, offsetof(test_structmembers, structmembers.bool_member), 0, NULL}, + {"T_BYTE", Py_T_BYTE, offsetof(test_structmembers, structmembers.byte_member), 0, NULL}, + {"T_UBYTE", Py_T_UBYTE, offsetof(test_structmembers, structmembers.ubyte_member), 0, NULL}, + {"T_SHORT", Py_T_SHORT, offsetof(test_structmembers, structmembers.short_member), 0, NULL}, + {"T_USHORT", Py_T_USHORT, offsetof(test_structmembers, structmembers.ushort_member), 0, NULL}, + {"T_INT", Py_T_INT, offsetof(test_structmembers, structmembers.int_member), 0, NULL}, + {"T_UINT", Py_T_UINT, offsetof(test_structmembers, structmembers.uint_member), 0, NULL}, + {"T_LONG", Py_T_LONG, offsetof(test_structmembers, structmembers.long_member), 0, NULL}, + {"T_ULONG", Py_T_ULONG, offsetof(test_structmembers, structmembers.ulong_member), 0, NULL}, + {"T_PYSSIZET", Py_T_PYSSIZET, offsetof(test_structmembers, structmembers.pyssizet_member), 0, NULL}, + {"T_FLOAT", Py_T_FLOAT, offsetof(test_structmembers, structmembers.float_member), 0, NULL}, + {"T_DOUBLE", Py_T_DOUBLE, offsetof(test_structmembers, structmembers.double_member), 0, NULL}, + {"T_STRING_INPLACE", Py_T_STRING_INPLACE, offsetof(test_structmembers, structmembers.inplace_member), 0, NULL}, + {"T_LONGLONG", Py_T_LONGLONG, offsetof(test_structmembers, structmembers.longlong_member), 0, NULL}, + {"T_ULONGLONG", Py_T_ULONGLONG, offsetof(test_structmembers, structmembers.ulonglong_member), 0, NULL}, + {NULL} +}; + +static PyObject * +test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + static char *keywords[] = { + "T_BOOL", "T_BYTE", "T_UBYTE", "T_SHORT", "T_USHORT", + "T_INT", "T_UINT", "T_LONG", "T_ULONG", "T_PYSSIZET", + "T_FLOAT", "T_DOUBLE", "T_STRING_INPLACE", + "T_LONGLONG", "T_ULONGLONG", + NULL}; + static const char fmt[] = "|bbBhHiIlknfds#LK"; + test_structmembers *ob; + const char *s = NULL; + Py_ssize_t string_len = 0; + ob = PyObject_New(test_structmembers, type); + if (ob == NULL) { + return NULL; + } + memset(&ob->structmembers, 0, sizeof(all_structmembers)); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, fmt, keywords, + &ob->structmembers.bool_member, + &ob->structmembers.byte_member, + &ob->structmembers.ubyte_member, + &ob->structmembers.short_member, + &ob->structmembers.ushort_member, + &ob->structmembers.int_member, + &ob->structmembers.uint_member, + &ob->structmembers.long_member, + &ob->structmembers.ulong_member, + &ob->structmembers.pyssizet_member, + &ob->structmembers.float_member, + &ob->structmembers.double_member, + &s, &string_len, + &ob->structmembers.longlong_member, + &ob->structmembers.ulonglong_member)) + { + Py_DECREF(ob); + return NULL; + } + if (s != NULL) { + if (string_len > 5) { + Py_DECREF(ob); + PyErr_SetString(PyExc_ValueError, "string too long"); + return NULL; + } + strcpy(ob->structmembers.inplace_member, s); + } + else { + strcpy(ob->structmembers.inplace_member, ""); + } + return (PyObject *)ob; +} + +static 
PyType_Slot test_structmembers_slots[] = { + {Py_tp_new, test_structmembers_new}, + {Py_tp_members, test_members_newapi}, + {0}, +}; + +static PyType_Spec test_structmembers_spec = { + .name = "_testcapi._test_structmembersType_NewAPI", + .flags = Py_TPFLAGS_DEFAULT, + .basicsize = sizeof(test_structmembers), + .slots = test_structmembers_slots, +}; + +#include + +static struct PyMemberDef test_members[] = { + {"T_BOOL", T_BOOL, offsetof(test_structmembers, structmembers.bool_member), 0, NULL}, + {"T_BYTE", T_BYTE, offsetof(test_structmembers, structmembers.byte_member), 0, NULL}, + {"T_UBYTE", T_UBYTE, offsetof(test_structmembers, structmembers.ubyte_member), 0, NULL}, + {"T_SHORT", T_SHORT, offsetof(test_structmembers, structmembers.short_member), 0, NULL}, + {"T_USHORT", T_USHORT, offsetof(test_structmembers, structmembers.ushort_member), 0, NULL}, + {"T_INT", T_INT, offsetof(test_structmembers, structmembers.int_member), 0, NULL}, + {"T_UINT", T_UINT, offsetof(test_structmembers, structmembers.uint_member), 0, NULL}, + {"T_LONG", T_LONG, offsetof(test_structmembers, structmembers.long_member), 0, NULL}, + {"T_ULONG", T_ULONG, offsetof(test_structmembers, structmembers.ulong_member), 0, NULL}, + {"T_PYSSIZET", T_PYSSIZET, offsetof(test_structmembers, structmembers.pyssizet_member), 0, NULL}, + {"T_FLOAT", T_FLOAT, offsetof(test_structmembers, structmembers.float_member), 0, NULL}, + {"T_DOUBLE", T_DOUBLE, offsetof(test_structmembers, structmembers.double_member), 0, NULL}, + {"T_STRING_INPLACE", T_STRING_INPLACE, offsetof(test_structmembers, structmembers.inplace_member), 0, NULL}, + {"T_LONGLONG", T_LONGLONG, offsetof(test_structmembers, structmembers.longlong_member), 0, NULL}, + {"T_ULONGLONG", T_ULONGLONG, offsetof(test_structmembers, structmembers.ulonglong_member), 0, NULL}, + {NULL} +}; + + +static void +test_structmembers_free(PyObject *ob) +{ + PyObject_Free(ob); +} + +/* Designated initializers would work too, but this does test the *old* API */ +static PyTypeObject test_structmembersType_OldAPI= { + PyVarObject_HEAD_INIT(NULL, 0) + "test_structmembersType_OldAPI", + sizeof(test_structmembers), /* tp_basicsize */ + 0, /* tp_itemsize */ + test_structmembers_free, /* destructor tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + PyObject_GenericSetAttr, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + "Type containing all structmember types", + 0, /* traverseproc tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + test_members, /* tp_members */ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + test_structmembers_new, /* tp_new */ +}; + + +int +_PyTestCapi_Init_Structmember(PyObject *m) +{ + int res; + res = PyType_Ready(&test_structmembersType_OldAPI); + if (res < 0) { + return -1; + } + res = PyModule_AddObject( + m, + "_test_structmembersType_OldAPI", + (PyObject *)&test_structmembersType_OldAPI); + if (res < 0) { + return -1; + } + + PyObject *test_structmembersType_NewAPI = PyType_FromModuleAndSpec( + m, &test_structmembers_spec, NULL); + if (!test_structmembersType_NewAPI) { + return -1; + } + res = PyModule_AddType(m, (PyTypeObject*)test_structmembersType_NewAPI); + Py_DECREF(test_structmembersType_NewAPI); + 
if (res < 0) { + return -1; + } + + return 0; +} diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 9dd09f68003d93..83eef73a875d9d 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -21,7 +21,7 @@ #include "Python.h" #include "marshal.h" // PyMarshal_WriteLongToFile -#include "structmember.h" // PyMemberDef +#include "structmember.h" // for offsetof(), T_OBJECT #include // FLT_MAX #include @@ -3371,147 +3371,6 @@ static PyMethodDef TestMethods[] = { {NULL, NULL} /* sentinel */ }; -typedef struct { - char bool_member; - char byte_member; - unsigned char ubyte_member; - short short_member; - unsigned short ushort_member; - int int_member; - unsigned int uint_member; - long long_member; - unsigned long ulong_member; - Py_ssize_t pyssizet_member; - float float_member; - double double_member; - char inplace_member[6]; - long long longlong_member; - unsigned long long ulonglong_member; -} all_structmembers; - -typedef struct { - PyObject_HEAD - all_structmembers structmembers; -} test_structmembers; - -static struct PyMemberDef test_members[] = { - {"T_BOOL", T_BOOL, offsetof(test_structmembers, structmembers.bool_member), 0, NULL}, - {"T_BYTE", T_BYTE, offsetof(test_structmembers, structmembers.byte_member), 0, NULL}, - {"T_UBYTE", T_UBYTE, offsetof(test_structmembers, structmembers.ubyte_member), 0, NULL}, - {"T_SHORT", T_SHORT, offsetof(test_structmembers, structmembers.short_member), 0, NULL}, - {"T_USHORT", T_USHORT, offsetof(test_structmembers, structmembers.ushort_member), 0, NULL}, - {"T_INT", T_INT, offsetof(test_structmembers, structmembers.int_member), 0, NULL}, - {"T_UINT", T_UINT, offsetof(test_structmembers, structmembers.uint_member), 0, NULL}, - {"T_LONG", T_LONG, offsetof(test_structmembers, structmembers.long_member), 0, NULL}, - {"T_ULONG", T_ULONG, offsetof(test_structmembers, structmembers.ulong_member), 0, NULL}, - {"T_PYSSIZET", T_PYSSIZET, offsetof(test_structmembers, structmembers.pyssizet_member), 0, NULL}, - {"T_FLOAT", T_FLOAT, offsetof(test_structmembers, structmembers.float_member), 0, NULL}, - {"T_DOUBLE", T_DOUBLE, offsetof(test_structmembers, structmembers.double_member), 0, NULL}, - {"T_STRING_INPLACE", T_STRING_INPLACE, offsetof(test_structmembers, structmembers.inplace_member), 0, NULL}, - {"T_LONGLONG", T_LONGLONG, offsetof(test_structmembers, structmembers.longlong_member), 0, NULL}, - {"T_ULONGLONG", T_ULONGLONG, offsetof(test_structmembers, structmembers.ulonglong_member), 0, NULL}, - {NULL} -}; - - -static PyObject * -test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - static char *keywords[] = { - "T_BOOL", "T_BYTE", "T_UBYTE", "T_SHORT", "T_USHORT", - "T_INT", "T_UINT", "T_LONG", "T_ULONG", "T_PYSSIZET", - "T_FLOAT", "T_DOUBLE", "T_STRING_INPLACE", - "T_LONGLONG", "T_ULONGLONG", - NULL}; - static const char fmt[] = "|bbBhHiIlknfds#LK"; - test_structmembers *ob; - const char *s = NULL; - Py_ssize_t string_len = 0; - ob = PyObject_New(test_structmembers, type); - if (ob == NULL) - return NULL; - memset(&ob->structmembers, 0, sizeof(all_structmembers)); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, fmt, keywords, - &ob->structmembers.bool_member, - &ob->structmembers.byte_member, - &ob->structmembers.ubyte_member, - &ob->structmembers.short_member, - &ob->structmembers.ushort_member, - &ob->structmembers.int_member, - &ob->structmembers.uint_member, - &ob->structmembers.long_member, - &ob->structmembers.ulong_member, - &ob->structmembers.pyssizet_member, - 
&ob->structmembers.float_member, - &ob->structmembers.double_member, - &s, &string_len - , &ob->structmembers.longlong_member, - &ob->structmembers.ulonglong_member - )) { - Py_DECREF(ob); - return NULL; - } - if (s != NULL) { - if (string_len > 5) { - Py_DECREF(ob); - PyErr_SetString(PyExc_ValueError, "string too long"); - return NULL; - } - strcpy(ob->structmembers.inplace_member, s); - } - else { - strcpy(ob->structmembers.inplace_member, ""); - } - return (PyObject *)ob; -} - -static void -test_structmembers_free(PyObject *ob) -{ - PyObject_Free(ob); -} - -static PyTypeObject test_structmembersType = { - PyVarObject_HEAD_INIT(NULL, 0) - "test_structmembersType", - sizeof(test_structmembers), /* tp_basicsize */ - 0, /* tp_itemsize */ - test_structmembers_free, /* destructor tp_dealloc */ - 0, /* tp_vectorcall_offset */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_as_async */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - PyObject_GenericSetAttr, /* tp_setattro */ - 0, /* tp_as_buffer */ - 0, /* tp_flags */ - "Type containing all structmember types", - 0, /* traverseproc tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - test_members, /* tp_members */ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - test_structmembers_new, /* tp_new */ -}; - typedef struct { PyObject_HEAD @@ -4064,11 +3923,6 @@ PyInit__testcapi(void) Py_SET_TYPE(&_HashInheritanceTester_Type, &PyType_Type); - Py_SET_TYPE(&test_structmembersType, &PyType_Type); - Py_INCREF(&test_structmembersType); - /* don't use a name starting with "test", since we don't want - test_capi to automatically call this */ - PyModule_AddObject(m, "_test_structmembersType", (PyObject *)&test_structmembersType); if (PyType_Ready(&matmulType) < 0) return NULL; Py_INCREF(&matmulType); @@ -4197,6 +4051,9 @@ PyInit__testcapi(void) if (_PyTestCapi_Init_Float(m) < 0) { return NULL; } + if (_PyTestCapi_Init_Structmember(m) < 0) { + return NULL; + } #ifndef LIMITED_API_AVAILABLE PyModule_AddObjectRef(m, "LIMITED_API_AVAILABLE", Py_False); diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj index d91cdfef7b6bd9..58bf4e1eacbf21 100644 --- a/PCbuild/_testcapi.vcxproj +++ b/PCbuild/_testcapi.vcxproj @@ -106,6 +106,7 @@ + diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters index 1b112b164ff0cd..101c5322761634 100644 --- a/PCbuild/_testcapi.vcxproj.filters +++ b/PCbuild/_testcapi.vcxproj.filters @@ -48,6 +48,9 @@ Source Files + + Source Files + From 959ba45d75953caa911e16b4c2a277978fc4b9b0 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Tue, 22 Nov 2022 11:14:23 +0100 Subject: [PATCH 005/112] GH-97001: Release GIL in termios extension (#99503) Without releasing the GIL calls to termios APIs might block the entire interpreter. 
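The change applies the standard pattern for blocking system calls to each termios wrapper. A minimal sketch of that pattern as a hypothetical standalone helper (using tcdrain() and a plain OSError rather than the module's termios.error state):

    #include <Python.h>
    #include <termios.h>                /* tcdrain() */

    static PyObject *
    drain_fd(PyObject *module, PyObject *arg)
    {
        int fd = (int)PyLong_AsLong(arg);
        if (fd == -1 && PyErr_Occurred()) {
            return NULL;
        }

        int r;
        Py_BEGIN_ALLOW_THREADS          /* release the GIL: tcdrain() may block */
        r = tcdrain(fd);
        Py_END_ALLOW_THREADS            /* re-acquire it before touching Python state */

        if (r == -1) {
            return PyErr_SetFromErrno(PyExc_OSError);
        }
        Py_RETURN_NONE;
    }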
--- ...2-11-15-10-55-24.gh-issue-97001.KeQuVF.rst | 1 + Modules/termios.c | 88 ++++++++++++++++--- 2 files changed, 77 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-15-10-55-24.gh-issue-97001.KeQuVF.rst diff --git a/Misc/NEWS.d/next/Library/2022-11-15-10-55-24.gh-issue-97001.KeQuVF.rst b/Misc/NEWS.d/next/Library/2022-11-15-10-55-24.gh-issue-97001.KeQuVF.rst new file mode 100644 index 00000000000000..014161cf7b1d44 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-15-10-55-24.gh-issue-97001.KeQuVF.rst @@ -0,0 +1 @@ +Release the GIL when calling termios APIs to avoid blocking threads. diff --git a/Modules/termios.c b/Modules/termios.c index 354e5ca18d04d8..fcc8f042679870 100644 --- a/Modules/termios.c +++ b/Modules/termios.c @@ -82,7 +82,12 @@ termios_tcgetattr_impl(PyObject *module, int fd) { termiosmodulestate *state = PyModule_GetState(module); struct termios mode; - if (tcgetattr(fd, &mode) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcgetattr(fd, &mode); + Py_END_ALLOW_THREADS + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -169,7 +174,12 @@ termios_tcsetattr_impl(PyObject *module, int fd, int when, PyObject *term) /* Get the old mode, in case there are any hidden fields... */ termiosmodulestate *state = PyModule_GetState(module); struct termios mode; - if (tcgetattr(fd, &mode) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcgetattr(fd, &mode); + Py_END_ALLOW_THREADS + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -211,7 +221,12 @@ termios_tcsetattr_impl(PyObject *module, int fd, int when, PyObject *term) return PyErr_SetFromErrno(state->TermiosError); if (cfsetospeed(&mode, (speed_t) ospeed) == -1) return PyErr_SetFromErrno(state->TermiosError); - if (tcsetattr(fd, when, &mode) == -1) + + Py_BEGIN_ALLOW_THREADS + r = tcsetattr(fd, when, &mode); + Py_END_ALLOW_THREADS + + if (r == -1) return PyErr_SetFromErrno(state->TermiosError); Py_RETURN_NONE; @@ -235,7 +250,13 @@ termios_tcsendbreak_impl(PyObject *module, int fd, int duration) /*[clinic end generated code: output=5945f589b5d3ac66 input=dc2f32417691f8ed]*/ { termiosmodulestate *state = PyModule_GetState(module); - if (tcsendbreak(fd, duration) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcsendbreak(fd, duration); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -256,7 +277,13 @@ termios_tcdrain_impl(PyObject *module, int fd) /*[clinic end generated code: output=5fd86944c6255955 input=c99241b140b32447]*/ { termiosmodulestate *state = PyModule_GetState(module); - if (tcdrain(fd) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcdrain(fd); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -282,7 +309,13 @@ termios_tcflush_impl(PyObject *module, int fd, int queue) /*[clinic end generated code: output=2424f80312ec2f21 input=0f7d08122ddc07b5]*/ { termiosmodulestate *state = PyModule_GetState(module); - if (tcflush(fd, queue) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcflush(fd, queue); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -308,7 +341,13 @@ termios_tcflow_impl(PyObject *module, int fd, int action) /*[clinic end generated code: output=afd10928e6ea66eb input=c6aff0640b6efd9c]*/ { termiosmodulestate *state = PyModule_GetState(module); - if (tcflow(fd, action) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = tcflow(fd, action); + Py_END_ALLOW_THREADS + + if (r == -1) { return 
PyErr_SetFromErrno(state->TermiosError); } @@ -333,7 +372,13 @@ termios_tcgetwinsize_impl(PyObject *module, int fd) #if defined(TIOCGWINSZ) termiosmodulestate *state = PyModule_GetState(module); struct winsize w; - if (ioctl(fd, TIOCGWINSZ, &w) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = ioctl(fd, TIOCGWINSZ, &w); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -352,7 +397,12 @@ termios_tcgetwinsize_impl(PyObject *module, int fd) #elif defined(TIOCGSIZE) termiosmodulestate *state = PyModule_GetState(module); struct ttysize s; - if (ioctl(fd, TIOCGSIZE, &s) == -1) { + int r; + + Py_BEGIN_ALLOW_THREADS + r = ioctl(fd, TIOCGSIZE, &s); + Py_END_ALLOW_THREADS + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -433,15 +483,25 @@ termios_tcsetwinsize_impl(PyObject *module, int fd, PyObject *winsz) return NULL; } - if (ioctl(fd, TIOCSWINSZ, &w) == -1) { + int r; + Py_BEGIN_ALLOW_THREADS + r = ioctl(fd, TIOCSWINSZ, &w); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } Py_RETURN_NONE; #elif defined(TIOCGSIZE) && defined(TIOCSSIZE) struct ttysize s; + int r; /* Get the old ttysize because it might have more fields. */ - if (ioctl(fd, TIOCGSIZE, &s) == -1) { + Py_BEGIN_ALLOW_THREADS + r = ioctl(fd, TIOCGSIZE, &s); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } @@ -453,7 +513,11 @@ termios_tcsetwinsize_impl(PyObject *module, int fd, PyObject *winsz) return NULL; } - if (ioctl(fd, TIOCSSIZE, &s) == -1) { + Py_BEGIN_ALLOW_THREADS + r = ioctl(fd, TIOCSSIZE, &s); + Py_END_ALLOW_THREADS + + if (r == -1) { return PyErr_SetFromErrno(state->TermiosError); } From bc3a11d21ddef28047b18c0f6a5068fa9fb16da2 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Tue, 22 Nov 2022 11:33:37 +0100 Subject: [PATCH 006/112] GH-92892: Add section about variadic functions to ctypes documentation (#99529) On some platforms, and in particular macOS/arm64, the calling convention for variadic arguments is different from the regular calling convention. Add a section to the documentation to document this. --- Doc/library/ctypes.rst | 20 +++++++++++++++++++ ...2-11-16-12-52-23.gh-issue-92892.TS-P0j.rst | 1 + 2 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Documentation/2022-11-16-12-52-23.gh-issue-92892.TS-P0j.rst diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index e85a6cb7149c7b..71e5545ffe47c6 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -373,6 +373,26 @@ that they can be converted to the required C data type:: 31 >>> +.. _ctypes-calling-variadic-functions: + +Calling varadic functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +On a lot of platforms calling variadic functions through ctypes is exactly the same +as calling functions with a fixed number of parameters. On some platforms, and in +particular ARM64 for Apple Platforms, the calling convention for variadic functions +is different than that for regular functions. + +On those platforms it is required to specify the *argtypes* attribute for the +regular, non-variadic, function arguments: + +.. code-block:: python3 + + libc.printf.argtypes = [ctypes.c_char_p] + +Because specifying the attribute does inhibit portability it is adviced to always +specify ``argtypes`` for all variadic functions. + .. 
_ctypes-calling-functions-with-own-custom-data-types: diff --git a/Misc/NEWS.d/next/Documentation/2022-11-16-12-52-23.gh-issue-92892.TS-P0j.rst b/Misc/NEWS.d/next/Documentation/2022-11-16-12-52-23.gh-issue-92892.TS-P0j.rst new file mode 100644 index 00000000000000..54e421d19d9da3 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2022-11-16-12-52-23.gh-issue-92892.TS-P0j.rst @@ -0,0 +1 @@ +Document that calling variadic functions with ctypes requires special care on macOS/arm64 (and possibly other platforms). From 1acdfec359fdf3db936168480be0f4157273c200 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 22 Nov 2022 13:41:14 +0300 Subject: [PATCH 007/112] gh-99341: Cover type ignore nodes when incrementing line numbers (GH-99422) --- Lib/ast.py | 6 ++++++ Lib/test/test_ast.py | 12 ++++++++++++ .../2022-11-13-02-06-56.gh-issue-99341.8-OlwB.rst | 2 ++ 3 files changed, 20 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-11-13-02-06-56.gh-issue-99341.8-OlwB.rst diff --git a/Lib/ast.py b/Lib/ast.py index 1a94e9368c161a..2cbc80a9835aa5 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -237,6 +237,12 @@ def increment_lineno(node, n=1): location in a file. """ for child in walk(node): + # TypeIgnore is a special case where lineno is not an attribute + # but rather a field of the node itself. + if isinstance(child, TypeIgnore): + child.lineno = getattr(child, 'lineno', 0) + n + continue + if 'lineno' in child._attributes: child.lineno = getattr(child, 'lineno', 0) + n if ( diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index b34644118d2815..773fba87632b0a 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1036,6 +1036,18 @@ def test_increment_lineno(self): self.assertEqual(ast.increment_lineno(src).lineno, 2) self.assertIsNone(ast.increment_lineno(src).end_lineno) + def test_increment_lineno_on_module(self): + src = ast.parse(dedent("""\ + a = 1 + b = 2 # type: ignore + c = 3 + d = 4 # type: ignore@tag + """), type_comments=True) + ast.increment_lineno(src, n=5) + self.assertEqual(src.type_ignores[0].lineno, 7) + self.assertEqual(src.type_ignores[1].lineno, 9) + self.assertEqual(src.type_ignores[1].tag, '@tag') + def test_iter_fields(self): node = ast.parse('foo()', mode='eval') d = dict(ast.iter_fields(node.body)) diff --git a/Misc/NEWS.d/next/Library/2022-11-13-02-06-56.gh-issue-99341.8-OlwB.rst b/Misc/NEWS.d/next/Library/2022-11-13-02-06-56.gh-issue-99341.8-OlwB.rst new file mode 100644 index 00000000000000..451561c579daff --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-13-02-06-56.gh-issue-99341.8-OlwB.rst @@ -0,0 +1,2 @@ +Fix :func:`ast.increment_lineno` to also cover :class:`ast.TypeIgnore` when +changing line numbers. From 20d9749a0f9b9fa6946019f04a54b6287d16588e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2022 13:04:19 +0100 Subject: [PATCH 008/112] gh-99537: Use Py_SETREF() function in longobject C code (#99655) Replace "Py_DECREF(var); var = new;" with "Py_SETREF(var, new);" in longobject.c and _testcapi/long.c. 
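The point of the rewrite is ordering: with "Py_DECREF(var); var = new;" the old object can be destroyed while the variable still points at it, so any code that runs during the DECREF (a destructor, a callback, a GC pass) can observe a dangling pointer. Py_SETREF() stores the new value first and only then releases the old one. A minimal illustrative sketch, with a made-up struct field rather than code from longobject.c:

    #include <Python.h>

    typedef struct {
        PyObject_HEAD
        PyObject *cached;       /* illustrative field, not from the patch */
    } ExampleObject;

    static void
    example_set_cached(ExampleObject *self, PyObject *new_value)
    {
        /* Old spelling:  Py_DECREF(self->cached); self->cached = new_value;
         * Between those two statements self->cached dangles. */
        Py_SETREF(self->cached, new_value);   /* store first, then DECREF the old value;
                                                 steals a reference to new_value */
    }
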
--- Modules/_testcapi/long.c | 12 +++---- Objects/longobject.c | 73 +++++++++++++--------------------------- 2 files changed, 28 insertions(+), 57 deletions(-) diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 70e315cdb8a3d0..1be8de5e576254 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -121,8 +121,7 @@ test_long_and_overflow(PyObject *self, PyObject *Py_UNUSED(ignored)) } temp = PyNumber_Add(num, one); Py_DECREF(one); - Py_DECREF(num); - num = temp; + Py_SETREF(num, temp); if (num == NULL) return NULL; overflow = 0; @@ -165,8 +164,7 @@ test_long_and_overflow(PyObject *self, PyObject *Py_UNUSED(ignored)) } temp = PyNumber_Subtract(num, one); Py_DECREF(one); - Py_DECREF(num); - num = temp; + Py_SETREF(num, temp); if (num == NULL) return NULL; overflow = 0; @@ -285,8 +283,7 @@ test_long_long_and_overflow(PyObject *self, PyObject *Py_UNUSED(ignored)) } temp = PyNumber_Add(num, one); Py_DECREF(one); - Py_DECREF(num); - num = temp; + Py_SETREF(num, temp); if (num == NULL) return NULL; overflow = 0; @@ -329,8 +326,7 @@ test_long_long_and_overflow(PyObject *self, PyObject *Py_UNUSED(ignored)) } temp = PyNumber_Subtract(num, one); Py_DECREF(one); - Py_DECREF(num); - num = temp; + Py_SETREF(num, temp); if (num == NULL) return NULL; overflow = 0; diff --git a/Objects/longobject.c b/Objects/longobject.c index bb9e40f862aa11..f4bd981e4b9870 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2598,8 +2598,7 @@ long_from_non_binary_base(const char *start, const char *end, Py_ssize_t digits, memcpy(tmp->ob_digit, z->ob_digit, sizeof(digit) * size_z); - Py_DECREF(z); - z = tmp; + Py_SETREF(z, tmp); z->ob_digit[size_z] = (digit)c; ++size_z; } @@ -4140,8 +4139,7 @@ l_divmod(PyLongObject *v, PyLongObject *w, (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { PyLongObject *temp; temp = (PyLongObject *) long_add(mod, w); - Py_DECREF(mod); - mod = temp; + Py_SETREF(mod, temp); if (mod == NULL) { Py_DECREF(div); return -1; @@ -4152,8 +4150,7 @@ l_divmod(PyLongObject *v, PyLongObject *w, Py_DECREF(div); return -1; } - Py_DECREF(div); - div = temp; + Py_SETREF(div, temp); } if (pdiv != NULL) *pdiv = div; @@ -4189,8 +4186,7 @@ l_mod(PyLongObject *v, PyLongObject *w, PyLongObject **pmod) (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { PyLongObject *temp; temp = (PyLongObject *) long_add(mod, w); - Py_DECREF(mod); - mod = temp; + Py_SETREF(mod, temp); if (mod == NULL) return -1; } @@ -4430,8 +4426,7 @@ long_true_divide(PyObject *v, PyObject *w) else { PyLongObject *div, *rem; div = x_divrem(x, b, &rem); - Py_DECREF(x); - x = div; + Py_SETREF(x, div); if (x == NULL) goto error; if (Py_SIZE(rem)) @@ -4561,8 +4556,7 @@ long_invmod(PyLongObject *a, PyLongObject *n) if (l_divmod(a, n, &q, &r) == -1) { goto Error; } - Py_DECREF(a); - a = n; + Py_SETREF(a, n); n = r; t = (PyLongObject *)long_mul(q, c); Py_DECREF(q); @@ -4574,8 +4568,7 @@ long_invmod(PyLongObject *a, PyLongObject *n) if (s == NULL) { goto Error; } - Py_DECREF(b); - b = c; + Py_SETREF(b, c); c = s; } /* references now owned: a, b, c, n */ @@ -4670,8 +4663,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) temp = (PyLongObject *)_PyLong_Copy(c); if (temp == NULL) goto Error; - Py_DECREF(c); - c = temp; + Py_SETREF(c, temp); temp = NULL; _PyLong_Negate(&c); if (c == NULL) @@ -4691,8 +4683,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) temp = (PyLongObject *)_PyLong_Copy(b); if (temp == NULL) goto Error; - Py_DECREF(b); - b = temp; + Py_SETREF(b, temp); temp = NULL; _PyLong_Negate(&b); if (b == NULL) @@ -4701,8 
+4692,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) temp = long_invmod(a, c); if (temp == NULL) goto Error; - Py_DECREF(a); - a = temp; + Py_SETREF(a, temp); temp = NULL; } @@ -4718,8 +4708,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) { if (l_mod(a, c, &temp) < 0) goto Error; - Py_DECREF(a); - a = temp; + Py_SETREF(a, temp); temp = NULL; } } @@ -4786,9 +4775,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) * because we're primarily trying to cut overhead for small powers. */ assert(bi); /* else there is no significant bit */ - Py_INCREF(a); - Py_DECREF(z); - z = a; + Py_SETREF(z, Py_NewRef(a)); for (bit = 2; ; bit <<= 1) { if (bit > bi) { /* found the first bit */ assert((bi & bit) == 0); @@ -4874,8 +4861,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) temp = (PyLongObject *)long_sub(z, c); if (temp == NULL) goto Error; - Py_DECREF(z); - z = temp; + Py_SETREF(z, temp); temp = NULL; } goto Done; @@ -5444,8 +5430,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) /* no progress; do a Euclidean step */ if (l_mod(a, b, &r) < 0) goto error; - Py_DECREF(a); - a = b; + Py_SETREF(a, b); b = r; alloc_a = alloc_b; alloc_b = Py_SIZE(b); @@ -5766,8 +5751,7 @@ _PyLong_DivmodNear(PyObject *a, PyObject *b) goto error; if (quo_is_neg) { temp = long_neg((PyLongObject*)twice_rem); - Py_DECREF(twice_rem); - twice_rem = temp; + Py_SETREF(twice_rem, temp); if (twice_rem == NULL) goto error; } @@ -5781,8 +5765,7 @@ _PyLong_DivmodNear(PyObject *a, PyObject *b) temp = long_sub(quo, (PyLongObject *)one); else temp = long_add(quo, (PyLongObject *)one); - Py_DECREF(quo); - quo = (PyLongObject *)temp; + Py_SETREF(quo, (PyLongObject *)temp); if (quo == NULL) goto error; /* and remainder */ @@ -5790,8 +5773,7 @@ _PyLong_DivmodNear(PyObject *a, PyObject *b) temp = long_add(rem, (PyLongObject *)b); else temp = long_sub(rem, (PyLongObject *)b); - Py_DECREF(rem); - rem = (PyLongObject *)temp; + Py_SETREF(rem, (PyLongObject *)temp); if (rem == NULL) goto error; } @@ -5857,8 +5839,7 @@ int___round___impl(PyObject *self, PyObject *o_ndigits) /* result = self - divmod_near(self, 10 ** -ndigits)[1] */ temp = long_neg((PyLongObject*)ndigits); - Py_DECREF(ndigits); - ndigits = temp; + Py_SETREF(ndigits, temp); if (ndigits == NULL) return NULL; @@ -5870,21 +5851,18 @@ int___round___impl(PyObject *self, PyObject *o_ndigits) temp = long_pow(result, ndigits, Py_None); Py_DECREF(ndigits); - Py_DECREF(result); - result = temp; + Py_SETREF(result, temp); if (result == NULL) return NULL; temp = _PyLong_DivmodNear(self, result); - Py_DECREF(result); - result = temp; + Py_SETREF(result, temp); if (result == NULL) return NULL; temp = long_sub((PyLongObject *)self, (PyLongObject *)PyTuple_GET_ITEM(result, 1)); - Py_DECREF(result); - result = temp; + Py_SETREF(result, temp); return result; } @@ -5949,8 +5927,7 @@ int_bit_length_impl(PyObject *self) Py_DECREF(x); if (y == NULL) goto error; - Py_DECREF(result); - result = y; + Py_SETREF(result, y); x = (PyLongObject *)PyLong_FromLong((long)msd_bits); if (x == NULL) @@ -5959,8 +5936,7 @@ int_bit_length_impl(PyObject *self) Py_DECREF(x); if (y == NULL) goto error; - Py_DECREF(result); - result = y; + Py_SETREF(result, y); return (PyObject *)result; @@ -6026,8 +6002,7 @@ int_bit_count_impl(PyObject *self) if (y == NULL) { goto error; } - Py_DECREF(result); - result = y; + Py_SETREF(result, y); } return result; From 3db0a21f731cec28a89f7495a82ee2670bce75fe Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 22 Nov 2022 04:06:44 -0800 
Subject: [PATCH 009/112] gh-91053: Add an optional callback that is invoked whenever a function is modified (#98175) --- Doc/c-api/function.rst | 60 +++++ Include/cpython/funcobject.h | 49 ++++ Include/internal/pycore_function.h | 2 + Include/internal/pycore_interp.h | 6 + Lib/test/test_capi/test_watchers.py | 93 +++++++ ...2-10-05-11-44-52.gh-issue-91053.f5Bo3p.rst | 4 + Modules/_testcapi/watchers.c | 237 ++++++++++++++++++ Objects/funcobject.c | 68 +++++ Python/pystate.c | 5 + 9 files changed, 524 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-05-11-44-52.gh-issue-91053.f5Bo3p.rst diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index df88e85e518829..3cce18bdde3057 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -118,3 +118,63 @@ There are a few functions specific to Python functions. must be a dictionary or ``Py_None``. Raises :exc:`SystemError` and returns ``-1`` on failure. + + +.. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) + + Register *callback* as a function watcher for the current interpreter. + Return an ID which may be passed to :c:func:`PyFunction_ClearWatcher`. + In case of error (e.g. no more watcher IDs available), + return ``-1`` and set an exception. + + .. versionadded:: 3.12 + + +.. c:function:: int PyFunction_ClearWatcher(int watcher_id) + + Clear watcher identified by *watcher_id* previously returned from + :c:func:`PyFunction_AddWatcher` for the current interpreter. + Return ``0`` on success, or ``-1`` and set an exception on error + (e.g. if the given *watcher_id* was never registered.) + + .. versionadded:: 3.12 + + +.. c:type:: PyFunction_WatchEvent + + Enumeration of possible function watcher events: + - ``PyFunction_EVENT_CREATE`` + - ``PyFunction_EVENT_DESTROY`` + - ``PyFunction_EVENT_MODIFY_CODE`` + - ``PyFunction_EVENT_MODIFY_DEFAULTS`` + - ``PyFunction_EVENT_MODIFY_KWDEFAULTS`` + + .. versionadded:: 3.12 + + +.. c:type:: int (*PyFunction_WatchCallback)(PyFunction_WatchEvent event, PyFunctionObject *func, PyObject *new_value) + + Type of a function watcher callback function. + + If *event* is ``PyFunction_EVENT_CREATE`` or ``PyFunction_EVENT_DESTROY`` + then *new_value* will be ``NULL``. Otherwise, *new_value* will hold a + :term:`borrowed reference` to the new value that is about to be stored in + *func* for the attribute that is being modified. + + The callback may inspect but must not modify *func*; doing so could have + unpredictable effects, including infinite recursion. + + If *event* is ``PyFunction_EVENT_CREATE``, then the callback is invoked + after `func` has been fully initialized. Otherwise, the callback is invoked + before the modification to *func* takes place, so the prior state of *func* + can be inspected. The runtime is permitted to optimize away the creation of + function objects when possible. In such cases no event will be emitted. + Although this creates the possitibility of an observable difference of + runtime behavior depending on optimization decisions, it does not change + the semantics of the Python code being executed. + + If the callback returns with an exception set, it must return ``-1``; this + exception will be printed as an unraisable exception using + :c:func:`PyErr_WriteUnraisable`. Otherwise it should return ``0``. + + .. 
versionadded:: 3.12 diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index dd8f20b2c20b39..5979febc2e3421 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -131,6 +131,55 @@ PyAPI_DATA(PyTypeObject) PyStaticMethod_Type; PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *); PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *); +#define FOREACH_FUNC_EVENT(V) \ + V(CREATE) \ + V(DESTROY) \ + V(MODIFY_CODE) \ + V(MODIFY_DEFAULTS) \ + V(MODIFY_KWDEFAULTS) + +typedef enum { + #define DEF_EVENT(EVENT) PyFunction_EVENT_##EVENT, + FOREACH_FUNC_EVENT(DEF_EVENT) + #undef DEF_EVENT +} PyFunction_WatchEvent; + +/* + * A callback that is invoked for different events in a function's lifecycle. + * + * The callback is invoked with a borrowed reference to func, after it is + * created and before it is modified or destroyed. The callback should not + * modify func. + * + * When a function's code object, defaults, or kwdefaults are modified the + * callback will be invoked with the respective event and new_value will + * contain a borrowed reference to the new value that is about to be stored in + * the function. Otherwise the third argument is NULL. + * + * If the callback returns with an exception set, it must return -1. Otherwise + * it should return 0. + */ +typedef int (*PyFunction_WatchCallback)( + PyFunction_WatchEvent event, + PyFunctionObject *func, + PyObject *new_value); + +/* + * Register a per-interpreter callback that will be invoked for function lifecycle + * events. + * + * Returns a handle that may be passed to PyFunction_ClearWatcher on success, + * or -1 and sets an error if no more handles are available. + */ +PyAPI_FUNC(int) PyFunction_AddWatcher(PyFunction_WatchCallback callback); + +/* + * Clear the watcher associated with the watcher_id handle. + * + * Returns 0 on success or -1 if no watcher exists for the supplied id. + */ +PyAPI_FUNC(int) PyFunction_ClearWatcher(int watcher_id); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index c95190f5217315..5cedb33d7e3afd 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#define FUNC_MAX_WATCHERS 8 + struct _py_func_runtime_state { uint32_t next_version; }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 437bf74b771dc0..532b28499080f2 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -17,6 +17,7 @@ extern "C" { #include "pycore_dict_state.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_floatobject.h" // struct _Py_float_state +#include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_gc.h" // struct _gc_runtime_state #include "pycore_list.h" // struct _Py_list_state @@ -171,6 +172,11 @@ struct _is { // Initialized to _PyEval_EvalFrameDefault(). 
_PyFrameEvalFunction eval_frame; + PyDict_WatchCallback dict_watchers[DICT_MAX_WATCHERS]; + PyFunction_WatchCallback func_watchers[FUNC_MAX_WATCHERS]; + // One bit is set for each non-NULL entry in func_watchers + uint8_t active_func_watchers; + Py_ssize_t co_extra_user_count; freefunc co_extra_freefuncs[MAX_CO_EXTRA_USERS]; diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index f635c746b4962d..5e4f42a86006bd 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -336,5 +336,98 @@ def test_no_more_ids_available(self): self.add_watcher() +class TestFuncWatchers(unittest.TestCase): + @contextmanager + def add_watcher(self, func): + wid = _testcapi.add_func_watcher(func) + try: + yield + finally: + _testcapi.clear_func_watcher(wid) + + def test_func_events_dispatched(self): + events = [] + def watcher(*args): + events.append(args) + + with self.add_watcher(watcher): + def myfunc(): + pass + self.assertIn((_testcapi.PYFUNC_EVENT_CREATE, myfunc, None), events) + myfunc_id = id(myfunc) + + new_code = self.test_func_events_dispatched.__code__ + myfunc.__code__ = new_code + self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_CODE, myfunc, new_code), events) + + new_defaults = (123,) + myfunc.__defaults__ = new_defaults + self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_DEFAULTS, myfunc, new_defaults), events) + + new_defaults = (456,) + _testcapi.set_func_defaults_via_capi(myfunc, new_defaults) + self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_DEFAULTS, myfunc, new_defaults), events) + + new_kwdefaults = {"self": 123} + myfunc.__kwdefaults__ = new_kwdefaults + self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_KWDEFAULTS, myfunc, new_kwdefaults), events) + + new_kwdefaults = {"self": 456} + _testcapi.set_func_kwdefaults_via_capi(myfunc, new_kwdefaults) + self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_KWDEFAULTS, myfunc, new_kwdefaults), events) + + # Clear events reference to func + events = [] + del myfunc + self.assertIn((_testcapi.PYFUNC_EVENT_DESTROY, myfunc_id, None), events) + + def test_multiple_watchers(self): + events0 = [] + def first_watcher(*args): + events0.append(args) + + events1 = [] + def second_watcher(*args): + events1.append(args) + + with self.add_watcher(first_watcher): + with self.add_watcher(second_watcher): + def myfunc(): + pass + + event = (_testcapi.PYFUNC_EVENT_CREATE, myfunc, None) + self.assertIn(event, events0) + self.assertIn(event, events1) + + def test_watcher_raises_error(self): + class MyError(Exception): + pass + + def watcher(*args): + raise MyError("testing 123") + + with self.add_watcher(watcher): + with catch_unraisable_exception() as cm: + def myfunc(): + pass + + self.assertIs(cm.unraisable.object, myfunc) + self.assertIsInstance(cm.unraisable.exc_value, MyError) + + def test_clear_out_of_range_watcher_id(self): + with self.assertRaisesRegex(ValueError, r"invalid func watcher ID -1"): + _testcapi.clear_func_watcher(-1) + with self.assertRaisesRegex(ValueError, r"invalid func watcher ID 8"): + _testcapi.clear_func_watcher(8) # FUNC_MAX_WATCHERS = 8 + + def test_clear_unassigned_watcher_id(self): + with self.assertRaisesRegex(ValueError, r"no func watcher set for ID 1"): + _testcapi.clear_func_watcher(1) + + def test_allocate_too_many_watchers(self): + with self.assertRaisesRegex(RuntimeError, r"no more func watcher IDs"): + _testcapi.allocate_too_many_func_watchers() + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2022-10-05-11-44-52.gh-issue-91053.f5Bo3p.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-05-11-44-52.gh-issue-91053.f5Bo3p.rst new file mode 100644 index 00000000000000..59bb12caef740d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-05-11-44-52.gh-issue-91053.f5Bo3p.rst @@ -0,0 +1,4 @@ +Optimizing interpreters and JIT compilers may need to invalidate internal +metadata when functions are modified. This change adds the ability to +provide a callback that will be invoked each time a function is created, +modified, or destroyed. diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index e0d489a60e5d48..608cd780d12a26 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -1,5 +1,7 @@ #include "parts.h" +#define Py_BUILD_CORE +#include "pycore_function.h" // FUNC_MAX_WATCHERS // Test dict watching static PyObject *g_dict_watch_events; @@ -275,6 +277,223 @@ unwatch_type(PyObject *self, PyObject *args) Py_RETURN_NONE; } +// Test function watchers + +#define NUM_FUNC_WATCHERS 2 +static PyObject *pyfunc_watchers[NUM_FUNC_WATCHERS]; +static int func_watcher_ids[NUM_FUNC_WATCHERS] = {-1, -1}; + +static PyObject * +get_id(PyObject *obj) +{ + PyObject *builtins = PyEval_GetBuiltins(); // borrowed ref. + if (builtins == NULL) { + return NULL; + } + PyObject *id_str = PyUnicode_FromString("id"); + if (id_str == NULL) { + return NULL; + } + PyObject *id_func = PyObject_GetItem(builtins, id_str); + Py_DECREF(id_str); + if (id_func == NULL) { + return NULL; + } + PyObject *stack[] = {obj}; + PyObject *id = PyObject_Vectorcall(id_func, stack, 1, NULL); + Py_DECREF(id_func); + return id; +} + +static int +call_pyfunc_watcher(PyObject *watcher, PyFunction_WatchEvent event, + PyFunctionObject *func, PyObject *new_value) +{ + PyObject *event_obj = PyLong_FromLong(event); + if (event_obj == NULL) { + return -1; + } + if (new_value == NULL) { + new_value = Py_None; + } + Py_INCREF(new_value); + PyObject *func_or_id = NULL; + if (event == PyFunction_EVENT_DESTROY) { + /* Don't expose a function that's about to be destroyed to managed code */ + func_or_id = get_id((PyObject *) func); + if (func_or_id == NULL) { + Py_DECREF(event_obj); + Py_DECREF(new_value); + return -1; + } + } + else { + Py_INCREF(func); + func_or_id = (PyObject *) func; + } + PyObject *stack[] = {event_obj, func_or_id, new_value}; + PyObject *res = PyObject_Vectorcall(watcher, stack, 3, NULL); + int st = (res == NULL) ? 
-1 : 0; + Py_XDECREF(res); + Py_DECREF(new_value); + Py_DECREF(event_obj); + Py_DECREF(func_or_id); + return st; +} + +static int +first_func_watcher_callback(PyFunction_WatchEvent event, PyFunctionObject *func, + PyObject *new_value) +{ + return call_pyfunc_watcher(pyfunc_watchers[0], event, func, new_value); +} + +static int +second_func_watcher_callback(PyFunction_WatchEvent event, + PyFunctionObject *func, PyObject *new_value) +{ + return call_pyfunc_watcher(pyfunc_watchers[1], event, func, new_value); +} + +static PyFunction_WatchCallback func_watcher_callbacks[NUM_FUNC_WATCHERS] = { + first_func_watcher_callback, + second_func_watcher_callback +}; + +static int +add_func_event(PyObject *module, const char *name, PyFunction_WatchEvent event) +{ + PyObject *value = PyLong_FromLong(event); + if (value == NULL) { + return -1; + } + int ok = PyModule_AddObjectRef(module, name, value); + Py_DECREF(value); + return ok; +} + +static PyObject * +add_func_watcher(PyObject *self, PyObject *func) +{ + if (!PyFunction_Check(func)) { + PyErr_SetString(PyExc_TypeError, "'func' must be a function"); + return NULL; + } + int idx = -1; + for (int i = 0; i < NUM_FUNC_WATCHERS; i++) { + if (func_watcher_ids[i] == -1) { + idx = i; + break; + } + } + if (idx == -1) { + PyErr_SetString(PyExc_RuntimeError, "no free watchers"); + return NULL; + } + PyObject *result = PyLong_FromLong(idx); + if (result == NULL) { + return NULL; + } + func_watcher_ids[idx] = PyFunction_AddWatcher(func_watcher_callbacks[idx]); + if (func_watcher_ids[idx] < 0) { + Py_DECREF(result); + return NULL; + } + pyfunc_watchers[idx] = Py_NewRef(func); + return result; +} + +static PyObject * +clear_func_watcher(PyObject *self, PyObject *watcher_id_obj) +{ + long watcher_id = PyLong_AsLong(watcher_id_obj); + if ((watcher_id < INT_MIN) || (watcher_id > INT_MAX)) { + PyErr_SetString(PyExc_ValueError, "invalid watcher ID"); + return NULL; + } + int wid = (int) watcher_id; + if (PyFunction_ClearWatcher(wid) < 0) { + return NULL; + } + int idx = -1; + for (int i = 0; i < NUM_FUNC_WATCHERS; i++) { + if (func_watcher_ids[i] == wid) { + idx = i; + break; + } + } + assert(idx != -1); + Py_CLEAR(pyfunc_watchers[idx]); + func_watcher_ids[idx] = -1; + Py_RETURN_NONE; +} + +static int +noop_func_event_handler(PyFunction_WatchEvent event, PyFunctionObject *func, + PyObject *new_value) +{ + return 0; +} + +static PyObject * +allocate_too_many_func_watchers(PyObject *self, PyObject *args) +{ + int watcher_ids[FUNC_MAX_WATCHERS + 1]; + int num_watchers = 0; + for (unsigned long i = 0; i < sizeof(watcher_ids) / sizeof(int); i++) { + int watcher_id = PyFunction_AddWatcher(noop_func_event_handler); + if (watcher_id == -1) { + break; + } + watcher_ids[i] = watcher_id; + num_watchers++; + } + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + for (int i = 0; i < num_watchers; i++) { + if (PyFunction_ClearWatcher(watcher_ids[i]) < 0) { + PyErr_WriteUnraisable(Py_None); + break; + } + } + if (type) { + PyErr_Restore(type, value, traceback); + return NULL; + } + else if (PyErr_Occurred()) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +set_func_defaults(PyObject *self, PyObject *args) +{ + PyObject *func = NULL; + PyObject *defaults = NULL; + if (!PyArg_ParseTuple(args, "OO", &func, &defaults)) { + return NULL; + } + if (PyFunction_SetDefaults(func, defaults) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +set_func_kwdefaults(PyObject *self, PyObject *args) +{ + PyObject *func = NULL; + PyObject 
*kwdefaults = NULL; + if (!PyArg_ParseTuple(args, "OO", &func, &kwdefaults)) { + return NULL; + } + if (PyFunction_SetKwDefaults(func, kwdefaults) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + static PyMethodDef test_methods[] = { // Dict watchers. {"add_dict_watcher", add_dict_watcher, METH_O, NULL}, @@ -289,6 +508,14 @@ static PyMethodDef test_methods[] = { {"watch_type", watch_type, METH_VARARGS, NULL}, {"unwatch_type", unwatch_type, METH_VARARGS, NULL}, {"get_type_modified_events", get_type_modified_events, METH_NOARGS, NULL}, + + // Function watchers. + {"add_func_watcher", add_func_watcher, METH_O, NULL}, + {"clear_func_watcher", clear_func_watcher, METH_O, NULL}, + {"set_func_defaults_via_capi", set_func_defaults, METH_VARARGS, NULL}, + {"set_func_kwdefaults_via_capi", set_func_kwdefaults, METH_VARARGS, NULL}, + {"allocate_too_many_func_watchers", allocate_too_many_func_watchers, + METH_NOARGS, NULL}, {NULL}, }; @@ -298,5 +525,15 @@ _PyTestCapi_Init_Watchers(PyObject *mod) if (PyModule_AddFunctions(mod, test_methods) < 0) { return -1; } + + /* Expose each event as an attribute on the module */ +#define ADD_EVENT(event) \ + if (add_func_event(mod, "PYFUNC_EVENT_" #event, \ + PyFunction_EVENT_##event)) { \ + return -1; \ + } + FOREACH_FUNC_EVENT(ADD_EVENT); +#undef ADD_EVENT + return 0; } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index cab80006589b48..bf97edc53ad7d9 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -3,10 +3,68 @@ #include "Python.h" #include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "structmember.h" // PyMemberDef +static void +notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event, + PyFunctionObject *func, PyObject *new_value) +{ + for (int i = 0; i < FUNC_MAX_WATCHERS; i++) { + PyFunction_WatchCallback cb = interp->func_watchers[i]; + if ((cb != NULL) && (cb(event, func, new_value) < 0)) { + PyErr_WriteUnraisable((PyObject *) func); + } + } +} + +static inline void +handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func, + PyObject *new_value) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->active_func_watchers) { + notify_func_watchers(interp, event, func, new_value); + } +} + +int +PyFunction_AddWatcher(PyFunction_WatchCallback callback) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->_initialized); + for (int i = 0; i < FUNC_MAX_WATCHERS; i++) { + if (interp->func_watchers[i] == NULL) { + interp->func_watchers[i] = callback; + interp->active_func_watchers |= (1 << i); + return i; + } + } + PyErr_SetString(PyExc_RuntimeError, "no more func watcher IDs available"); + return -1; +} + +int +PyFunction_ClearWatcher(int watcher_id) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (watcher_id < 0 || watcher_id >= FUNC_MAX_WATCHERS) { + PyErr_Format(PyExc_ValueError, "invalid func watcher ID %d", + watcher_id); + return -1; + } + if (!interp->func_watchers[watcher_id]) { + PyErr_Format(PyExc_ValueError, "no func watcher set for ID %d", + watcher_id); + return -1; + } + interp->func_watchers[watcher_id] = NULL; + interp->active_func_watchers &= ~(1 << watcher_id); + return 0; +} + PyFunctionObject * _PyFunction_FromConstructor(PyFrameConstructor *constr) { @@ -31,6 +89,7 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) op->vectorcall = 
_PyFunction_Vectorcall; op->func_version = 0; _PyObject_GC_TRACK(op); + handle_func_event(PyFunction_EVENT_CREATE, op, NULL); return op; } @@ -105,6 +164,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname op->vectorcall = _PyFunction_Vectorcall; op->func_version = 0; _PyObject_GC_TRACK(op); + handle_func_event(PyFunction_EVENT_CREATE, op, NULL); return (PyObject *)op; error: @@ -196,6 +256,8 @@ PyFunction_SetDefaults(PyObject *op, PyObject *defaults) PyErr_SetString(PyExc_SystemError, "non-tuple default args"); return -1; } + handle_func_event(PyFunction_EVENT_MODIFY_DEFAULTS, + (PyFunctionObject *) op, defaults); ((PyFunctionObject *)op)->func_version = 0; Py_XSETREF(((PyFunctionObject *)op)->func_defaults, defaults); return 0; @@ -236,6 +298,8 @@ PyFunction_SetKwDefaults(PyObject *op, PyObject *defaults) "non-dict keyword only default args"); return -1; } + handle_func_event(PyFunction_EVENT_MODIFY_KWDEFAULTS, + (PyFunctionObject *) op, defaults); ((PyFunctionObject *)op)->func_version = 0; Py_XSETREF(((PyFunctionObject *)op)->func_kwdefaults, defaults); return 0; @@ -389,6 +453,7 @@ func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored)) nclosure, nfree); return -1; } + handle_func_event(PyFunction_EVENT_MODIFY_CODE, op, value); op->func_version = 0; Py_XSETREF(op->func_code, Py_NewRef(value)); return 0; @@ -468,6 +533,7 @@ func_set_defaults(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored return -1; } + handle_func_event(PyFunction_EVENT_MODIFY_DEFAULTS, op, value); op->func_version = 0; Py_XSETREF(op->func_defaults, Py_XNewRef(value)); return 0; @@ -508,6 +574,7 @@ func_set_kwdefaults(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignor return -1; } + handle_func_event(PyFunction_EVENT_MODIFY_KWDEFAULTS, op, value); op->func_version = 0; Py_XSETREF(op->func_kwdefaults, Py_XNewRef(value)); return 0; @@ -687,6 +754,7 @@ func_clear(PyFunctionObject *op) static void func_dealloc(PyFunctionObject *op) { + handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); diff --git a/Python/pystate.c b/Python/pystate.c index c7f2386f8c6329..19fd9a6ae4497b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -461,6 +461,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) interp->dict_state.watchers[i] = NULL; } + for (int i=0; i < FUNC_MAX_WATCHERS; i++) { + interp->func_watchers[i] = NULL; + } + interp->active_func_watchers = 0; + // XXX Once we have one allocator per interpreter (i.e. // per-interpreter GC) we must ensure that all of the interpreter's // objects have been cleaned up at the point. From 135ec7cefbaffd516b77362ad2b2ad1025af462e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2022 13:39:11 +0100 Subject: [PATCH 010/112] gh-99537: Use Py_SETREF() function in C code (#99657) Fix potential race condition in code patterns: * Replace "Py_DECREF(var); var = new;" with "Py_SETREF(var, new);" * Replace "Py_XDECREF(var); var = new;" with "Py_XSETREF(var, new);" * Replace "Py_CLEAR(var); var = new;" with "Py_XSETREF(var, new);" Other changes: * Replace "old = var; var = new; Py_DECREF(var)" with "Py_SETREF(var, new);" * Replace "old = var; var = new; Py_XDECREF(var)" with "Py_XSETREF(var, new);" * And remove the "old" variable. 
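The X variants extend the same idea to variables that may be NULL: Py_XSETREF() stores the new value and then Py_XDECREFs whatever was there before, which also covers the "Py_CLEAR(var); var = new;" spelling and makes the temporary "old" variable unnecessary. An illustrative sketch (the cache pointer is hypothetical, not from the patch):

    #include <Python.h>

    static PyObject *cache = NULL;          /* hypothetical module-level cache, may be NULL */

    static void
    example_replace_cache(PyObject *fresh)  /* fresh: a new reference */
    {
        /* Replaces:  PyObject *old = cache; cache = fresh; Py_XDECREF(old);
         * No temporary, and cache never points at an already-released object. */
        Py_XSETREF(cache, fresh);
    }
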
--- Objects/bytesobject.c | 4 +--- Objects/capsule.c | 3 +-- Objects/fileobject.c | 6 ++---- Objects/floatobject.c | 9 +++------ Objects/genobject.c | 3 +-- Objects/setobject.c | 3 +-- Objects/sliceobject.c | 22 ++++++---------------- Objects/stringlib/unicode_format.h | 6 ++---- Objects/typeobject.c | 6 ++---- Objects/unicodeobject.c | 6 ++---- Objects/weakrefobject.c | 6 ++---- Python/_warnings.c | 6 ++---- Python/bltinmodule.c | 3 +-- Python/compile.c | 4 +--- Python/errors.c | 7 ++----- Python/hamt.c | 3 +-- Python/pythonrun.c | 3 +-- Python/sysmodule.c | 3 +-- Python/traceback.c | 7 ++----- 19 files changed, 34 insertions(+), 76 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 91c89bbd9005a7..a63f396e022f71 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2109,9 +2109,7 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, changed = 1; } if (!changed && PyBytes_CheckExact(input_obj)) { - Py_INCREF(input_obj); - Py_DECREF(result); - result = input_obj; + Py_SETREF(result, Py_NewRef(input_obj)); } PyBuffer_Release(&del_table_view); PyBuffer_Release(&table_view); diff --git a/Objects/capsule.c b/Objects/capsule.c index 606e50e6961133..baaddb3f1f0849 100644 --- a/Objects/capsule.c +++ b/Objects/capsule.c @@ -220,8 +220,7 @@ PyCapsule_Import(const char *name, int no_block) } } else { PyObject *object2 = PyObject_GetAttrString(object, trace); - Py_DECREF(object); - object = object2; + Py_SETREF(object, object2); } if (!object) { goto EXIT; diff --git a/Objects/fileobject.c b/Objects/fileobject.c index ab67cd23cef3b3..bf56be5f7ea7b0 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -88,8 +88,7 @@ PyFile_GetLine(PyObject *f, int n) else { PyObject *v; v = PyBytes_FromStringAndSize(s, len-1); - Py_DECREF(result); - result = v; + Py_SETREF(result, v); } } } @@ -104,8 +103,7 @@ PyFile_GetLine(PyObject *f, int n) else if (PyUnicode_READ_CHAR(result, len-1) == '\n') { PyObject *v; v = PyUnicode_Substring(result, 0, len-1); - Py_DECREF(result); - result = v; + Py_SETREF(result, v); } } return result; diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 46016e946ad344..912b742f797d24 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -531,20 +531,17 @@ float_richcompare(PyObject *v, PyObject *w, int op) temp = _PyLong_Lshift(ww, 1); if (temp == NULL) goto Error; - Py_DECREF(ww); - ww = temp; + Py_SETREF(ww, temp); temp = _PyLong_Lshift(vv, 1); if (temp == NULL) goto Error; - Py_DECREF(vv); - vv = temp; + Py_SETREF(vv, temp); temp = PyNumber_Or(vv, _PyLong_GetOne()); if (temp == NULL) goto Error; - Py_DECREF(vv); - vv = temp; + Py_SETREF(vv, temp); } r = PyObject_RichCompareBool(vv, ww, op); diff --git a/Objects/genobject.c b/Objects/genobject.c index 6661e3fb9461a4..c006f1af2177f9 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -491,8 +491,7 @@ _gen_throw(PyGenObject *gen, int close_on_genexit, } else { /* Normalize to raise , */ - Py_XDECREF(val); - val = typ; + Py_XSETREF(val, typ); typ = Py_NewRef(PyExceptionInstance_Class(typ)); if (tb == NULL) diff --git a/Objects/setobject.c b/Objects/setobject.c index e0646768384a88..ae9e9b99446116 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1270,8 +1270,7 @@ set_intersection_multi(PySetObject *so, PyObject *args) Py_DECREF(result); return NULL; } - Py_DECREF(result); - result = newresult; + Py_SETREF(result, newresult); } return result; } diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 
1da1df3090721f..5694bd9c661fa5 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -448,8 +448,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (_PyLong_Sign(start) < 0) { /* start += length */ PyObject *tmp = PyNumber_Add(start, length); - Py_DECREF(start); - start = tmp; + Py_SETREF(start, tmp); if (start == NULL) goto error; @@ -457,9 +456,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (cmp_result < 0) goto error; if (cmp_result) { - Py_INCREF(lower); - Py_DECREF(start); - start = lower; + Py_SETREF(start, Py_NewRef(lower)); } } else { @@ -467,9 +464,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (cmp_result < 0) goto error; if (cmp_result) { - Py_INCREF(upper); - Py_DECREF(start); - start = upper; + Py_SETREF(start, Py_NewRef(upper)); } } } @@ -486,8 +481,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (_PyLong_Sign(stop) < 0) { /* stop += length */ PyObject *tmp = PyNumber_Add(stop, length); - Py_DECREF(stop); - stop = tmp; + Py_SETREF(stop, tmp); if (stop == NULL) goto error; @@ -495,9 +489,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (cmp_result < 0) goto error; if (cmp_result) { - Py_INCREF(lower); - Py_DECREF(stop); - stop = lower; + Py_SETREF(stop, Py_NewRef(lower)); } } else { @@ -505,9 +497,7 @@ _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, if (cmp_result < 0) goto error; if (cmp_result) { - Py_INCREF(upper); - Py_DECREF(stop); - stop = upper; + Py_SETREF(stop, Py_NewRef(upper)); } } } diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index e970588a7e34bc..ccd7c77c0a03fd 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -473,8 +473,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs, goto error; /* assign to obj */ - Py_DECREF(obj); - obj = tmp; + Py_SETREF(obj, tmp); } /* end of iterator, this is the non-error case */ if (ok == 1) @@ -825,8 +824,7 @@ output_markup(SubString *field_name, SubString *format_spec, goto done; /* do the assignment, transferring ownership: fieldobj = tmp */ - Py_DECREF(fieldobj); - fieldobj = tmp; + Py_SETREF(fieldobj, tmp); tmp = NULL; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9d868d51f02750..312406993c5b23 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5968,8 +5968,7 @@ object___dir___impl(PyObject *self) else { /* Copy __dict__ to avoid mutating it. */ PyObject *temp = PyDict_Copy(dict); - Py_DECREF(dict); - dict = temp; + Py_SETREF(dict, temp); } if (dict == NULL) @@ -9377,8 +9376,7 @@ super_getattro(PyObject *self, PyObject *name) (See SF ID #743627) */ (su->obj == (PyObject *)starttype) ? 
NULL : su->obj, (PyObject *)starttype); - Py_DECREF(res); - res = res2; + Py_SETREF(res, res2); } Py_DECREF(mro); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b1acfc71379cd5..55f029dd504ca0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13572,8 +13572,7 @@ _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) for (i = 0; i < numdigits; i++) *b1++ = *buf++; *b1 = '\0'; - Py_DECREF(result); - result = r1; + Py_SETREF(result, r1); buf = PyBytes_AS_STRING(result); len = numnondigits + prec; } @@ -13590,8 +13589,7 @@ _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) || buf != PyUnicode_DATA(result)) { PyObject *unicode; unicode = _PyUnicode_FromASCII(buf, len); - Py_DECREF(result); - result = unicode; + Py_SETREF(result, unicode); } else if (len != PyUnicode_GET_LENGTH(result)) { if (PyUnicode_Resize(&result, len) < 0) diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index ff284c882b0f1c..bd7720e2753307 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -824,8 +824,7 @@ PyWeakref_NewRef(PyObject *ob, PyObject *callback) during GC. Return that one instead of this one to avoid violating the invariants of the list of weakrefs for ob. */ - Py_DECREF(result); - result = (PyWeakReference*)Py_NewRef(ref); + Py_SETREF(result, (PyWeakReference*)Py_NewRef(ref)); } } else { @@ -888,8 +887,7 @@ PyWeakref_NewProxy(PyObject *ob, PyObject *callback) during GC. Return that one instead of this one to avoid violating the invariants of the list of weakrefs for ob. */ - Py_DECREF(result); - result = (PyWeakReference*)Py_NewRef(proxy); + Py_SETREF(result, (PyWeakReference*)Py_NewRef(proxy)); goto skip_insert; } prev = ref; diff --git a/Python/_warnings.c b/Python/_warnings.c index d703e1e6d843a5..046c37eb49bac0 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -801,8 +801,7 @@ next_external_frame(PyFrameObject *frame) { do { PyFrameObject *back = PyFrame_GetBack(frame); - Py_DECREF(frame); - frame = back; + Py_SETREF(frame, back); } while (frame != NULL && is_internal_frame(frame)); return frame; @@ -828,8 +827,7 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno, if (stack_level <= 0 || is_internal_frame(f)) { while (--stack_level > 0 && f != NULL) { PyFrameObject *back = PyFrame_GetBack(f); - Py_DECREF(f); - f = back; + Py_SETREF(f, back); } } else { diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 119e21ae0da290..c2cf79a727f0a8 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -168,8 +168,7 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, goto error; } if (winner != meta) { - Py_DECREF(meta); - meta = Py_NewRef(winner); + Py_SETREF(meta, Py_NewRef(winner)); } } /* else: meta is not a class, so we cannot do the metaclass diff --git a/Python/compile.c b/Python/compile.c index 9226bc233ead74..366321143a54aa 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -8280,9 +8280,7 @@ merge_const_one(PyObject *const_cache, PyObject **obj) t = PyTuple_GET_ITEM(t, 1); } - Py_INCREF(t); - Py_DECREF(*obj); - *obj = t; + Py_SETREF(*obj, Py_NewRef(t)); return 1; } diff --git a/Python/errors.c b/Python/errors.c index d74ac347484fed..6a42f5912f942a 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -353,16 +353,13 @@ _PyErr_NormalizeException(PyThreadState *tstate, PyObject **exc, if (fixed_value == NULL) { goto error; } - Py_DECREF(value); - value = fixed_value; + Py_SETREF(value, fixed_value); } /* If the class of the 
instance doesn't exactly match the class of the type, believe the instance. */ else if (inclass != type) { - Py_INCREF(inclass); - Py_DECREF(type); - type = inclass; + Py_SETREF(type, Py_NewRef(inclass)); } } *exc = type; diff --git a/Python/hamt.c b/Python/hamt.c index c4e47eb9e5703a..8cb94641bef251 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1354,8 +1354,7 @@ hamt_node_collision_assoc(PyHamtNode_Collision *self, } /* Replace the old value with the new value for the our key. */ - Py_DECREF(new_node->c_array[val_idx]); - new_node->c_array[val_idx] = Py_NewRef(val); + Py_SETREF(new_node->c_array[val_idx], Py_NewRef(val)); return (PyHamtNode *)new_node; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 70872222eb6458..35292b6478a833 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -718,8 +718,7 @@ _Py_HandleSystemExit(int *exitcode_p) /* The error code should be in the `code' attribute. */ PyObject *code = PyObject_GetAttr(value, &_Py_ID(code)); if (code) { - Py_DECREF(value); - value = code; + Py_SETREF(value, code); if (value == Py_None) goto done; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 6f0a126a62277b..88f806e616f27e 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -198,8 +198,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event, eventArgs = _Py_VaBuildValue_SizeT(argFormat, vargs); if (eventArgs && !PyTuple_Check(eventArgs)) { PyObject *argTuple = PyTuple_Pack(1, eventArgs); - Py_DECREF(eventArgs); - eventArgs = argTuple; + Py_SETREF(eventArgs, argTuple); } } else { diff --git a/Python/traceback.c b/Python/traceback.c index 356e64364832aa..da26c9b260a3bd 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -136,9 +136,7 @@ tb_next_set(PyTracebackObject *self, PyObject *new_next, void *Py_UNUSED(_)) cursor = cursor->tb_next; } - PyObject *old_next = (PyObject*)self->tb_next; - self->tb_next = (PyTracebackObject *)Py_XNewRef(new_next); - Py_XDECREF(old_next); + Py_XSETREF(self->tb_next, (PyTracebackObject *)Py_XNewRef(new_next)); return 0; } @@ -533,8 +531,7 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int PyObject *truncated; truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj)); if (truncated) { - Py_DECREF(lineobj); - lineobj = truncated; + Py_SETREF(lineobj, truncated); } else { PyErr_Clear(); } From 7e3f09cad9b783d8968aa79ff6a8ee57beb8b83e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2022 14:22:22 +0100 Subject: [PATCH 011/112] gh-99537: Use Py_SETREF() function in C code (#99656) Fix potential race condition in code patterns: * Replace "Py_DECREF(var); var = new;" with "Py_SETREF(var, new);" * Replace "Py_XDECREF(var); var = new;" with "Py_XSETREF(var, new);" * Replace "Py_CLEAR(var); var = new;" with "Py_XSETREF(var, new);" Other changes: * Replace "old = var; var = new; Py_DECREF(var)" with "Py_SETREF(var, new);" * Replace "old = var; var = new; Py_XDECREF(var)" with "Py_XSETREF(var, new);" * And remove the "old" variable. 
--- Doc/includes/custom2.c | 10 +++------- Doc/includes/custom3.c | 20 +++++--------------- Doc/includes/custom4.c | 18 +++++------------- Modules/_collectionsmodule.c | 5 +---- Modules/_datetimemodule.c | 8 ++------ Modules/_elementtree.c | 6 ++---- Modules/_functoolsmodule.c | 3 +-- Modules/_io/stringio.c | 3 +-- Modules/_io/textio.c | 9 +++------ Modules/_json.c | 4 +--- Modules/_pickle.c | 7 ++----- Modules/_sqlite/cursor.c | 3 +-- Modules/_testinternalcapi.c | 3 +-- Modules/_tkinter.c | 3 +-- Modules/_zoneinfo.c | 3 +-- Modules/audioop.c | 3 +-- Modules/cjkcodecs/multibytecodec.c | 6 ++---- Modules/itertoolsmodule.c | 3 +-- Modules/mathmodule.c | 6 ++---- Modules/posixmodule.c | 3 +-- 20 files changed, 37 insertions(+), 89 deletions(-) diff --git a/Doc/includes/custom2.c b/Doc/includes/custom2.c index 6638b9fbc1d751..a3b2d6ab78d3c4 100644 --- a/Doc/includes/custom2.c +++ b/Doc/includes/custom2.c @@ -42,7 +42,7 @@ static int Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"first", "last", "number", NULL}; - PyObject *first = NULL, *last = NULL, *tmp; + PyObject *first = NULL, *last = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist, &first, &last, @@ -50,14 +50,10 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) return -1; if (first) { - tmp = self->first; - self->first = Py_NewRef(first); - Py_XDECREF(tmp); + Py_XSETREF(self->first, Py_NewRef(first)); } if (last) { - tmp = self->last; - self->last = Py_NewRef(last); - Py_XDECREF(tmp); + Py_XSETREF(self->last, Py_NewRef(last)); } return 0; } diff --git a/Doc/includes/custom3.c b/Doc/includes/custom3.c index 0faf2bd4be172a..1a68bc4be8c399 100644 --- a/Doc/includes/custom3.c +++ b/Doc/includes/custom3.c @@ -42,7 +42,7 @@ static int Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"first", "last", "number", NULL}; - PyObject *first = NULL, *last = NULL, *tmp; + PyObject *first = NULL, *last = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|UUi", kwlist, &first, &last, @@ -50,14 +50,10 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) return -1; if (first) { - tmp = self->first; - self->first = Py_NewRef(first); - Py_DECREF(tmp); + Py_SETREF(self->first, Py_NewRef(first)); } if (last) { - tmp = self->last; - self->last = Py_NewRef(last); - Py_DECREF(tmp); + Py_SETREF(self->last, Py_NewRef(last)); } return 0; } @@ -77,7 +73,6 @@ Custom_getfirst(CustomObject *self, void *closure) static int Custom_setfirst(CustomObject *self, PyObject *value, void *closure) { - PyObject *tmp; if (value == NULL) { PyErr_SetString(PyExc_TypeError, "Cannot delete the first attribute"); return -1; @@ -87,9 +82,7 @@ Custom_setfirst(CustomObject *self, PyObject *value, void *closure) "The first attribute value must be a string"); return -1; } - tmp = self->first; - self->first = Py_NewRef(value); - Py_DECREF(tmp); + Py_SETREF(self->first, Py_NewRef(value)); return 0; } @@ -102,7 +95,6 @@ Custom_getlast(CustomObject *self, void *closure) static int Custom_setlast(CustomObject *self, PyObject *value, void *closure) { - PyObject *tmp; if (value == NULL) { PyErr_SetString(PyExc_TypeError, "Cannot delete the last attribute"); return -1; @@ -112,9 +104,7 @@ Custom_setlast(CustomObject *self, PyObject *value, void *closure) "The last attribute value must be a string"); return -1; } - tmp = self->last; - self->last = Py_NewRef(value); - Py_DECREF(tmp); + Py_SETREF(self->last, Py_NewRef(value)); return 0; } diff --git 
a/Doc/includes/custom4.c b/Doc/includes/custom4.c index b725bc0b6fae3a..b932d159d26e93 100644 --- a/Doc/includes/custom4.c +++ b/Doc/includes/custom4.c @@ -58,7 +58,7 @@ static int Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"first", "last", "number", NULL}; - PyObject *first = NULL, *last = NULL, *tmp; + PyObject *first = NULL, *last = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|UUi", kwlist, &first, &last, @@ -66,14 +66,10 @@ Custom_init(CustomObject *self, PyObject *args, PyObject *kwds) return -1; if (first) { - tmp = self->first; - self->first = Py_NewRef(first); - Py_DECREF(tmp); + Py_SETREF(self->first, Py_NewRef(first)); } if (last) { - tmp = self->last; - self->last = Py_NewRef(last); - Py_DECREF(tmp); + Py_SETREF(self->last, Py_NewRef(last)); } return 0; } @@ -102,9 +98,7 @@ Custom_setfirst(CustomObject *self, PyObject *value, void *closure) "The first attribute value must be a string"); return -1; } - Py_INCREF(value); - Py_CLEAR(self->first); - self->first = value; + Py_XSETREF(self->first, Py_NewRef(value)); return 0; } @@ -126,9 +120,7 @@ Custom_setlast(CustomObject *self, PyObject *value, void *closure) "The last attribute value must be a string"); return -1; } - Py_INCREF(value); - Py_CLEAR(self->last); - self->last = value; + Py_XSETREF(self->last, Py_NewRef(value)); return 0; } diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index f1fd271f6d213f..5fa583821889f3 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -1256,7 +1256,6 @@ deque_remove(dequeobject *deque, PyObject *value) static int deque_ass_item(dequeobject *deque, Py_ssize_t i, PyObject *v) { - PyObject *old_value; block *b; Py_ssize_t n, len=Py_SIZE(deque), halflen=(len+1)>>1, index=i; @@ -1282,9 +1281,7 @@ deque_ass_item(dequeobject *deque, Py_ssize_t i, PyObject *v) while (--n >= 0) b = b->leftlink; } - old_value = b->data[i]; - b->data[i] = Py_NewRef(v); - Py_DECREF(old_value); + Py_SETREF(b->data[i], Py_NewRef(v)); return 0; } diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 20e8516fcde7fb..712abc3346faf9 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -6247,13 +6247,10 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) } else { /* Result is already aware - just replace tzinfo. */ - temp = result->tzinfo; - result->tzinfo = Py_NewRef(PyDateTime_TimeZone_UTC); - Py_DECREF(temp); + Py_SETREF(result->tzinfo, Py_NewRef(PyDateTime_TimeZone_UTC)); } /* Attach new tzinfo and let fromutc() do the rest. 
*/ - temp = result->tzinfo; if (tzinfo == Py_None) { tzinfo = local_timezone(result); if (tzinfo == NULL) { @@ -6263,8 +6260,7 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) } else Py_INCREF(tzinfo); - result->tzinfo = tzinfo; - Py_DECREF(temp); + Py_SETREF(result->tzinfo, tzinfo); temp = (PyObject *)result; result = (PyDateTime_DateTime *) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 7b0977931f2ae6..3df93651654a76 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -537,8 +537,7 @@ element_get_text(ElementObject* self) if (!tmp) return NULL; self->text = tmp; - Py_DECREF(res); - res = tmp; + Py_SETREF(res, tmp); } } @@ -559,8 +558,7 @@ element_get_tail(ElementObject* self) if (!tmp) return NULL; self->tail = tmp; - Py_DECREF(res); - res = tmp; + Py_SETREF(res, tmp); } } diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 9b30f41ec69932..4032ba79374fa4 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1243,8 +1243,7 @@ lru_cache_clear_list(lru_list_elem *link) { while (link != NULL) { lru_list_elem *next = link->next; - Py_DECREF(link); - link = next; + Py_SETREF(link, next); } } diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 5c3bf353680ea4..ae6c3125a2d9da 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -193,8 +193,7 @@ write_str(stringio *self, PyObject *obj) if (self->writenl) { PyObject *translated = PyUnicode_Replace( decoded, &_Py_STR(newline), self->writenl, -1); - Py_DECREF(decoded); - decoded = translated; + Py_SETREF(decoded, translated); } if (decoded == NULL) return -1; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 8b5d00fe4aafd7..ff903e9341de27 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -320,8 +320,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, out = PyUnicode_DATA(modified); PyUnicode_WRITE(kind, out, 0, '\r'); memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); - Py_DECREF(output); - output = modified; /* output remains ready */ + Py_SETREF(output, modified); /* output remains ready */ self->pendingcr = 0; output_len++; } @@ -336,8 +335,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, PyObject *modified = PyUnicode_Substring(output, 0, output_len -1); if (modified == NULL) goto error; - Py_DECREF(output); - output = modified; + Py_SETREF(output, modified); self->pendingcr = 1; } } @@ -865,8 +863,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info, self->decoder, self->readtranslate ? 
Py_True : Py_False, NULL); if (incrementalDecoder == NULL) return -1; - Py_CLEAR(self->decoder); - self->decoder = incrementalDecoder; + Py_XSETREF(self->decoder, incrementalDecoder); } return 0; diff --git a/Modules/_json.c b/Modules/_json.c index 06f232f7d7de9b..81431aa1041c55 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -709,9 +709,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (memokey == NULL) { goto bail; } - Py_INCREF(memokey); - Py_DECREF(key); - key = memokey; + Py_SETREF(key, Py_NewRef(memokey)); idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ diff --git a/Modules/_pickle.c b/Modules/_pickle.c index db62094c5a5af9..1e118e9cd10bbf 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1829,8 +1829,7 @@ get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) n = PyList_GET_SIZE(names); for (i = 0; i < n; i++) { PyObject *name = PyList_GET_ITEM(names, i); - Py_XDECREF(parent); - parent = obj; + Py_XSETREF(parent, obj); (void)_PyObject_LookupAttr(parent, name, &obj); if (obj == NULL) { Py_DECREF(parent); @@ -3717,9 +3716,7 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) else { gen_global: if (parent == module) { - Py_INCREF(lastname); - Py_DECREF(global_name); - global_name = lastname; + Py_SETREF(global_name, Py_NewRef(lastname)); } if (self->proto >= 4) { const char stack_global_op = STACK_GLOBAL; diff --git a/Modules/_sqlite/cursor.c b/Modules/_sqlite/cursor.c index 7844b6e26cdbda..a4e22bb4a2b58d 100644 --- a/Modules/_sqlite/cursor.c +++ b/Modules/_sqlite/cursor.c @@ -1123,8 +1123,7 @@ pysqlite_cursor_iternext(pysqlite_Cursor *self) PyObject *factory = self->row_factory; PyObject *args[] = { (PyObject *)self, row, }; PyObject *new_row = PyObject_Vectorcall(factory, args, 2, NULL); - Py_DECREF(row); - row = new_row; + Py_SETREF(row, new_row); } return row; } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index cec114cb5919da..b14b8ac3c74054 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -523,8 +523,7 @@ set_eval_frame_record(PyObject *self, PyObject *list) PyErr_SetString(PyExc_TypeError, "argument must be a list"); return NULL; } - Py_CLEAR(record_list); - record_list = Py_NewRef(list); + Py_XSETREF(record_list, Py_NewRef(list)); _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState_Get(), record_eval); Py_RETURN_NONE; } diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index a5d5428eaf5077..6ff7d2bfced29b 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -1105,8 +1105,7 @@ fromBignumObj(TkappObject *tkapp, Tcl_Obj *value) PyMem_Free(bytes); if (res != NULL && bigValue.sign == MP_NEG) { PyObject *res2 = PyNumber_Negative(res); - Py_DECREF(res); - res = res2; + Py_SETREF(res, res2); } mp_clear(&bigValue); return res; diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index 34f5abd343ece0..c0fef13ba17d20 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -1074,8 +1074,7 @@ load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) // that the dstoff is set correctly in that case. 
if (PyObject_IsTrue(tti->dstoff)) { _ttinfo *tti_after = &(self->tzrule_after.std); - Py_DECREF(tti_after->dstoff); - tti_after->dstoff = Py_NewRef(tti->dstoff); + Py_SETREF(tti_after->dstoff, Py_NewRef(tti->dstoff)); } } diff --git a/Modules/audioop.c b/Modules/audioop.c index 6d484e8bba3ec7..9325f82f9a17e0 100644 --- a/Modules/audioop.c +++ b/Modules/audioop.c @@ -1471,8 +1471,7 @@ audioop_ratecv_impl(PyObject *module, Py_buffer *fragment, int width, len = (Py_ssize_t)(ncp - PyBytes_AsString(str)); rv = PyBytes_FromStringAndSize (PyBytes_AsString(str), len); - Py_DECREF(str); - str = rv; + Py_SETREF(str, rv); if (str == NULL) goto exit; rv = Py_BuildValue("(O(iO))", str, d, samps); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 6d67fce1da0353..8b6232695d4c13 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -980,8 +980,7 @@ _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEn goto errorexit; } - Py_CLEAR(self->pending); - self->pending = pending; + Py_XSETREF(self->pending, pending); memcpy(self->state.c, statebytes+1+statebytes[0], sizeof(self->state.c)); @@ -1438,8 +1437,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, memcpy(ctrdata + self->pendingsize, PyBytes_AS_STRING(cres), PyBytes_GET_SIZE(cres)); - Py_DECREF(cres); - cres = ctr; + Py_SETREF(cres, ctr); self->pendingsize = 0; } diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 65d01425052241..4b0a4d88c435c6 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -834,8 +834,7 @@ teedataobject_safe_decref(PyObject *obj) Py_REFCNT(obj) == 1) { PyObject *nextlink = ((teedataobject *)obj)->nextlink; ((teedataobject *)obj)->nextlink = NULL; - Py_DECREF(obj); - obj = nextlink; + Py_SETREF(obj, nextlink); } Py_XDECREF(obj); } diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 16a2f45c8b84df..83eb338be9b83c 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2069,8 +2069,7 @@ factorial_odd_part(unsigned long n) Py_DECREF(partial); if (tmp == NULL) goto error; - Py_DECREF(inner); - inner = tmp; + Py_SETREF(inner, tmp); /* Now inner is the product of all odd integers j in the range (0, n/2**i], giving the inner product in the formula above. */ @@ -2078,8 +2077,7 @@ factorial_odd_part(unsigned long n) tmp = PyNumber_Multiply(outer, inner); if (tmp == NULL) goto error; - Py_DECREF(outer); - outer = tmp; + Py_SETREF(outer, tmp); } Py_DECREF(inner); return outer; diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6d65ae97c9e318..98fc264aff6bf9 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1202,8 +1202,7 @@ path_converter(PyObject *o, void *p) } /* still owns a reference to the original object */ - Py_DECREF(o); - o = res; + Py_SETREF(o, res); } if (is_unicode) { From 9a91182d4a87e4511dad20ad101e3eab0e1c5088 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2022 15:22:55 +0100 Subject: [PATCH 012/112] gh-99537: Use Py_CLEAR() function in C code (#99686) Replace "Py_XDECREF(var); var = NULL;" with "Py_CLEAR(var);". Don't replace "Py_DECREF(var); var = NULL;" with "Py_CLEAR(var);". It would add an useless "if (var)" test in code path where var cannot be NULL. 
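A sketch of the substitution this message describes, using invented names
(``cached_obj`` and ``reset_cache`` do not appear anywhere in the patch)::

    #include <Python.h>

    static PyObject *cached_obj = NULL;   /* may legitimately be NULL here */

    static void
    reset_cache(void)
    {
        /* Old spelling: two statements per variable.
         *     Py_XDECREF(cached_obj);
         *     cached_obj = NULL;
         *
         * New spelling: one call.  Py_CLEAR() sets the variable to NULL
         * *before* dropping the reference, so anything that runs from the
         * destructor cannot observe a dangling pointer. */
        Py_CLEAR(cached_obj);
    }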
--- Modules/_xxsubinterpretersmodule.c | 3 +-- Modules/_zoneinfo.c | 14 ++++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index e65137e58fb5bb..244ae3517e1d81 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -2383,8 +2383,7 @@ channel_list_interpreters(PyObject *self, PyObject *args, PyObject *kwds) goto finally; except: - Py_XDECREF(ids); - ids = NULL; + Py_CLEAR(ids); finally: return ids; diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index c0fef13ba17d20..cb7d4c943845b1 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -231,8 +231,7 @@ zoneinfo_new_instance(PyTypeObject *type, PyObject *key) goto cleanup; error: - Py_XDECREF(self); - self = NULL; + Py_CLEAR(self); cleanup: if (file_obj != NULL) { PyObject *exc, *val, *tb; @@ -2606,14 +2605,9 @@ static PyMethodDef module_methods[] = {{NULL, NULL}}; static void module_free(void *m) { - Py_XDECREF(_tzpath_find_tzfile); - _tzpath_find_tzfile = NULL; - - Py_XDECREF(_common_mod); - _common_mod = NULL; - - Py_XDECREF(io_open); - io_open = NULL; + Py_CLEAR(_tzpath_find_tzfile); + Py_CLEAR(_common_mod); + Py_CLEAR(io_open); xdecref_ttinfo(&NO_TTINFO); From 995f6170c78570eca818f7e7dbd8a7661c171a81 Mon Sep 17 00:00:00 2001 From: "Dong Uk, Kang" Date: Wed, 23 Nov 2022 00:06:20 +0900 Subject: [PATCH 013/112] gh-88863: Clear ref cycles to resolve leak when asyncio.open_connection raises (#95739) Break reference cycles to resolve memory leak, by removing local exception and future instances from the frame --- Lib/asyncio/base_events.py | 31 ++++++++++++------- Lib/asyncio/selector_events.py | 10 +++++- Lib/asyncio/windows_events.py | 8 ++++- ...2-08-06-12-18-07.gh-issue-88863.NnqsuJ.rst | 3 ++ 4 files changed, 38 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-08-06-12-18-07.gh-issue-88863.NnqsuJ.rst diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index c8a2f9f25634ef..91d32e3939dcd3 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -986,6 +986,8 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): if sock is not None: sock.close() raise + finally: + exceptions = my_exceptions = None async def create_connection( self, protocol_factory, host=None, port=None, @@ -1084,19 +1086,22 @@ async def create_connection( if sock is None: exceptions = [exc for sub in exceptions for exc in sub] - if all_errors: - raise ExceptionGroup("create_connection failed", exceptions) - if len(exceptions) == 1: - raise exceptions[0] - else: - # If they all have the same str(), raise one. - model = str(exceptions[0]) - if all(str(exc) == model for exc in exceptions): + try: + if all_errors: + raise ExceptionGroup("create_connection failed", exceptions) + if len(exceptions) == 1: raise exceptions[0] - # Raise a combined exception so the user can see all - # the various error messages. - raise OSError('Multiple exceptions: {}'.format( - ', '.join(str(exc) for exc in exceptions))) + else: + # If they all have the same str(), raise one. + model = str(exceptions[0]) + if all(str(exc) == model for exc in exceptions): + raise exceptions[0] + # Raise a combined exception so the user can see all + # the various error messages. 
+ raise OSError('Multiple exceptions: {}'.format( + ', '.join(str(exc) for exc in exceptions))) + finally: + exceptions = None else: if sock is None: @@ -1904,6 +1909,8 @@ def _run_once(self): event_list = self._selector.select(timeout) self._process_events(event_list) + # Needed to break cycles when an exception occurs. + event_list = None # Handle 'later' callbacks that are ready. end_time = self.time() + self._clock_resolution diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index bfa4590154f372..3d30006198f671 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -633,7 +633,11 @@ async def sock_connect(self, sock, address): fut = self.create_future() self._sock_connect(fut, sock, address) - return await fut + try: + return await fut + finally: + # Needed to break cycles when an exception occurs. + fut = None def _sock_connect(self, fut, sock, address): fd = sock.fileno() @@ -655,6 +659,8 @@ def _sock_connect(self, fut, sock, address): fut.set_exception(exc) else: fut.set_result(None) + finally: + fut = None def _sock_write_done(self, fd, fut, handle=None): if handle is None or not handle.cancelled(): @@ -678,6 +684,8 @@ def _sock_connect_cb(self, fut, sock, address): fut.set_exception(exc) else: fut.set_result(None) + finally: + fut = None async def sock_accept(self, sock): """Accept a connection. diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index acc97daafecc0b..4dad436fb4187f 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -439,7 +439,11 @@ def select(self, timeout=None): self._poll(timeout) tmp = self._results self._results = [] - return tmp + try: + return tmp + finally: + # Needed to break cycles when an exception occurs. + tmp = None def _result(self, value): fut = self._loop.create_future() @@ -793,6 +797,8 @@ def _poll(self, timeout=None): else: f.set_result(value) self._results.append(f) + finally: + f = None # Remove unregistered futures for ov in self._unregistered: diff --git a/Misc/NEWS.d/next/Library/2022-08-06-12-18-07.gh-issue-88863.NnqsuJ.rst b/Misc/NEWS.d/next/Library/2022-08-06-12-18-07.gh-issue-88863.NnqsuJ.rst new file mode 100644 index 00000000000000..23f8cb01cf0ac8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-08-06-12-18-07.gh-issue-88863.NnqsuJ.rst @@ -0,0 +1,3 @@ +To avoid apparent memory leaks when :func:`asyncio.open_connection` raises, +break reference cycles generated by local exception and future instances +(which has exception instance as its member var). Patch by Dong Uk, Kang. From d15b9f19ac0ffb29b646735d69b29f48a71c247f Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 22 Nov 2022 16:41:57 +0100 Subject: [PATCH 014/112] gh-93937: Document PyFrame_Check and PyFrame_Type (GH-99695) --- Doc/c-api/frame.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index b81faab1d97009..1ac8f03d6e48f8 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -19,6 +19,24 @@ can be used to get a frame object. See also :ref:`Reflection `. +.. c:var:: PyTypeObject PyFrame_Type + + The type of frame objects. + It is the same object as :py:class:`types.FrameType` in the Python layer. + + .. versionchanged:: 3.11 + + Previously, this type was only available after including + ````. + +.. c:function:: int PyFrame_Check(PyObject *obj) + + Return non-zero if *obj* is a frame object. + + .. 
versionchanged:: 3.11 + + Previously, this function was only available after including + ````. .. c:function:: PyFrameObject* PyFrame_GetBack(PyFrameObject *frame) From c2102136be569e6fc8ed90181f229b46d07142f8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Nov 2022 17:49:37 +0200 Subject: [PATCH 015/112] gh-99645: Fix a bug in handling class cleanups in unittest.TestCase (GH-99646) Now addClassCleanup() uses separate lists for different TestCase subclasses, and doClassCleanups() only cleans up the particular class. --- Lib/test/test_unittest/test_runner.py | 27 +++++++++++++++++++ Lib/unittest/case.py | 10 +++---- ...2-11-21-13-49-03.gh-issue-99645.9w1QKq.rst | 3 +++ 3 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-21-13-49-03.gh-issue-99645.9w1QKq.rst diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 569b2654aa12eb..df584b7620d092 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -547,6 +547,33 @@ def testNothing(self): self.assertEqual(TestableTest._class_cleanups, []) + def test_run_nested_test(self): + ordering = [] + + class InnerTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + ordering.append('inner setup') + cls.addClassCleanup(ordering.append, 'inner cleanup') + def test(self): + ordering.append('inner test') + + class OuterTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + ordering.append('outer setup') + cls.addClassCleanup(ordering.append, 'outer cleanup') + def test(self): + ordering.append('start outer test') + runTests(InnerTest) + ordering.append('end outer test') + + runTests(OuterTest) + self.assertEqual(ordering, [ + 'outer setup', 'start outer test', + 'inner setup', 'inner test', 'inner cleanup', + 'end outer test', 'outer cleanup']) + class TestModuleCleanUp(unittest.TestCase): def test_add_and_do_ModuleCleanup(self): diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index b01f6605e23e39..5167c5f843f085 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -384,11 +384,11 @@ class TestCase(object): # of difflib. See #11763. _diffThreshold = 2**16 - # Attribute used by TestSuite for classSetUp - - _classSetupFailed = False - - _class_cleanups = [] + def __init_subclass__(cls, *args, **kwargs): + # Attribute used by TestSuite for classSetUp + cls._classSetupFailed = False + cls._class_cleanups = [] + super().__init_subclass__(*args, **kwargs) def __init__(self, methodName='runTest'): """Create an instance of the class that will use the named test diff --git a/Misc/NEWS.d/next/Library/2022-11-21-13-49-03.gh-issue-99645.9w1QKq.rst b/Misc/NEWS.d/next/Library/2022-11-21-13-49-03.gh-issue-99645.9w1QKq.rst new file mode 100644 index 00000000000000..f6ee449891d9f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-21-13-49-03.gh-issue-99645.9w1QKq.rst @@ -0,0 +1,3 @@ +Fix a bug in handling class cleanups in :class:`unittest.TestCase`. Now +``addClassCleanup()`` uses separate lists for different ``TestCase`` +subclasses, and ``doClassCleanups()`` only cleans up the particular class. 
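A made-up two-class example of the behaviour this fix guarantees (the class
names and cleanup callables below are illustrative, not taken from the test
suite changes above)::

    import unittest

    class A(unittest.TestCase):
        pass

    class B(unittest.TestCase):
        pass

    A.addClassCleanup(print, 'clean A')
    B.addClassCleanup(print, 'clean B')

    # Each subclass now keeps its own cleanup list, so running B's
    # cleanups no longer also runs the cleanups registered on A.
    B.doClassCleanups()   # prints: clean B
    A.doClassCleanups()   # prints: clean A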
From 1b2de89bce7eee3c63ce2286f071db57cd2cfa22 Mon Sep 17 00:00:00 2001 From: Charles Machalow Date: Tue, 22 Nov 2022 09:19:34 -0800 Subject: [PATCH 016/112] gh-99547: Add isjunction methods for checking if a path is a junction (GH-99548) --- Doc/library/os.path.rst | 9 +++++ Doc/library/os.rst | 15 ++++++-- Doc/library/pathlib.rst | 8 +++++ Doc/whatsnew/3.12.rst | 12 +++++++ Lib/ntpath.py | 20 ++++++++++- Lib/pathlib.py | 6 ++++ Lib/posixpath.py | 12 ++++++- Lib/shutil.py | 25 ++++---------- Lib/test/test_ntpath.py | 17 ++++++++++ Lib/test/test_os.py | 17 ++++++++++ Lib/test/test_pathlib.py | 7 ++++ Lib/test/test_posixpath.py | 3 ++ ...2-11-16-21-35-30.gh-issue-99547.p_c_bp.rst | 1 + Modules/clinic/posixmodule.c.h | 34 ++++++++++++++++++- Modules/posixmodule.c | 20 +++++++++++ 15 files changed, 182 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-16-21-35-30.gh-issue-99547.p_c_bp.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 6d52a03ba95704..50e089653fe71b 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -266,6 +266,15 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. +.. function:: isjunction(path) + + Return ``True`` if *path* refers to an :func:`existing ` directory + entry that is a junction. Always return ``False`` if junctions are not + supported on the current platform. + + .. versionadded:: 3.12 + + .. function:: islink(path) Return ``True`` if *path* refers to an :func:`existing ` directory diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 3387d0842da8b6..775aa32df99a46 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2738,6 +2738,17 @@ features: This method can raise :exc:`OSError`, such as :exc:`PermissionError`, but :exc:`FileNotFoundError` is caught and not raised. + .. method:: is_junction() + + Return ``True`` if this entry is a junction (even if broken); + return ``False`` if the entry points to a regular directory, any kind + of file, a symlink, or if it doesn't exist anymore. + + The result is cached on the ``os.DirEntry`` object. Call + :func:`os.path.isjunction` to fetch up-to-date information. + + .. versionadded:: 3.12 + .. method:: stat(*, follow_symlinks=True) Return a :class:`stat_result` object for this entry. This method @@ -2760,8 +2771,8 @@ features: Note that there is a nice correspondence between several attributes and methods of ``os.DirEntry`` and of :class:`pathlib.Path`. In particular, the ``name`` attribute has the same - meaning, as do the ``is_dir()``, ``is_file()``, ``is_symlink()`` - and ``stat()`` methods. + meaning, as do the ``is_dir()``, ``is_file()``, ``is_symlink()``, + ``is_junction()``, and ``stat()`` methods. .. versionadded:: 3.5 diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 944963e1e1ae79..6537637f33c70e 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -891,6 +891,14 @@ call fails (for example because the path doesn't exist). other errors (such as permission errors) are propagated. +.. method:: Path.is_junction() + + Return ``True`` if the path points to a junction, and ``False`` for any other + type of file. Currently only Windows supports junctions. + + .. versionadded:: 3.12 + + .. 
method:: Path.is_mount() Return ``True`` if the path is a :dfn:`mount point`: a point in a diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 8e9a4f04a89056..a9b69c2ebf43bf 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -234,6 +234,10 @@ pathlib more consistent with :func:`os.path.relpath`. (Contributed by Domenico Ragusa in :issue:`40358`.) +* Add :meth:`pathlib.Path.is_junction` as a proxy to :func:`os.path.isjunction`. + (Contributed by Charles Machalow in :gh:`99547`.) + + dis --- @@ -252,6 +256,14 @@ os for a process with :func:`os.pidfd_open` in non-blocking mode. (Contributed by Kumar Aditya in :gh:`93312`.) +* Add :func:`os.path.isjunction` to check if a given path is a junction. + (Contributed by Charles Machalow in :gh:`99547`.) + +* :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction` + method to check if the entry is a junction. + (Contributed by Charles Machalow in :gh:`99547`.) + + shutil ------ diff --git a/Lib/ntpath.py b/Lib/ntpath.py index d9582f4087433e..873c884c3bd934 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -30,7 +30,7 @@ "ismount", "expanduser","expandvars","normpath","abspath", "curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", - "samefile", "sameopenfile", "samestat", "commonpath"] + "samefile", "sameopenfile", "samestat", "commonpath", "isjunction"] def _get_bothseps(path): if isinstance(path, bytes): @@ -267,6 +267,24 @@ def islink(path): return False return stat.S_ISLNK(st.st_mode) + +# Is a path a junction? + +if hasattr(os.stat_result, 'st_reparse_tag'): + def isjunction(path): + """Test whether a path is a junction""" + try: + st = os.lstat(path) + except (OSError, ValueError, AttributeError): + return False + return bool(st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT) +else: + def isjunction(path): + """Test whether a path is a junction""" + os.fspath(path) + return False + + # Being true for dangling symbolic links is also useful. def lexists(path): diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 068d1b02f484bf..bc57ae60e725b2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1223,6 +1223,12 @@ def is_symlink(self): # Non-encodable path return False + def is_junction(self): + """ + Whether this path is a junction. + """ + return self._flavour.pathmod.isjunction(self) + def is_block_device(self): """ Whether this path is a block device. diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 5b4d78bca06132..737f8a5c156d81 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -35,7 +35,7 @@ "samefile","sameopenfile","samestat", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", "devnull","realpath","supports_unicode_filenames","relpath", - "commonpath"] + "commonpath", "isjunction"] def _get_sep(path): @@ -169,6 +169,16 @@ def islink(path): return False return stat.S_ISLNK(st.st_mode) + +# Is a path a junction? + +def isjunction(path): + """Test whether a path is a junction + Junctions are not a part of posix semantics""" + os.fspath(path) + return False + + # Being true for dangling symbolic links is also useful. 
def lexists(path): diff --git a/Lib/shutil.py b/Lib/shutil.py index f5687e3b346ef2..f372406a6c51a8 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -565,18 +565,6 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, dirs_exist_ok=dirs_exist_ok) if hasattr(os.stat_result, 'st_file_attributes'): - # Special handling for directory junctions to make them behave like - # symlinks for shutil.rmtree, since in general they do not appear as - # regular links. - def _rmtree_isdir(entry): - try: - st = entry.stat(follow_symlinks=False) - return (stat.S_ISDIR(st.st_mode) and not - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) - except OSError: - return False - def _rmtree_islink(path): try: st = os.lstat(path) @@ -586,12 +574,6 @@ def _rmtree_islink(path): except OSError: return False else: - def _rmtree_isdir(entry): - try: - return entry.is_dir(follow_symlinks=False) - except OSError: - return False - def _rmtree_islink(path): return os.path.islink(path) @@ -605,7 +587,12 @@ def _rmtree_unsafe(path, onerror): entries = [] for entry in entries: fullname = entry.path - if _rmtree_isdir(entry): + try: + is_dir = entry.is_dir(follow_symlinks=False) + except OSError: + is_dir = False + + if is_dir and not entry.is_junction(): try: if entry.is_symlink(): # This can only happen if someone replaces diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index d51946322c8056..336648273b6cf1 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -856,6 +856,23 @@ def test_nt_helpers(self): self.assertIsInstance(b_final_path, bytes) self.assertGreater(len(b_final_path), 0) + @unittest.skipIf(sys.platform != 'win32', "Can only test junctions with creation on win32.") + def test_isjunction(self): + with os_helper.temp_dir() as d: + with os_helper.change_cwd(d): + os.mkdir('tmpdir') + + import _winapi + try: + _winapi.CreateJunction('tmpdir', 'testjunc') + except OSError: + raise unittest.SkipTest('creating the test junction failed') + + self.assertTrue(ntpath.isjunction('testjunc')) + self.assertFalse(ntpath.isjunction('tmpdir')) + self.assertPathEqual(ntpath.realpath('testjunc'), ntpath.realpath('tmpdir')) + + class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase): pathmodule = ntpath attributes = ['relpath'] diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index cb78e8cb77de1c..94db8bb7737acd 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -4158,6 +4158,8 @@ def check_entry(self, entry, name, is_dir, is_file, is_symlink): self.assertEqual(entry.is_file(follow_symlinks=False), stat.S_ISREG(entry_lstat.st_mode)) + self.assertEqual(entry.is_junction(), os.path.isjunction(entry.path)) + self.assert_stat_equal(entry.stat(), entry_stat, os.name == 'nt' and not is_symlink) @@ -4206,6 +4208,21 @@ def test_attributes(self): entry = entries['symlink_file.txt'] self.check_entry(entry, 'symlink_file.txt', False, True, True) + @unittest.skipIf(sys.platform != 'win32', "Can only test junctions with creation on win32.") + def test_attributes_junctions(self): + dirname = os.path.join(self.path, "tgtdir") + os.mkdir(dirname) + + import _winapi + try: + _winapi.CreateJunction(dirname, os.path.join(self.path, "srcjunc")) + except OSError: + raise unittest.SkipTest('creating the test junction failed') + + entries = self.get_entries(['srcjunc', 'tgtdir']) + self.assertEqual(entries['srcjunc'].is_junction(), True) + self.assertEqual(entries['tgtdir'].is_junction(), False) + def 
get_entry(self, name): path = self.bytes_path if isinstance(name, bytes) else self.path entries = list(os.scandir(path)) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3b1f302cc964ba..94401e5429cdf2 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2411,6 +2411,13 @@ def test_is_symlink(self): self.assertIs((P / 'linkA\udfff').is_file(), False) self.assertIs((P / 'linkA\x00').is_file(), False) + def test_is_junction(self): + P = self.cls(BASE) + + with mock.patch.object(P._flavour, 'pathmod'): + self.assertEqual(P.is_junction(), P._flavour.pathmod.isjunction.return_value) + P._flavour.pathmod.isjunction.assert_called_once_with(P) + def test_is_fifo_false(self): P = self.cls(BASE) self.assertFalse((P / 'fileA').is_fifo()) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 8a1dd131928cff..6c1c0f5577b7ec 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -244,6 +244,9 @@ def fake_lstat(path): finally: os.lstat = save_lstat + def test_isjunction(self): + self.assertFalse(posixpath.isjunction(ABSTFN)) + def test_expanduser(self): self.assertEqual(posixpath.expanduser("foo"), "foo") self.assertEqual(posixpath.expanduser(b"foo"), b"foo") diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-16-21-35-30.gh-issue-99547.p_c_bp.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-16-21-35-30.gh-issue-99547.p_c_bp.rst new file mode 100644 index 00000000000000..7e3c52924213ec --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-16-21-35-30.gh-issue-99547.p_c_bp.rst @@ -0,0 +1 @@ +Add a function to os.path to check if a path is a junction: isjunction. Add similar functionality to pathlib.Path as is_junction. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 1ad96ea296ea68..f9f6ca372ec6c7 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -10269,6 +10269,38 @@ os_DirEntry_is_symlink(DirEntry *self, PyTypeObject *defining_class, PyObject *c return return_value; } +PyDoc_STRVAR(os_DirEntry_is_junction__doc__, +"is_junction($self, /)\n" +"--\n" +"\n" +"Return True if the entry is a junction; cached per entry."); + +#define OS_DIRENTRY_IS_JUNCTION_METHODDEF \ + {"is_junction", _PyCFunction_CAST(os_DirEntry_is_junction), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, os_DirEntry_is_junction__doc__}, + +static int +os_DirEntry_is_junction_impl(DirEntry *self, PyTypeObject *defining_class); + +static PyObject * +os_DirEntry_is_junction(DirEntry *self, PyTypeObject *defining_class, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + int _return_value; + + if (nargs) { + PyErr_SetString(PyExc_TypeError, "is_junction() takes no arguments"); + goto exit; + } + _return_value = os_DirEntry_is_junction_impl(self, defining_class); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(os_DirEntry_stat__doc__, "stat($self, /, *, follow_symlinks=True)\n" "--\n" @@ -11517,4 +11549,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=90f5e6995114e5ca input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4192d8e09e216300 input=a9049054013a1b77]*/ diff --git 
a/Modules/posixmodule.c b/Modules/posixmodule.c index 98fc264aff6bf9..45e71ee9c0598d 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -13633,6 +13633,25 @@ os_DirEntry_is_symlink_impl(DirEntry *self, PyTypeObject *defining_class) #endif } +/*[clinic input] +os.DirEntry.is_junction -> bool + defining_class: defining_class + / + +Return True if the entry is a junction; cached per entry. +[clinic start generated code]*/ + +static int +os_DirEntry_is_junction_impl(DirEntry *self, PyTypeObject *defining_class) +/*[clinic end generated code: output=7061a07b0ef2cd1f input=475cd36fb7d4723f]*/ +{ +#ifdef MS_WINDOWS + return self->win32_lstat.st_reparse_tag == IO_REPARSE_TAG_MOUNT_POINT; +#else + return 0; +#endif +} + static PyObject * DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) { @@ -13927,6 +13946,7 @@ static PyMethodDef DirEntry_methods[] = { OS_DIRENTRY_IS_DIR_METHODDEF OS_DIRENTRY_IS_FILE_METHODDEF OS_DIRENTRY_IS_SYMLINK_METHODDEF + OS_DIRENTRY_IS_JUNCTION_METHODDEF OS_DIRENTRY_STAT_METHODDEF OS_DIRENTRY_INODE_METHODDEF OS_DIRENTRY___FSPATH___METHODDEF From 5d41833cc04292ced35102ba71460a06b86a2a98 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 22 Nov 2022 17:20:47 +0000 Subject: [PATCH 017/112] Update Visual Studio solution to build all extension modules on F5 (GH-99667) Without these "forward" dependencies, VS would only build as far as necessary to launch the selected project. --- PCbuild/pcbuild.sln | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index d8fc00c149ba66..848d59504381cc 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -10,7 +10,42 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "python", "python.vcxproj", "{B11D750F-CD1F-4A96-85CE-E69A5C5259F9}" ProjectSection(ProjectDependencies) = postProject + {9E48B300-37D1-11DD-8C41-005056C00008} = {9E48B300-37D1-11DD-8C41-005056C00008} + {9EC7190A-249F-4180-A900-548FDCF3055F} = {9EC7190A-249F-4180-A900-548FDCF3055F} + {78D80A15-BD8C-44E2-B49E-1F05B0A0A687} = {78D80A15-BD8C-44E2-B49E-1F05B0A0A687} + {6901D91C-6E48-4BB7-9FEC-700C8131DF1D} = {6901D91C-6E48-4BB7-9FEC-700C8131DF1D} + {54B1431F-B86B-4ACB-B28C-88BCF93191D8} = {54B1431F-B86B-4ACB-B28C-88BCF93191D8} + {F749B822-B489-4CA5-A3AD-CE078F5F338A} = {F749B822-B489-4CA5-A3AD-CE078F5F338A} + {D06B6426-4762-44CC-8BAD-D79052507F2F} = {D06B6426-4762-44CC-8BAD-D79052507F2F} + {36D0C52C-DF4E-45D0-8BC7-E294C3ABC781} = {36D0C52C-DF4E-45D0-8BC7-E294C3ABC781} + {CB435430-EBB1-478B-8F4E-C256F6838F55} = {CB435430-EBB1-478B-8F4E-C256F6838F55} + {17E1E049-C309-4D79-843F-AE483C264AEA} = {17E1E049-C309-4D79-843F-AE483C264AEA} + {384C224A-7474-476E-A01B-750EA7DE918C} = {384C224A-7474-476E-A01B-750EA7DE918C} + {12728250-16EC-4DC6-94D7-E21DD88947F8} = {12728250-16EC-4DC6-94D7-E21DD88947F8} + {86937F53-C189-40EF-8CE8-8759D8E7D480} = {86937F53-C189-40EF-8CE8-8759D8E7D480} + {28B5D777-DDF2-4B6B-B34F-31D938813856} = {28B5D777-DDF2-4B6B-B34F-31D938813856} + {31FFC478-7B4A-43E8-9954-8D03E2187E9C} = {31FFC478-7B4A-43E8-9954-8D03E2187E9C} + {F9D71780-F393-11E0-BE50-0800200C9A66} = {F9D71780-F393-11E0-BE50-0800200C9A66} + {494BAC80-A60C-43A9-99E7-ACB691CE2C4D} = {494BAC80-A60C-43A9-99E7-ACB691CE2C4D} + {C6E20F84-3247-4AD6-B051-B073268F73BA} = {C6E20F84-3247-4AD6-B051-B073268F73BA} + {B244E787-C445-441C-BDF4-5A4F1A3A1E51} = {B244E787-C445-441C-BDF4-5A4F1A3A1E51} + 
{18CAE28C-B454-46C1-87A0-493D91D97F03} = {18CAE28C-B454-46C1-87A0-493D91D97F03} + {13CECB97-4119-4316-9D42-8534019A5A44} = {13CECB97-4119-4316-9D42-8534019A5A44} + {885D4898-D08D-4091-9C40-C700CFE3FC5A} = {885D4898-D08D-4091-9C40-C700CFE3FC5A} + {447F05A8-F581-4CAC-A466-5AC7936E207E} = {447F05A8-F581-4CAC-A466-5AC7936E207E} + {ECC7CEAC-A5E5-458E-BB9E-2413CC847881} = {ECC7CEAC-A5E5-458E-BB9E-2413CC847881} + {4946ECAC-2E69-4BF8-A90A-F5136F5094DF} = {4946ECAC-2E69-4BF8-A90A-F5136F5094DF} + {FDB84CBB-2FB6-47C8-A2D6-091E0833239D} = {FDB84CBB-2FB6-47C8-A2D6-091E0833239D} + {73FCD2BD-F133-46B7-8EC1-144CD82A59D5} = {73FCD2BD-F133-46B7-8EC1-144CD82A59D5} + {2097F1C1-597C-4167-93E3-656A7D6339B2} = {2097F1C1-597C-4167-93E3-656A7D6339B2} + {A2697BD3-28C1-4AEC-9106-8B748639FD16} = {A2697BD3-28C1-4AEC-9106-8B748639FD16} + {900342D7-516A-4469-B1AD-59A66E49A25F} = {900342D7-516A-4469-B1AD-59A66E49A25F} + {6DAC66D9-E703-4624-BE03-49112AB5AA62} = {6DAC66D9-E703-4624-BE03-49112AB5AA62} + {0E9791DB-593A-465F-98BC-681011311617} = {0E9791DB-593A-465F-98BC-681011311617} {0E9791DB-593A-465F-98BC-681011311618} = {0E9791DB-593A-465F-98BC-681011311618} + {EB6E69DD-04BF-4543-9B92-49FAABCEAC2E} = {EB6E69DD-04BF-4543-9B92-49FAABCEAC2E} + {16BFE6F0-22EF-40B5-B831-7E937119EF10} = {16BFE6F0-22EF-40B5-B831-7E937119EF10} + {FCBE1EF2-E0F0-40B1-88B5-00A35D378742} = {FCBE1EF2-E0F0-40B1-88B5-00A35D378742} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pythoncore", "pythoncore.vcxproj", "{CF7AC3D1-E2DF-41D2-BEA6-1E2556CDEA26}" @@ -19,6 +54,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pythoncore", "pythoncore.vc EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pythonw", "pythonw.vcxproj", "{F4229CC3-873C-49AE-9729-DD308ED4CD4A}" + ProjectSection(ProjectDependencies) = postProject + {B11D750F-CD1F-4A96-85CE-E69A5C5259F9} = {B11D750F-CD1F-4A96-85CE-E69A5C5259F9} + EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "winsound", "winsound.vcxproj", "{28B5D777-DDF2-4B6B-B34F-31D938813856}" EndProject @@ -101,12 +139,18 @@ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblzma", "liblzma.vcxproj", "{12728250-16EC-4DC6-94D7-E21DD88947F8}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "python_uwp", "python_uwp.vcxproj", "{9DE9E23D-C8D4-4817-92A9-920A8B1FE5FF}" + ProjectSection(ProjectDependencies) = postProject + {B11D750F-CD1F-4A96-85CE-E69A5C5259F9} = {B11D750F-CD1F-4A96-85CE-E69A5C5259F9} + EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "venvlauncher", "venvlauncher.vcxproj", "{494BAC80-A60C-43A9-99E7-ACB691CE2C4D}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "venvwlauncher", "venvwlauncher.vcxproj", "{FDB84CBB-2FB6-47C8-A2D6-091E0833239D}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pythonw_uwp", "pythonw_uwp.vcxproj", "{AB603547-1E2A-45B3-9E09-B04596006393}" + ProjectSection(ProjectDependencies) = postProject + {F4229CC3-873C-49AE-9729-DD308ED4CD4A} = {F4229CC3-873C-49AE-9729-DD308ED4CD4A} + EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_uuid", "_uuid.vcxproj", "{CB435430-EBB1-478B-8F4E-C256F6838F55}" EndProject From 22d91c16bb03c3d87f53b5fee10325b876262a78 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Tue, 22 Nov 2022 12:06:36 -0600 Subject: [PATCH 018/112] gh-99146 struct module documentation should have more predictable examples/warnings (GH-99141) * nail down a couple 
examples to have more predictable output * update a number of things, but this is really just a stash... * added an applications section to describe typical uses for native and machine-independent formats * make sure all format strings use a format prefix character * responding to comments from @gpshead. Not likely finished yet. * This got more involved than I expected... * respond to several PR comments * a lot of wordsmithing * try and be more consistent in use of ``x`` vs ``'x'`` * expand examples a bit * update the "see also" to be more up-to-date * original examples relied on import * so present all examples as if * reformat based on @gpshead comment (missed before) * responding to comments * missed this * one more suggested edit * wordsmithing --- Doc/library/struct.rst | 282 ++++++++++++++++++++++++++++++----------- 1 file changed, 206 insertions(+), 76 deletions(-) diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index 620f50376beb62..50d70731f77523 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -12,21 +12,25 @@ -------------- -This module performs conversions between Python values and C structs represented -as Python :class:`bytes` objects. This can be used in handling binary data -stored in files or from network connections, among other sources. It uses -:ref:`struct-format-strings` as compact descriptions of the layout of the C -structs and the intended conversion to/from Python values. +This module converts between Python values and C structs represented +as Python :class:`bytes` objects. Compact :ref:`format strings ` +describe the intended conversions to/from Python values. +The module's functions and objects can be used for two largely +distinct applications, data exchange with external sources (files or +network connections), or data transfer between the Python application +and the C layer. .. note:: - By default, the result of packing a given C struct includes pad bytes in - order to maintain proper alignment for the C types involved; similarly, - alignment is taken into account when unpacking. This behavior is chosen so - that the bytes of a packed struct correspond exactly to the layout in memory - of the corresponding C struct. To handle platform-independent data formats - or omit implicit pad bytes, use ``standard`` size and alignment instead of - ``native`` size and alignment: see :ref:`struct-alignment` for details. + When no prefix character is given, native mode is the default. It + packs or unpacks data based on the platform and compiler on which + the Python interpreter was built. + The result of packing a given C struct includes pad bytes which + maintain proper alignment for the C types involved; similarly, + alignment is taken into account when unpacking. In contrast, when + communicating data between external sources, the programmer is + responsible for defining byte ordering and padding between elements. + See :ref:`struct-alignment` for details. Several :mod:`struct` functions (and methods of :class:`Struct`) take a *buffer* argument. This refers to objects that implement the :ref:`bufferobjects` and @@ -102,10 +106,13 @@ The module defines the following exception and functions: Format Strings -------------- -Format strings are the mechanism used to specify the expected layout when -packing and unpacking data. They are built up from :ref:`format-characters`, -which specify the type of data being packed/unpacked. In addition, there are -special characters for controlling the :ref:`struct-alignment`. 
+Format strings describe the data layout when +packing and unpacking data. They are built up from :ref:`format characters`, +which specify the type of data being packed/unpacked. In addition, +special characters control the :ref:`byte order, size and alignment`. +Each format string consists of an optional prefix character which +describes the overall properties of the data and one or more format +characters which describe the actual data values and padding. .. _struct-alignment: @@ -116,6 +123,11 @@ Byte Order, Size, and Alignment By default, C types are represented in the machine's native format and byte order, and properly aligned by skipping pad bytes if necessary (according to the rules used by the C compiler). +This behavior is chosen so +that the bytes of a packed struct correspond exactly to the memory layout +of the corresponding C struct. +Whether to use native byte ordering +and padding or standard formats depends on the application. .. index:: single: @ (at); in struct format strings @@ -144,12 +156,10 @@ following table: If the first character is not one of these, ``'@'`` is assumed. -Native byte order is big-endian or little-endian, depending on the host -system. For example, Intel x86 and AMD64 (x86-64) are little-endian; -IBM z and most legacy architectures are big-endian; -and ARM, RISC-V and IBM Power feature switchable endianness -(bi-endian, though the former two are nearly always little-endian in practice). -Use ``sys.byteorder`` to check the endianness of your system. +Native byte order is big-endian or little-endian, depending on the +host system. For example, Intel x86, AMD64 (x86-64), and Apple M1 are +little-endian; IBM z and many legacy architectures are big-endian. +Use :data:`sys.byteorder` to check the endianness of your system. Native size and alignment are determined using the C compiler's ``sizeof`` expression. This is always combined with native byte order. @@ -231,9 +241,9 @@ platform-dependent. +--------+--------------------------+--------------------+----------------+------------+ | ``d`` | :c:expr:`double` | float | 8 | \(4) | +--------+--------------------------+--------------------+----------------+------------+ -| ``s`` | :c:expr:`char[]` | bytes | | | +| ``s`` | :c:expr:`char[]` | bytes | | \(9) | +--------+--------------------------+--------------------+----------------+------------+ -| ``p`` | :c:expr:`char[]` | bytes | | | +| ``p`` | :c:expr:`char[]` | bytes | | \(8) | +--------+--------------------------+--------------------+----------------+------------+ | ``P`` | :c:expr:`void \*` | integer | | \(5) | +--------+--------------------------+--------------------+----------------+------------+ @@ -292,8 +302,33 @@ Notes: format `_ for more information. (7) - For padding, ``x`` inserts null bytes. - + When packing, ``'x'`` inserts one NUL byte. + +(8) + The ``'p'`` format character encodes a "Pascal string", meaning a short + variable-length string stored in a *fixed number of bytes*, given by the count. + The first byte stored is the length of the string, or 255, whichever is + smaller. The bytes of the string follow. If the string passed in to + :func:`pack` is too long (longer than the count minus 1), only the leading + ``count-1`` bytes of the string are stored. If the string is shorter than + ``count-1``, it is padded with null bytes so that exactly count bytes in all + are used. Note that for :func:`unpack`, the ``'p'`` format character consumes + ``count`` bytes, but that the string returned can never contain more than 255 + bytes. 
+ +(9) + For the ``'s'`` format character, the count is interpreted as the length of the + bytes, not a repeat count like for the other format characters; for example, + ``'10s'`` means a single 10-byte string mapping to or from a single + Python byte string, while ``'10c'`` means 10 + separate one byte character elements (e.g., ``cccccccccc``) mapping + to or from ten different Python byte objects. (See :ref:`struct-examples` + for a concrete demonstration of the difference.) + If a count is not given, it defaults to 1. For packing, the string is + truncated or padded with null bytes as appropriate to make it fit. For + unpacking, the resulting bytes object always has exactly the specified number + of bytes. As a special case, ``'0s'`` means a single, empty string (while + ``'0c'`` means 0 characters). A format character may be preceded by an integral repeat count. For example, the format string ``'4h'`` means exactly the same as ``'hhhh'``. @@ -301,15 +336,6 @@ the format string ``'4h'`` means exactly the same as ``'hhhh'``. Whitespace characters between formats are ignored; a count and its format must not contain whitespace though. -For the ``'s'`` format character, the count is interpreted as the length of the -bytes, not a repeat count like for the other format characters; for example, -``'10s'`` means a single 10-byte string, while ``'10c'`` means 10 characters. -If a count is not given, it defaults to 1. For packing, the string is -truncated or padded with null bytes as appropriate to make it fit. For -unpacking, the resulting bytes object always has exactly the specified number -of bytes. As a special case, ``'0s'`` means a single, empty string (while -``'0c'`` means 0 characters). - When packing a value ``x`` using one of the integer formats (``'b'``, ``'B'``, ``'h'``, ``'H'``, ``'i'``, ``'I'``, ``'l'``, ``'L'``, ``'q'``, ``'Q'``), if ``x`` is outside the valid range for that format @@ -319,17 +345,6 @@ then :exc:`struct.error` is raised. Previously, some of the integer formats wrapped out-of-range values and raised :exc:`DeprecationWarning` instead of :exc:`struct.error`. -The ``'p'`` format character encodes a "Pascal string", meaning a short -variable-length string stored in a *fixed number of bytes*, given by the count. -The first byte stored is the length of the string, or 255, whichever is -smaller. The bytes of the string follow. If the string passed in to -:func:`pack` is too long (longer than the count minus 1), only the leading -``count-1`` bytes of the string are stored. If the string is shorter than -``count-1``, it is padded with null bytes so that exactly count bytes in all -are used. Note that for :func:`unpack`, the ``'p'`` format character consumes -``count`` bytes, but that the string returned can never contain more than 255 -bytes. - .. index:: single: ? (question mark); in struct format strings For the ``'?'`` format character, the return value is either :const:`True` or @@ -345,18 +360,36 @@ Examples ^^^^^^^^ .. note:: - All examples assume a native byte order, size, and alignment with a - big-endian machine. + Native byte order examples (designated by the ``'@'`` format prefix or + lack of any prefix character) may not match what the reader's + machine produces as + that depends on the platform and compiler. 
+ +Pack and unpack integers of three different sizes, using big endian +ordering:: -A basic example of packing/unpacking three integers:: + >>> from struct import * + >>> pack(">bhl", 1, 2, 3) + b'\x01\x00\x02\x00\x00\x00\x03' + >>> unpack('>bhl', b'\x01\x00\x02\x00\x00\x00\x03' + (1, 2, 3) + >>> calcsize('>bhl') + 7 - >>> from struct import * - >>> pack('hhl', 1, 2, 3) - b'\x00\x01\x00\x02\x00\x00\x00\x03' - >>> unpack('hhl', b'\x00\x01\x00\x02\x00\x00\x00\x03') - (1, 2, 3) - >>> calcsize('hhl') - 8 +Attempt to pack an integer which is too large for the defined field:: + + >>> pack(">h", 99999) + Traceback (most recent call last): + File "", line 1, in + struct.error: 'h' format requires -32768 <= number <= 32767 + +Demonstrate the difference between ``'s'`` and ``'c'`` format +characters:: + + >>> pack("@ccc", b'1', b'2', b'3') + b'123' + >>> pack("@3s", b'123') + b'123' Unpacked fields can be named by assigning them to variables or by wrapping the result in a named tuple:: @@ -369,35 +402,132 @@ the result in a named tuple:: >>> Student._make(unpack('<10sHHb', record)) Student(name=b'raymond ', serialnum=4658, school=264, gradelevel=8) -The ordering of format characters may have an impact on size since the padding -needed to satisfy alignment requirements is different:: - - >>> pack('ci', b'*', 0x12131415) - b'*\x00\x00\x00\x12\x13\x14\x15' - >>> pack('ic', 0x12131415, b'*') - b'\x12\x13\x14\x15*' - >>> calcsize('ci') +The ordering of format characters may have an impact on size in native +mode since padding is implicit. In standard mode, the user is +responsible for inserting any desired padding. +Note in +the first ``pack`` call below that three NUL bytes were added after the +packed ``'#'`` to align the following integer on a four-byte boundary. +In this example, the output was produced on a little endian machine:: + + >>> pack('@ci', b'#', 0x12131415) + b'#\x00\x00\x00\x15\x14\x13\x12' + >>> pack('@ic', 0x12131415, b'#') + b'\x15\x14\x13\x12#' + >>> calcsize('@ci') 8 - >>> calcsize('ic') + >>> calcsize('@ic') 5 -The following format ``'llh0l'`` specifies two pad bytes at the end, assuming -longs are aligned on 4-byte boundaries:: +The following format ``'llh0l'`` results in two pad bytes being added +at the end, assuming the platform's longs are aligned on 4-byte boundaries:: - >>> pack('llh0l', 1, 2, 3) + >>> pack('@llh0l', 1, 2, 3) b'\x00\x00\x00\x01\x00\x00\x00\x02\x00\x03\x00\x00' -This only works when native size and alignment are in effect; standard size and -alignment does not enforce any alignment. - .. seealso:: Module :mod:`array` Packed binary storage of homogeneous data. - Module :mod:`xdrlib` - Packing and unpacking of XDR data. + Module :mod:`json` + JSON encoder and decoder. + + Module :mod:`pickle` + Python object serialization. + + +.. _applications: + +Applications +------------ + +Two main applications for the :mod:`struct` module exist, data +interchange between Python and C code within an application or another +application compiled using the same compiler (:ref:`native formats`), and +data interchange between applications using agreed upon data layout +(:ref:`standard formats`). Generally speaking, the format strings +constructed for these two domains are distinct. + + +.. _struct-native-formats: + +Native Formats +^^^^^^^^^^^^^^ + +When constructing format strings which mimic native layouts, the +compiler and machine architecture determine byte ordering and padding. 
+In such cases, the ``@`` format character should be used to specify +native byte ordering and data sizes. Internal pad bytes are normally inserted +automatically. It is possible that a zero-repeat format code will be +needed at the end of a format string to round up to the correct +byte boundary for proper alignment of consective chunks of data. + +Consider these two simple examples (on a 64-bit, little-endian +machine):: + + >>> calcsize('@lhl') + 24 + >>> calcsize('@llh') + 18 + +Data is not padded to an 8-byte boundary at the end of the second +format string without the use of extra padding. A zero-repeat format +code solves that problem:: + + >>> calcsize('@llh0l') + 24 + +The ``'x'`` format code can be used to specify the repeat, but for +native formats it is better to use a zero-repeat format like ``'0l'``. + +By default, native byte ordering and alignment is used, but it is +better to be explicit and use the ``'@'`` prefix character. + + +.. _struct-standard-formats: + +Standard Formats +^^^^^^^^^^^^^^^^ + +When exchanging data beyond your process such as networking or storage, +be precise. Specify the exact byte order, size, and alignment. Do +not assume they match the native order of a particular machine. +For example, network byte order is big-endian, while many popular CPUs +are little-endian. By defining this explicitly, the user need not +care about the specifics of the platform their code is running on. +The first character should typically be ``<`` or ``>`` +(or ``!``). Padding is the responsibility of the programmer. The +zero-repeat format character won't work. Instead, the user must +explicitly add ``'x'`` pad bytes where needed. Revisiting the +examples from the previous section, we have:: + + >>> calcsize('>> pack('>> calcsize('@llh') + 18 + >>> pack('@llh', 1, 2, 3) == pack('>> calcsize('>> calcsize('@llh0l') + 24 + >>> pack('@llh0l', 1, 2, 3) == pack('>> calcsize('>> calcsize('@llh0l') + 12 + >>> pack('@llh0l', 1, 2, 3) == pack(' Date: Tue, 22 Nov 2022 14:13:54 -0600 Subject: [PATCH 019/112] gh-88226: Emit TARGET labels in Python/ceval.c when debugging, even if computed gotos aren't enabled (GH-98265) Keep target labels when debugging, but don't warn about lack of use. Co-authored-by: Eryk Sun --- ...2-10-16-12-49-24.gh-issue-88226.BsnQ4k.rst | 3 +++ Python/ceval.c | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2022-10-16-12-49-24.gh-issue-88226.BsnQ4k.rst diff --git a/Misc/NEWS.d/next/Build/2022-10-16-12-49-24.gh-issue-88226.BsnQ4k.rst b/Misc/NEWS.d/next/Build/2022-10-16-12-49-24.gh-issue-88226.BsnQ4k.rst new file mode 100644 index 00000000000000..5f32091739a282 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2022-10-16-12-49-24.gh-issue-88226.BsnQ4k.rst @@ -0,0 +1,3 @@ +Always define ``TARGET_*`` labels in ``Python/ceval.c``, even if +``USE_COMPUTED_GOTOS`` is disabled. This allows breakpoints to be +set at those labels in (for instance) ``gdb``. 
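To make the NEWS entry above concrete, here is a toy dispatch loop (not
CPython's; the opcodes and handlers are invented) showing how pairing a
``case`` with a named ``TARGET_*`` label lets a debugger break on one
specific opcode handler even when computed gotos are disabled::

    #include <stdio.h>

    enum { OP_LOAD, OP_ADD, OP_HALT };

    /* Same trick as the patch below: every opcode handler gets a named
     * label, so e.g. `break TARGET_OP_ADD` works in gdb. */
    #define TARGET(op) case op: TARGET_##op:

    static int
    run(const int *code)
    {
        int acc = 0;
        for (;;) {
            switch (*code++) {
                TARGET(OP_LOAD)
                    acc = *code++;
                    break;
                TARGET(OP_ADD)
                    acc += *code++;
                    break;
                TARGET(OP_HALT)
                    return acc;
                default:
                    return -1;
            }
        }
    }

    int
    main(void)
    {
        const int prog[] = {OP_LOAD, 2, OP_ADD, 3, OP_HALT};
        printf("%d\n", run(prog));   /* prints 5 */
        return 0;
    }

The extra labels go unused in this configuration, which is why the diff
below also wraps the evaluation loop in ``-Wunused-label`` / MSVC ``4102``
pragma guards.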
diff --git a/Python/ceval.c b/Python/ceval.c index d28fdeb627fa3e..80bfa21ad0b6f0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -678,11 +678,11 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) #endif #if USE_COMPUTED_GOTOS -#define TARGET(op) TARGET_##op: INSTRUCTION_START(op); -#define DISPATCH_GOTO() goto *opcode_targets[opcode] +# define TARGET(op) TARGET_##op: INSTRUCTION_START(op); +# define DISPATCH_GOTO() goto *opcode_targets[opcode] #else -#define TARGET(op) case op: INSTRUCTION_START(op); -#define DISPATCH_GOTO() goto dispatch_opcode +# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op); +# define DISPATCH_GOTO() goto dispatch_opcode #endif /* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */ @@ -1056,6 +1056,18 @@ static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { #define KWNAMES_LEN() \ (kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(kwnames))) +/* Disable unused label warnings. They are handy for debugging, even + if computed gotos aren't used. */ + +/* TBD - what about other compilers? */ +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-label" +#elif defined(_MSC_VER) /* MS_WINDOWS */ +# pragma warning(push) +# pragma warning(disable:4102) +#endif + PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { @@ -1435,6 +1447,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int goto error; } +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) /* MS_WINDOWS */ +# pragma warning(pop) +#endif static void format_missing(PyThreadState *tstate, const char *kind, From f5fea2288620cb2fda24f3eecc9d623331fe4401 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Wed, 23 Nov 2022 02:46:06 +0530 Subject: [PATCH 020/112] gh-99650 : Updated argparse docs (GH-99653) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index c55d94421e5b14..f8839d0986d047 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -1945,7 +1945,7 @@ Argument groups .. method:: ArgumentParser.add_argument_group(title=None, description=None) By default, :class:`ArgumentParser` groups command-line arguments into - "positional arguments" and "optional arguments" when displaying help + "positional arguments" and "options" when displaying help messages. When there is a better conceptual grouping of arguments than this default one, appropriate groups can be created using the :meth:`add_argument_group` method:: From f1a4a6a58736196f766d51f048d19a2b0a0a155a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2022 22:17:06 +0100 Subject: [PATCH 021/112] gh-99300: Replace Py_INCREF() with Py_NewRef() in _elementtree.c (#99696) * Replace Py_INCREF() and Py_XINCREF() using a cast with Py_NewRef() and Py_XNewRef() in Modules/_elementtree.c. * Make reference counting more explicit: don't steal implicitly a reference on PyList_SET_ITEM(), use Py_NewRef() instead. * Replace PyModule_AddObject() with PyModule_AddObjectRef(). 
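A rough sketch of the three kinds of rewrite listed above, using invented
names rather than anything from ``_elementtree.c``::

    #include <Python.h>

    typedef struct {
        PyObject_HEAD
        PyObject *cached;
    } holder;

    static void
    holder_set(holder *self, PyObject *value)
    {
        /* Was: Py_INCREF(value); Py_XSETREF(self->cached, value);
         * Py_NewRef() creates the new reference in the same expression
         * that hands it over. */
        Py_XSETREF(self->cached, Py_NewRef(value));
    }

    static void
    fill_pair(PyObject *list, PyObject *item)
    {
        /* Assumes `list` was just created with PyList_New(2).
         * PyList_SET_ITEM() steals its argument; wrapping the value in
         * Py_NewRef() makes that ownership transfer explicit. */
        PyList_SET_ITEM(list, 0, Py_NewRef(item));
        PyList_SET_ITEM(list, 1, Py_NewRef(item));
    }

    static int
    register_type(PyObject *module, PyTypeObject *type)
    {
        /* PyModule_AddObjectRef() does not steal a reference, unlike
         * PyModule_AddObject(), so there is no extra DECREF to get right
         * on the error path. */
        return PyModule_AddObjectRef(module, "Holder", (PyObject *)type);
    }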
--- Modules/_elementtree.c | 105 ++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 69 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 3df93651654a76..2da44cf2886ea4 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -411,14 +411,10 @@ element_init(PyObject *self, PyObject *args, PyObject *kwds) Py_XDECREF(attrib); /* Replace the objects already pointed to by tag, text and tail. */ - Py_INCREF(tag); - Py_XSETREF(self_elem->tag, tag); + Py_XSETREF(self_elem->tag, Py_NewRef(tag)); - Py_INCREF(Py_None); - _set_joined_ptr(&self_elem->text, Py_None); - - Py_INCREF(Py_None); - _set_joined_ptr(&self_elem->tail, Py_None); + _set_joined_ptr(&self_elem->text, Py_NewRef(Py_None)); + _set_joined_ptr(&self_elem->tail, Py_NewRef(Py_None)); return 0; } @@ -690,11 +686,8 @@ _elementtree_Element_clear_impl(ElementObject *self) { clear_extra(self); - Py_INCREF(Py_None); - _set_joined_ptr(&self->text, Py_None); - - Py_INCREF(Py_None); - _set_joined_ptr(&self->tail, Py_None); + _set_joined_ptr(&self->text, Py_NewRef(Py_None)); + _set_joined_ptr(&self->tail, Py_NewRef(Py_None)); Py_RETURN_NONE; } @@ -970,8 +963,7 @@ element_setstate_from_attributes(ElementObject *self, return NULL; } - Py_INCREF(tag); - Py_XSETREF(self->tag, tag); + Py_XSETREF(self->tag, Py_NewRef(tag)); text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; Py_INCREF(JOIN_OBJ(text)); @@ -1035,8 +1027,7 @@ element_setstate_from_attributes(ElementObject *self, } /* Stash attrib. */ - Py_XINCREF(attrib); - Py_XSETREF(self->extra->attrib, attrib); + Py_XSETREF(self->extra->attrib, Py_XNewRef(attrib)); dealloc_extra(oldextra); Py_RETURN_NONE; @@ -1173,8 +1164,7 @@ _elementtree_Element_extend(ElementObject *self, PyObject *elements) } for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) { - PyObject* element = PySequence_Fast_GET_ITEM(seq, i); - Py_INCREF(element); + PyObject* element = Py_NewRef(PySequence_Fast_GET_ITEM(seq, i)); if (element_add_subelement(self, element) < 0) { Py_DECREF(seq); Py_DECREF(element); @@ -1363,8 +1353,7 @@ _elementtree_Element_get_impl(ElementObject *self, PyObject *key, { if (self->extra && self->extra->attrib) { PyObject *attrib = Py_NewRef(self->extra->attrib); - PyObject *value = PyDict_GetItemWithError(attrib, key); - Py_XINCREF(value); + PyObject *value = Py_XNewRef(PyDict_GetItemWithError(attrib, key)); Py_DECREF(attrib); if (value != NULL || PyErr_Occurred()) { return value; @@ -1964,8 +1953,7 @@ static int element_tag_setter(ElementObject *self, PyObject *value, void *closure) { _VALIDATE_ATTR_VALUE(value); - Py_INCREF(value); - Py_SETREF(self->tag, value); + Py_SETREF(self->tag, Py_NewRef(value)); return 0; } @@ -1973,8 +1961,7 @@ static int element_text_setter(ElementObject *self, PyObject *value, void *closure) { _VALIDATE_ATTR_VALUE(value); - Py_INCREF(value); - _set_joined_ptr(&self->text, value); + _set_joined_ptr(&self->text, Py_NewRef(value)); return 0; } @@ -1982,8 +1969,7 @@ static int element_tail_setter(ElementObject *self, PyObject *value, void *closure) { _VALIDATE_ATTR_VALUE(value); - Py_INCREF(value); - _set_joined_ptr(&self->tail, value); + _set_joined_ptr(&self->tail, Py_NewRef(value)); return 0; } @@ -2001,8 +1987,7 @@ element_attrib_setter(ElementObject *self, PyObject *value, void *closure) if (create_extra(self, NULL) < 0) return -1; } - Py_INCREF(value); - Py_XSETREF(self->extra->attrib, value); + Py_XSETREF(self->extra->attrib, Py_NewRef(value)); return 0; } @@ -2149,9 +2134,8 @@ 
elementiter_next(ElementIterObject *it) } assert(Element_Check(extra->children[child_index])); - elem = (ElementObject *)extra->children[child_index]; + elem = (ElementObject *)Py_NewRef(extra->children[child_index]); item->child_index++; - Py_INCREF(elem); } if (parent_stack_push_new(it, elem) < 0) { @@ -2364,8 +2348,7 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/ { if (element_factory != Py_None) { - Py_INCREF(element_factory); - Py_XSETREF(self->element_factory, element_factory); + Py_XSETREF(self->element_factory, Py_NewRef(element_factory)); } else { Py_CLEAR(self->element_factory); } @@ -2375,8 +2358,7 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, comment_factory = st->comment_factory; } if (comment_factory) { - Py_INCREF(comment_factory); - Py_XSETREF(self->comment_factory, comment_factory); + Py_XSETREF(self->comment_factory, Py_NewRef(comment_factory)); self->insert_comments = insert_comments; } else { Py_CLEAR(self->comment_factory); @@ -2388,8 +2370,7 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, pi_factory = st->pi_factory; } if (pi_factory) { - Py_INCREF(pi_factory); - Py_XSETREF(self->pi_factory, pi_factory); + Py_XSETREF(self->pi_factory, Py_NewRef(pi_factory)); self->insert_pis = insert_pis; } else { Py_CLEAR(self->pi_factory); @@ -2492,14 +2473,12 @@ _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, if (comment_factory == Py_None) { Py_CLEAR(st->comment_factory); } else { - Py_INCREF(comment_factory); - Py_XSETREF(st->comment_factory, comment_factory); + Py_XSETREF(st->comment_factory, Py_NewRef(comment_factory)); } if (pi_factory == Py_None) { Py_CLEAR(st->pi_factory); } else { - Py_INCREF(pi_factory); - Py_XSETREF(st->pi_factory, pi_factory); + Py_XSETREF(st->pi_factory, Py_NewRef(pi_factory)); } return old; @@ -2676,10 +2655,8 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, } self->index++; - Py_INCREF(node); - Py_SETREF(self->this, node); - Py_INCREF(node); - Py_SETREF(self->last, node); + Py_SETREF(self->this, Py_NewRef(node)); + Py_SETREF(self->last, Py_NewRef(node)); if (treebuilder_append_event(self, self->start_event_obj, node) < 0) goto error; @@ -2719,9 +2696,9 @@ treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) PyObject* list = PyList_New(2); if (!list) return NULL; - PyList_SET_ITEM(list, 0, self->data); - Py_INCREF(data); PyList_SET_ITEM(list, 1, data); - self->data = list; + PyList_SET_ITEM(list, 0, Py_NewRef(self->data)); + PyList_SET_ITEM(list, 1, Py_NewRef(data)); + Py_SETREF(self->data, list); } } @@ -2749,8 +2726,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) self->last = Py_NewRef(self->this); Py_XSETREF(self->last_for_tail, self->last); self->index--; - self->this = PyList_GET_ITEM(self->stack, self->index); - Py_INCREF(self->this); + self->this = Py_NewRef(PyList_GET_ITEM(self->stack, self->index)); Py_DECREF(item); if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0) @@ -2778,8 +2754,7 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) if (self->insert_comments && this != Py_None) { if (treebuilder_add_subelement(this, comment) < 0) goto error; - Py_INCREF(comment); - Py_XSETREF(self->last_for_tail, comment); + Py_XSETREF(self->last_for_tail, Py_NewRef(comment)); } } else { comment = Py_NewRef(text); @@ -2818,8 +2793,7 @@ treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, 
PyObject* text) if (self->insert_pis && this != Py_None) { if (treebuilder_add_subelement(this, pi) < 0) goto error; - Py_INCREF(pi); - Py_XSETREF(self->last_for_tail, pi); + Py_XSETREF(self->last_for_tail, Py_NewRef(pi)); } } else { pi = PyTuple_Pack(2, target, text); @@ -3038,12 +3012,9 @@ makeuniversal(XMLParserObject* self, const char* string) if (!key) return NULL; - value = PyDict_GetItemWithError(self->names, key); + value = Py_XNewRef(PyDict_GetItemWithError(self->names, key)); - if (value) { - Py_INCREF(value); - } - else if (!PyErr_Occurred()) { + if (value == NULL && !PyErr_Occurred()) { /* new name. convert to universal name, and decode as necessary */ @@ -4029,39 +4000,37 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, return NULL; } - Py_INCREF(event_name_obj); if (strcmp(event_name, "start") == 0) { - Py_XSETREF(target->start_event_obj, event_name_obj); + Py_XSETREF(target->start_event_obj, Py_NewRef(event_name_obj)); } else if (strcmp(event_name, "end") == 0) { - Py_XSETREF(target->end_event_obj, event_name_obj); + Py_XSETREF(target->end_event_obj, Py_NewRef(event_name_obj)); } else if (strcmp(event_name, "start-ns") == 0) { - Py_XSETREF(target->start_ns_event_obj, event_name_obj); + Py_XSETREF(target->start_ns_event_obj, Py_NewRef(event_name_obj)); EXPAT(SetNamespaceDeclHandler)( self->parser, (XML_StartNamespaceDeclHandler) expat_start_ns_handler, (XML_EndNamespaceDeclHandler) expat_end_ns_handler ); } else if (strcmp(event_name, "end-ns") == 0) { - Py_XSETREF(target->end_ns_event_obj, event_name_obj); + Py_XSETREF(target->end_ns_event_obj, Py_NewRef(event_name_obj)); EXPAT(SetNamespaceDeclHandler)( self->parser, (XML_StartNamespaceDeclHandler) expat_start_ns_handler, (XML_EndNamespaceDeclHandler) expat_end_ns_handler ); } else if (strcmp(event_name, "comment") == 0) { - Py_XSETREF(target->comment_event_obj, event_name_obj); + Py_XSETREF(target->comment_event_obj, Py_NewRef(event_name_obj)); EXPAT(SetCommentHandler)( self->parser, (XML_CommentHandler) expat_comment_handler ); } else if (strcmp(event_name, "pi") == 0) { - Py_XSETREF(target->pi_event_obj, event_name_obj); + Py_XSETREF(target->pi_event_obj, Py_NewRef(event_name_obj)); EXPAT(SetProcessingInstructionHandler)( self->parser, (XML_ProcessingInstructionHandler) expat_pi_handler ); } else { - Py_DECREF(event_name_obj); Py_DECREF(events_seq); PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name); return NULL; @@ -4406,9 +4375,7 @@ PyInit__elementtree(void) st->parseerror_obj = PyErr_NewException( "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL ); - Py_INCREF(st->parseerror_obj); - if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) { - Py_DECREF(st->parseerror_obj); + if (PyModule_AddObjectRef(m, "ParseError", st->parseerror_obj) < 0) { return NULL; } From 8f18ac04d32515eab841172c956a8cb14bcee9c3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 Nov 2022 16:04:57 -0800 Subject: [PATCH 022/112] GH-98831: Add `macro` and `op` and their implementation to DSL (#99495) Newly supported interpreter definition syntax: - `op(NAME, (input_stack_effects -- output_stack_effects)) { ... }` - `macro(NAME) = OP1 + OP2;` Also some other random improvements: - Convert `WITH_EXCEPT_START` to use stack effects - Fix lexer to balk at unrecognized characters, e.g. 
`@` - Fix moved output names; support object pointers in cache - Introduce `error()` method to print errors - Introduce read_uint16(p) as equivalent to `*p` Co-authored-by: Brandt Bucher --- Include/internal/pycore_code.h | 6 + Python/bytecodes.c | 32 ++- Python/generated_cases.c.h | 103 +++++---- Tools/cases_generator/README.md | 6 +- Tools/cases_generator/generate_cases.py | 277 +++++++++++++++++++----- Tools/cases_generator/lexer.py | 4 +- Tools/cases_generator/parser.py | 32 ++- 7 files changed, 325 insertions(+), 135 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index ba36ee38d2b0ba..80c1bfb6c9afa2 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -293,6 +293,12 @@ write_obj(uint16_t *p, PyObject *val) memcpy(p, &val, sizeof(val)); } +static inline uint16_t +read_u16(uint16_t *p) +{ + return *p; +} + static inline uint32_t read_u32(uint16_t *p) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 78f7d4ac061674..a1f910da8ed54a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -70,6 +70,8 @@ do { \ #define DISPATCH_SAME_OPARG() ((void)0) #define inst(name, ...) case name: +#define op(name, ...) /* NAME is ignored */ +#define macro(name) static int MACRO_##name #define super(name) static int SUPER_##name #define family(name, ...) static int family_##name @@ -80,6 +82,7 @@ do { \ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict; +static PyObject *exit_func, *lasti, *val; static PyObject * dummy_func( @@ -156,10 +159,7 @@ dummy_func( res = NULL; } - inst(END_FOR, (value1, value2 --)) { - Py_DECREF(value1); - Py_DECREF(value2); - } + macro(END_FOR) = POP_TOP + POP_TOP; inst(UNARY_POSITIVE, (value -- res)) { res = PyNumber_Positive(value); @@ -2725,33 +2725,27 @@ dummy_func( PUSH(res); } - // stack effect: ( -- __0) - inst(WITH_EXCEPT_START) { + inst(WITH_EXCEPT_START, (exit_func, lasti, unused, val -- exit_func, lasti, unused, val, res)) { /* At the top of the stack are 4 values: - - TOP = exc_info() - - SECOND = previous exception - - THIRD: lasti of exception in exc_info() - - FOURTH: the context.__exit__ bound method + - val: TOP = exc_info() + - unused: SECOND = previous exception + - lasti: THIRD = lasti of exception in exc_info() + - exit_func: FOURTH = the context.__exit__ bound method We call FOURTH(type(TOP), TOP, GetTraceback(TOP)). Then we push the __exit__ return value. 
*/ - PyObject *exit_func; - PyObject *exc, *val, *tb, *res; + PyObject *exc, *tb; - val = TOP(); assert(val && PyExceptionInstance_Check(val)); exc = PyExceptionInstance_Class(val); tb = PyException_GetTraceback(val); Py_XDECREF(tb); - assert(PyLong_Check(PEEK(3))); - exit_func = PEEK(4); + assert(PyLong_Check(lasti)); + (void)lasti; // Shut up compiler warning if asserts are off PyObject *stack[4] = {NULL, exc, val, tb}; res = PyObject_Vectorcall(exit_func, stack + 1, 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); - if (res == NULL) - goto error; - - PUSH(res); + ERROR_IF(res == NULL, error); } // stack effect: ( -- __0) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2c6333f8e61537..ae8fdd5e99c3dc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -78,15 +78,6 @@ DISPATCH(); } - TARGET(END_FOR) { - PyObject *value2 = PEEK(1); - PyObject *value1 = PEEK(2); - Py_DECREF(value1); - Py_DECREF(value2); - STACK_SHRINK(2); - DISPATCH(); - } - TARGET(UNARY_POSITIVE) { PyObject *value = PEEK(1); PyObject *res; @@ -446,7 +437,7 @@ TARGET(BINARY_SUBSCR_GETITEM) { uint32_t type_version = read_u32(next_instr + 1); - uint16_t func_version = *(next_instr + 3); + uint16_t func_version = read_u16(next_instr + 3); PyObject *sub = PEEK(1); PyObject *container = PEEK(2); PyTypeObject *tp = Py_TYPE(container); @@ -2754,31 +2745,32 @@ } TARGET(WITH_EXCEPT_START) { + PyObject *val = PEEK(1); + PyObject *lasti = PEEK(3); + PyObject *exit_func = PEEK(4); + PyObject *res; /* At the top of the stack are 4 values: - - TOP = exc_info() - - SECOND = previous exception - - THIRD: lasti of exception in exc_info() - - FOURTH: the context.__exit__ bound method + - val: TOP = exc_info() + - unused: SECOND = previous exception + - lasti: THIRD = lasti of exception in exc_info() + - exit_func: FOURTH = the context.__exit__ bound method We call FOURTH(type(TOP), TOP, GetTraceback(TOP)). Then we push the __exit__ return value. 
*/ - PyObject *exit_func; - PyObject *exc, *val, *tb, *res; + PyObject *exc, *tb; - val = TOP(); assert(val && PyExceptionInstance_Check(val)); exc = PyExceptionInstance_Class(val); tb = PyException_GetTraceback(val); Py_XDECREF(tb); - assert(PyLong_Check(PEEK(3))); - exit_func = PEEK(4); + assert(PyLong_Check(lasti)); + (void)lasti; // Shut up compiler warning if asserts are off PyObject *stack[4] = {NULL, exc, val, tb}; res = PyObject_Vectorcall(exit_func, stack + 1, 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); - if (res == NULL) - goto error; - - PUSH(res); + if (res == NULL) goto error; + STACK_GROW(1); + POKE(1, res); DISPATCH(); } @@ -3711,13 +3703,14 @@ } TARGET(LOAD_FAST__LOAD_FAST) { + PyObject *_tmp_1; + PyObject *_tmp_2; { PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_1 = value; } NEXTOPARG(); next_instr++; @@ -3726,20 +3719,23 @@ value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_2 = value; } + STACK_GROW(2); + POKE(1, _tmp_2); + POKE(2, _tmp_1); DISPATCH(); } TARGET(LOAD_FAST__LOAD_CONST) { + PyObject *_tmp_1; + PyObject *_tmp_2; { PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_1 = value; } NEXTOPARG(); next_instr++; @@ -3747,17 +3743,19 @@ PyObject *value; value = GETITEM(consts, oparg); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_2 = value; } + STACK_GROW(2); + POKE(1, _tmp_2); + POKE(2, _tmp_1); DISPATCH(); } TARGET(STORE_FAST__LOAD_FAST) { + PyObject *_tmp_1 = PEEK(1); { - PyObject *value = PEEK(1); + PyObject *value = _tmp_1; SETLOCAL(oparg, value); - STACK_SHRINK(1); } NEXTOPARG(); next_instr++; @@ -3766,35 +3764,37 @@ value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_1 = value; } + POKE(1, _tmp_1); DISPATCH(); } TARGET(STORE_FAST__STORE_FAST) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); { - PyObject *value = PEEK(1); + PyObject *value = _tmp_2; SETLOCAL(oparg, value); - STACK_SHRINK(1); } NEXTOPARG(); next_instr++; { - PyObject *value = PEEK(1); + PyObject *value = _tmp_1; SETLOCAL(oparg, value); - STACK_SHRINK(1); } + STACK_SHRINK(2); DISPATCH(); } TARGET(LOAD_CONST__LOAD_FAST) { + PyObject *_tmp_1; + PyObject *_tmp_2; { PyObject *value; value = GETITEM(consts, oparg); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_1 = value; } NEXTOPARG(); next_instr++; @@ -3803,8 +3803,25 @@ value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - STACK_GROW(1); - POKE(1, value); + _tmp_2 = value; } + STACK_GROW(2); + POKE(1, _tmp_2); + POKE(2, _tmp_1); + DISPATCH(); + } + + TARGET(END_FOR) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); + { + PyObject *value = _tmp_2; + Py_DECREF(value); + } + { + PyObject *value = _tmp_1; + Py_DECREF(value); + } + STACK_SHRINK(2); DISPATCH(); } diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md index abcafe257720eb..dc055ead1941cd 100644 --- a/Tools/cases_generator/README.md +++ b/Tools/cases_generator/README.md @@ -2,9 +2,9 @@ What's currently here: -- lexer.py: lexer for C, originally written by Mark Shannon -- plexer.py: OO interface on top of lexer.py; main class: `PLexer` -- parser.py: Parser for instruction definition DSL; main class `Parser` +- `lexer.py`: lexer for C, originally written by Mark Shannon +- `plexer.py`: OO interface on top of lexer.py; main class: `PLexer` +- 
`parser.py`: Parser for instruction definition DSL; main class `Parser` - `generate_cases.py`: driver script to read `Python/bytecodes.c` and write `Python/generated_cases.c.h` diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index e11d0c77e99d27..424b15ede2aadf 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -5,6 +5,8 @@ """ import argparse +import contextlib +import dataclasses import os import re import sys @@ -17,6 +19,8 @@ BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);" +UNUSED = "unused" +BITS_PER_CODE_UNIT = 16 arg_parser = argparse.ArgumentParser() arg_parser.add_argument("-i", "--input", type=str, default=DEFAULT_INPUT) @@ -51,9 +55,7 @@ def __init__(self, inst: parser.InstDef): ] self.output_effects = self.outputs # For consistency/completeness - def write( - self, f: typing.TextIO, indent: str, dedent: int = 0 - ) -> None: + def write(self, f: typing.TextIO, indent: str, dedent: int = 0) -> None: """Write one instruction, sans prologue and epilogue.""" if dedent < 0: indent += " " * -dedent # DO WE NEED THIS? @@ -70,25 +72,33 @@ def write( # Write cache effect variable declarations cache_offset = 0 for ceffect in self.cache_effects: - if ceffect.name != "unused": - # TODO: if name is 'descr' use PyObject *descr = read_obj(...) - bits = ceffect.size * 16 - f.write(f"{indent} uint{bits}_t {ceffect.name} = ") - if ceffect.size == 1: - f.write(f"*(next_instr + {cache_offset});\n") + if ceffect.name != UNUSED: + bits = ceffect.size * BITS_PER_CODE_UNIT + if bits == 64: + # NOTE: We assume that 64-bit data in the cache + # is always an object pointer. + # If this becomes false, we need a way to specify + # syntactically what type the cache data is. 
+ f.write( + f"{indent} PyObject *{ceffect.name} = " + f"read_obj(next_instr + {cache_offset});\n" + ) else: - f.write(f"read_u{bits}(next_instr + {cache_offset});\n") + f.write(f"{indent} uint{bits}_t {ceffect.name} = " + f"read_u{bits}(next_instr + {cache_offset});\n") cache_offset += ceffect.size assert cache_offset == self.cache_offset # Write input stack effect variable declarations and initializations for i, seffect in enumerate(reversed(self.input_effects), 1): - if seffect.name != "unused": + if seffect.name != UNUSED: f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n") # Write output stack effect variable declarations + input_names = {seffect.name for seffect in self.input_effects} + input_names.add(UNUSED) for seffect in self.output_effects: - if seffect.name != "unused": + if seffect.name not in input_names: f.write(f"{indent} PyObject *{seffect.name};\n") self.write_body(f, indent, dedent) @@ -105,21 +115,22 @@ def write( f.write(f"{indent} STACK_SHRINK({-diff});\n") # Write output stack effect assignments - input_names = [seffect.name for seffect in self.input_effects] - for i, output in enumerate(reversed(self.output_effects), 1): - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} POKE({i}, {output.name});\n") + unmoved_names = {UNUSED} + for ieffect, oeffect in zip(self.input_effects, self.output_effects): + if ieffect.name == oeffect.name: + unmoved_names.add(ieffect.name) + for i, seffect in enumerate(reversed(self.output_effects)): + if seffect.name not in unmoved_names: + f.write(f"{indent} POKE({i+1}, {seffect.name});\n") # Write cache effect if self.cache_offset: f.write(f"{indent} next_instr += {self.cache_offset};\n") - def write_body( - self, f: typing.TextIO, ndent: str, dedent: int - ) -> None: + def write_body(self, f: typing.TextIO, ndent: str, dedent: int) -> None: """Write the instruction body.""" - # Get lines of text with proper dedelt + # Get lines of text with proper dedent blocklines = self.block.to_text(dedent=dedent).splitlines(True) # Remove blank lines from both ends @@ -146,6 +157,13 @@ def write_body( # The code block is responsible for DECREF()ing them. # NOTE: If the label doesn't exist, just add it to ceval.c. ninputs = len(self.input_effects) + # Don't pop common input/output effects at the bottom! + # These aren't DECREF'ed so they can stay. 
+ for ieff, oeff in zip(self.input_effects, self.output_effects): + if ieff.name == oeff.name: + ninputs -= 1 + else: + break if ninputs: f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") else: @@ -154,6 +172,84 @@ def write_body( f.write(line) +@dataclasses.dataclass +class SuperComponent: + instr: Instruction + input_mapping: dict[str, parser.StackEffect] + output_mapping: dict[str, parser.StackEffect] + + +class SuperInstruction(parser.Super): + + stack: list[str] + initial_sp: int + final_sp: int + parts: list[SuperComponent] + + def __init__(self, sup: parser.Super): + super().__init__(sup.kind, sup.name, sup.ops) + self.context = sup.context + + def analyze(self, a: "Analyzer") -> None: + components = self.check_components(a) + self.stack, self.initial_sp = self.super_macro_analysis(a, components) + sp = self.initial_sp + self.parts = [] + for instr in components: + input_mapping = {} + for ieffect in reversed(instr.input_effects): + sp -= 1 + if ieffect.name != UNUSED: + input_mapping[self.stack[sp]] = ieffect + output_mapping = {} + for oeffect in instr.output_effects: + if oeffect.name != UNUSED: + output_mapping[self.stack[sp]] = oeffect + sp += 1 + self.parts.append(SuperComponent(instr, input_mapping, output_mapping)) + self.final_sp = sp + + def check_components(self, a: "Analyzer") -> list[Instruction]: + components: list[Instruction] = [] + if not self.ops: + a.error(f"{self.kind.capitalize()}-instruction has no operands", self) + for name in self.ops: + if name not in a.instrs: + a.error(f"Unknown instruction {name!r}", self) + else: + instr = a.instrs[name] + if self.kind == "super" and instr.kind != "inst": + a.error(f"Super-instruction operand {instr.name} must be inst, not op", instr) + components.append(instr) + return components + + def super_macro_analysis( + self, a: "Analyzer", components: list[Instruction] + ) -> tuple[list[str], int]: + """Analyze a super-instruction or macro. + + Print an error if there's a cache effect (which we don't support yet). + + Return the list of variable names and the initial stack pointer. + """ + lowest = current = highest = 0 + for instr in components: + if instr.cache_effects: + a.error( + f"Super-instruction {self.name!r} has cache effects in {instr.name!r}", + instr, + ) + current -= len(instr.input_effects) + lowest = min(lowest, current) + current += len(instr.output_effects) + highest = max(highest, current) + # At this point, 'current' is the net stack effect, + # and 'lowest' and 'highest' are the extremes. + # Note that 'lowest' may be negative. 
+ stack = [f"_tmp_{i+1}" for i in range(highest - lowest)] + return stack, -lowest + + class Analyzer: """Parse input, analyze it, and write to output.""" @@ -161,14 +257,26 @@ class Analyzer: src: str errors: int = 0 + def error(self, msg: str, node: parser.Node) -> None: + lineno = 0 + if context := node.context: + # Use line number of first non-comment in the node + for token in context.owner.tokens[context.begin : context.end]: + lineno = token.line + if token.kind != "COMMENT": + break + print(f"{self.filename}:{lineno}: {msg}", file=sys.stderr) + self.errors += 1 + def __init__(self, filename: str): """Read the input file.""" self.filename = filename with open(filename) as f: self.src = f.read() - instrs: dict[str, Instruction] - supers: dict[str, parser.Super] + instrs: dict[str, Instruction] # Includes ops + supers: dict[str, parser.Super] # Includes macros + super_instrs: dict[str, SuperInstruction] families: dict[str, parser.Family] def parse(self) -> None: @@ -180,7 +288,9 @@ def parse(self) -> None: if tkn.text == BEGIN_MARKER: break else: - raise psr.make_syntax_error(f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}") + raise psr.make_syntax_error( + f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}" + ) # Parse until end marker self.instrs = {} @@ -198,7 +308,7 @@ def parse(self) -> None: print( f"Read {len(self.instrs)} instructions, " - f"{len(self.supers)} supers, " + f"{len(self.supers)} supers/macros, " f"and {len(self.families)} families from {self.filename}", file=sys.stderr, ) @@ -211,6 +321,7 @@ def analyze(self) -> None: self.find_predictions() self.map_families() self.check_families() + self.analyze_supers() def find_predictions(self) -> None: """Find the instructions that need PREDICTED() labels.""" @@ -219,11 +330,10 @@ def find_predictions(self) -> None: if target_instr := self.instrs.get(target): target_instr.predicted = True else: - print( + self.error( f"Unknown instruction {target!r} predicted in {instr.name!r}", - file=sys.stderr, + instr, # TODO: Use better location ) - self.errors += 1 def map_families(self) -> None: """Make instruction names back to their family, if they have one.""" @@ -232,11 +342,10 @@ def map_families(self) -> None: if member_instr := self.instrs.get(member): member_instr.family = family else: - print( + self.error( f"Unknown instruction {member!r} referenced in family {family.name!r}", - file=sys.stderr, + family, ) - self.errors += 1 def check_families(self) -> None: """Check each family: @@ -247,13 +356,11 @@ def check_families(self) -> None: """ for family in self.families.values(): if len(family.members) < 2: - print(f"Family {family.name!r} has insufficient members") - self.errors += 1 + self.error(f"Family {family.name!r} has insufficient members", family) members = [member for member in family.members if member in self.instrs] if members != family.members: unknown = set(family.members) - set(members) - print(f"Family {family.name!r} has unknown members: {unknown}") - self.errors += 1 + self.error(f"Family {family.name!r} has unknown members: {unknown}", family) if len(members) < 2: continue head = self.instrs[members[0]] @@ -266,18 +373,21 @@ def check_families(self) -> None: i = len(instr.input_effects) o = len(instr.output_effects) if (c, i, o) != (cache, input, output): - self.errors += 1 - print( + self.error( f"Family {family.name!r} has inconsistent " - f"(cache, inputs, outputs) effects:", - file=sys.stderr, - ) - print( + f"(cache, inputs, outputs) effects:\n" f" {family.members[0]} = {(cache, input, output)}; " 
f"{member} = {(c, i, o)}", - file=sys.stderr, + family, ) - self.errors += 1 + + def analyze_supers(self) -> None: + """Analyze each super instruction.""" + self.super_instrs = {} + for name, sup in self.supers.items(): + dup = SuperInstruction(sup) + dup.analyze(self) + self.super_instrs[name] = dup def write_instructions(self, filename: str) -> None: """Write instructions to output file.""" @@ -289,7 +399,11 @@ def write_instructions(self, filename: str) -> None: f.write(f"// Do not edit!\n") # Write regular instructions + n_instrs = 0 for name, instr in self.instrs.items(): + if instr.kind != "inst": + continue # ops are not real instructions + n_instrs += 1 f.write(f"\n{indent}TARGET({name}) {{\n") if instr.predicted: f.write(f"{indent} PREDICTED({name});\n") @@ -298,26 +412,75 @@ def write_instructions(self, filename: str) -> None: f.write(f"{indent} DISPATCH();\n") f.write(f"{indent}}}\n") - # Write super-instructions - for name, sup in self.supers.items(): - components = [self.instrs[name] for name in sup.ops] - f.write(f"\n{indent}TARGET({sup.name}) {{\n") - for i, instr in enumerate(components): - if i > 0: - f.write(f"{indent} NEXTOPARG();\n") - f.write(f"{indent} next_instr++;\n") - f.write(f"{indent} {{\n") - instr.write(f, indent, dedent=-4) - f.write(f" {indent}}}\n") - f.write(f"{indent} DISPATCH();\n") - f.write(f"{indent}}}\n") + # Write super-instructions and macros + n_supers = 0 + n_macros = 0 + for sup in self.super_instrs.values(): + if sup.kind == "super": + n_supers += 1 + elif sup.kind == "macro": + n_macros += 1 + self.write_super_macro(f, sup, indent) print( - f"Wrote {len(self.instrs)} instructions and " - f"{len(self.supers)} super-instructions to {filename}", + f"Wrote {n_instrs} instructions, {n_supers} supers, " + f"and {n_macros} macros to {filename}", file=sys.stderr, ) + def write_super_macro( + self, f: typing.TextIO, sup: SuperInstruction, indent: str = "" + ) -> None: + + # TODO: Make write() and block() methods of some Formatter class + def write(arg: str) -> None: + if arg: + f.write(f"{indent}{arg}\n") + else: + f.write("\n") + + @contextlib.contextmanager + def block(head: str): + if head: + write(head + " {") + else: + write("{") + nonlocal indent + indent += " " + yield + indent = indent[:-4] + write("}") + + write("") + with block(f"TARGET({sup.name})"): + for i, var in enumerate(sup.stack): + if i < sup.initial_sp: + write(f"PyObject *{var} = PEEK({sup.initial_sp - i});") + else: + write(f"PyObject *{var};") + + for i, comp in enumerate(sup.parts): + if i > 0 and sup.kind == "super": + write("NEXTOPARG();") + write("next_instr++;") + + with block(""): + for var, ieffect in comp.input_mapping.items(): + write(f"PyObject *{ieffect.name} = {var};") + for oeffect in comp.output_mapping.values(): + write(f"PyObject *{oeffect.name};") + comp.instr.write_body(f, indent, dedent=-4) + for var, oeffect in comp.output_mapping.items(): + write(f"{var} = {oeffect.name};") + + if sup.final_sp > sup.initial_sp: + write(f"STACK_GROW({sup.final_sp - sup.initial_sp});") + elif sup.final_sp < sup.initial_sp: + write(f"STACK_SHRINK({sup.initial_sp - sup.final_sp});") + for i, var in enumerate(reversed(sup.stack[:sup.final_sp]), 1): + write(f"POKE({i}, {var});") + write("DISPATCH();") + def always_exits(block: parser.Block) -> bool: """Determine whether a block always ends in a return/goto/etc.""" diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 493a32e38166d7..980c920bf357f4 100644 --- a/Tools/cases_generator/lexer.py +++ 
b/Tools/cases_generator/lexer.py @@ -112,7 +112,8 @@ def choice(*opts): COMMENT = 'COMMENT' newline = r"\n" -matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values())) +invalid = r"\S" # A single non-space character that's not caught by any of the other patterns +matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values(), invalid)) letter = re.compile(r'[a-zA-Z_]') kwds = ( @@ -177,7 +178,6 @@ def __repr__(self): def tokenize(src, line=1, filename=None): linestart = -1 - # TODO: finditer() skips over unrecognized characters, e.g. '@' for m in matcher.finditer(src): start, end = m.span() text = m.group(0) diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index c511607fdf70ec..ae5ef1e26ea1c2 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -1,7 +1,7 @@ """Parser for bytecodes.inst.""" from dataclasses import dataclass, field -from typing import NamedTuple, Callable, TypeVar +from typing import NamedTuple, Callable, TypeVar, Literal import lexer as lx from plexer import PLexer @@ -74,6 +74,7 @@ class CacheEffect(Node): @dataclass class InstHeader(Node): + kind: Literal["inst", "op"] name: str inputs: list[InputEffect] outputs: list[OutputEffect] @@ -81,9 +82,14 @@ class InstHeader(Node): @dataclass class InstDef(Node): + # TODO: Merge InstHeader and InstDef header: InstHeader block: Block + @property + def kind(self) -> str: + return self.header.kind + @property def name(self) -> str: return self.header.name @@ -93,12 +99,13 @@ def inputs(self) -> list[InputEffect]: return self.header.inputs @property - def outputs(self) -> list[StackEffect]: + def outputs(self) -> list[OutputEffect]: return self.header.outputs @dataclass class Super(Node): + kind: Literal["macro", "super"] name: str ops: list[str] @@ -122,10 +129,12 @@ def inst_def(self) -> InstDef | None: @contextual def inst_header(self) -> InstHeader | None: - # inst(NAME) | inst(NAME, (inputs -- outputs)) + # inst(NAME) + # | inst(NAME, (inputs -- outputs)) + # | op(NAME, (inputs -- outputs)) # TODO: Error out when there is something unexpected. # TODO: Make INST a keyword in the lexer. - if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst": + if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"): if (self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER))): name = tkn.text @@ -134,9 +143,10 @@ def inst_header(self) -> InstHeader | None: if self.expect(lx.RPAREN): if ((tkn := self.peek()) and tkn.kind == lx.LBRACE): - return InstHeader(name, inp, outp) - elif self.expect(lx.RPAREN): - return InstHeader(name, [], []) + return InstHeader(kind, name, inp, outp) + elif self.expect(lx.RPAREN) and kind == "inst": + # No legacy stack effect if kind is "op". 
+ return InstHeader(kind, name, [], []) return None def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: @@ -200,13 +210,13 @@ def output(self) -> OutputEffect | None: @contextual def super_def(self) -> Super | None: - if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "super": + if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("super", "macro"): if self.expect(lx.LPAREN): if (tkn := self.expect(lx.IDENTIFIER)): if self.expect(lx.RPAREN): if self.expect(lx.EQUALS): if ops := self.ops(): - res = Super(tkn.text, ops) + res = Super(kind, tkn.text, ops) return res def ops(self) -> list[str] | None: @@ -278,7 +288,7 @@ def c_blob(self) -> list[lx.Token]: filename = sys.argv[1] if filename == "-c" and sys.argv[2:]: src = sys.argv[2] - filename = None + filename = "" else: with open(filename) as f: src = f.read() @@ -287,7 +297,7 @@ def c_blob(self) -> list[lx.Token]: end = srclines.index("// END BYTECODES //") src = "\n".join(srclines[begin+1 : end]) else: - filename = None + filename = "" src = "if (x) { x.foo; // comment\n}" parser = Parser(src, filename) x = parser.inst_def() or parser.super_def() or parser.family_def() From 8f024a02d7d63315ecc3479f0715e927f48fc91b Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Wed, 23 Nov 2022 11:52:12 +0100 Subject: [PATCH 023/112] GH-95283: Add note about compilers in Mac/README.txt (#99506) The build machinery assumes that the compiler that's used to build on macOS includes an SDK that's at least as new as the OS version on the build machine. Explicitly mention this in Mac/README.txt. --- Mac/README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Mac/README.rst b/Mac/README.rst index 7476639d0ff541..bc40b41f7f38ad 100644 --- a/Mac/README.rst +++ b/Mac/README.rst @@ -10,6 +10,19 @@ Python on macOS README This document provides a quick overview of some macOS specific features in the Python distribution. +Compilers for building on macOS +=============================== + +The core developers primarily test builds on macOS with Apple's compiler tools, +either Xcode or the Command Line Tools. For these we only support building with +a compiler that includes an SDK that targets the OS on the build machine, that is +the version of Xcode that shipped with the OS version or one newer. + +For example, for macOS 12 we support Xcode 13 and Xcode 14 (or the corresponding +Command Line Tools). + +Building with other compilers, such as GCC, likely works, but is not actively supported. + macOS specific arguments to configure ===================================== From 5d9183c7ad68eb9ddb53d54a3f9a27e29dbabf31 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 23 Nov 2022 10:59:52 +0000 Subject: [PATCH 024/112] gh-99619: fix error in documentation of ExceptionGroup.derive() (GH-99621) --- Doc/library/exceptions.rst | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 4271a30de74a57..1217b817b4e843 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -934,21 +934,42 @@ their subgroups based on the types of the contained exceptions. .. method:: derive(excs) - Returns an exception group with the same :attr:`message`, - :attr:`__traceback__`, :attr:`__cause__`, :attr:`__context__` - and :attr:`__notes__` but which wraps the exceptions in ``excs``. 
+ Returns an exception group with the same :attr:`message`, but which + wraps the exceptions in ``excs``. This method is used by :meth:`subgroup` and :meth:`split`. A subclass needs to override it in order to make :meth:`subgroup` and :meth:`split` return instances of the subclass rather - than :exc:`ExceptionGroup`. :: + than :exc:`ExceptionGroup`. + + :meth:`subgroup` and :meth:`split` copy the :attr:`__traceback__`, + :attr:`__cause__`, :attr:`__context__` and :attr:`__notes__` fields from + the original exception group to the one returned by :meth:`derive`, so + these fields do not need to be updated by :meth:`derive`. :: >>> class MyGroup(ExceptionGroup): ... def derive(self, exc): ... return MyGroup(self.message, exc) ... - >>> MyGroup("eg", [ValueError(1), TypeError(2)]).split(TypeError) - (MyGroup('eg', [TypeError(2)]), MyGroup('eg', [ValueError(1)])) + >>> e = MyGroup("eg", [ValueError(1), TypeError(2)]) + >>> e.add_note("a note") + >>> e.__context__ = Exception("context") + >>> e.__cause__ = Exception("cause") + >>> try: + ... raise e + ... except Exception as e: + ... exc = e + ... + >>> match, rest = exc.split(ValueError) + >>> exc, exc.__context__, exc.__cause__, exc.__notes__ + (MyGroup('eg', [ValueError(1), TypeError(2)]), Exception('context'), Exception('cause'), ['a note']) + >>> match, match.__context__, match.__cause__, match.__notes__ + (MyGroup('eg', [ValueError(1)]), Exception('context'), Exception('cause'), ['a note']) + >>> rest, rest.__context__, rest.__cause__, rest.__notes__ + (MyGroup('eg', [TypeError(2)]), Exception('context'), Exception('cause'), ['a note']) + >>> exc.__traceback__ is match.__traceback__ is rest.__traceback__ + True + Note that :exc:`BaseExceptionGroup` defines :meth:`__new__`, so subclasses that need a different constructor signature need to From 81f7359f67a7166d57a10a3d5366406d9c85f1de Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 23 Nov 2022 14:57:50 +0100 Subject: [PATCH 025/112] gh-99537: Use Py_SETREF(var, NULL) in C code (#99687) Replace "Py_DECREF(var); var = NULL;" with "Py_SETREF(var, NULL);". 
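
The change below is mechanical, but one motivation for the macro form is worth spelling out with a small hypothetical sketch (names invented, not from the diff): ``Py_SETREF()`` stores the new value first and only then decrements the old one through a temporary, so destructor code triggered by the ``Py_DECREF()`` can no longer observe the variable still pointing at the object being freed.

/* Hypothetical module state, not from the patch. */
#include <Python.h>

static PyObject *last_result;   /* assumed to hold a strong, non-NULL reference */

static void
reset_last_result(void)
{
    /* Old pattern -- between the two statements, arbitrary code run by
     * Py_DECREF() (e.g. a __del__ method) could still see the stale pointer:
     *     Py_DECREF(last_result);
     *     last_result = NULL;
     */
    Py_SETREF(last_result, NULL);   /* assign first, then decref via a temporary */
}
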
--- Modules/_abc.c | 3 +-- Modules/_datetimemodule.c | 12 ++++-------- Modules/_elementtree.c | 3 +-- Modules/_io/_iomodule.c | 3 +-- Modules/_pickle.c | 3 +-- Modules/_scproxy.c | 2 +- Modules/_struct.c | 3 +-- Modules/_tkinter.c | 3 +-- Modules/_xxsubinterpretersmodule.c | 3 +-- Modules/_zoneinfo.c | 3 +-- Modules/cjkcodecs/multibytecodec.c | 3 +-- Modules/mathmodule.c | 9 +++------ Modules/nismodule.c | 3 +-- Modules/posixmodule.c | 12 ++++-------- Objects/abstract.c | 9 +++------ Objects/classobject.c | 6 ++---- Objects/descrobject.c | 3 +-- Objects/fileobject.c | 9 +++------ Objects/typeobject.c | 12 ++++-------- Python/bltinmodule.c | 12 ++++-------- Python/errors.c | 3 +-- Python/marshal.c | 12 ++++-------- 22 files changed, 44 insertions(+), 87 deletions(-) diff --git a/Modules/_abc.c b/Modules/_abc.c index e6e72427a0481d..e146d4fd0cac39 100644 --- a/Modules/_abc.c +++ b/Modules/_abc.c @@ -624,8 +624,7 @@ _abc__abc_instancecheck_impl(PyObject *module, PyObject *self, switch (PyObject_IsTrue(result)) { case -1: - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); break; case 0: Py_DECREF(result); diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 712abc3346faf9..eda8c5610ba659 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1328,8 +1328,7 @@ call_tzname(PyObject *tzinfo, PyObject *tzinfoarg) PyErr_Format(PyExc_TypeError, "tzinfo.tzname() must " "return None or a string, not '%s'", Py_TYPE(result)->tp_name); - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } return result; @@ -1849,8 +1848,7 @@ delta_to_microseconds(PyDateTime_Delta *self) x2 = PyNumber_Multiply(x1, seconds_per_day); /* days in seconds */ if (x2 == NULL) goto Done; - Py_DECREF(x1); - x1 = NULL; + Py_SETREF(x1, NULL); /* x2 has days in seconds */ x1 = PyLong_FromLong(GET_TD_SECONDS(self)); /* seconds */ @@ -1867,8 +1865,7 @@ delta_to_microseconds(PyDateTime_Delta *self) x1 = PyNumber_Multiply(x3, us_per_second); /* us */ if (x1 == NULL) goto Done; - Py_DECREF(x3); - x3 = NULL; + Py_SETREF(x3, NULL); /* x1 has days+seconds in us */ x2 = PyLong_FromLong(GET_TD_MICROSECONDS(self)); @@ -2038,8 +2035,7 @@ multiply_truedivide_timedelta_float(PyDateTime_Delta *delta, PyObject *floatobj, goto error; } temp = PyNumber_Multiply(pyus_in, PyTuple_GET_ITEM(ratio, op)); - Py_DECREF(pyus_in); - pyus_in = NULL; + Py_SETREF(pyus_in, NULL); if (temp == NULL) goto error; pyus_out = divide_nearest(temp, PyTuple_GET_ITEM(ratio, !op)); diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 2da44cf2886ea4..0c68ede42ca61d 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -345,8 +345,7 @@ get_attrib_from_keywords(PyObject *kwds) } attrib = PyDict_Copy(attrib); if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) { - Py_DECREF(attrib); - attrib = NULL; + Py_SETREF(attrib, NULL); } } else if (!PyErr_Occurred()) { diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index ccd40ab8bca451..121d9617e1883b 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -334,8 +334,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, goto error; result = raw; - Py_DECREF(path_or_fd); - path_or_fd = NULL; + Py_SETREF(path_or_fd, NULL); modeobj = PyUnicode_FromString(mode); if (modeobj == NULL) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 1e118e9cd10bbf..2078779663a919 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -4344,8 +4344,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save) if 
(reduce_value != Py_NotImplemented) { goto reduce; } - Py_DECREF(reduce_value); - reduce_value = NULL; + Py_SETREF(reduce_value, NULL); } if (type == &PyType_Type) { diff --git a/Modules/_scproxy.c b/Modules/_scproxy.c index 4c1f1aa300c717..344b66f9aad522 100644 --- a/Modules/_scproxy.c +++ b/Modules/_scproxy.c @@ -84,7 +84,7 @@ get_proxy_settings(PyObject* Py_UNUSED(mod), PyObject *Py_UNUSED(ignored)) if (v == NULL) goto error; r = PyDict_SetItemString(result, "exclude_simple", v); - Py_DECREF(v); v = NULL; + Py_SETREF(v, NULL); if (r == -1) goto error; anArray = CFDictionaryGetValue(proxyDict, diff --git a/Modules/_struct.c b/Modules/_struct.c index 2f2eb25d984230..c960b81b246ece 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2164,8 +2164,7 @@ cache_struct_converter(PyObject *module, PyObject *fmt, PyStructObject **ptr) _structmodulestate *state = get_struct_state(module); if (fmt == NULL) { - Py_DECREF(*ptr); - *ptr = NULL; + Py_SETREF(*ptr, NULL); return 1; } diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 6ff7d2bfced29b..93d4474f65d62c 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -2163,8 +2163,7 @@ _tkinter_tkapp_splitlist(TkappObject *self, PyObject *arg) for (i = 0; i < argc; i++) { PyObject *s = unicodeFromTclString(argv[i]); if (!s) { - Py_DECREF(v); - v = NULL; + Py_SETREF(v, NULL); goto finally; } PyTuple_SET_ITEM(v, i, s); diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index 244ae3517e1d81..2c9e0cda1ab048 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -2320,8 +2320,7 @@ channel_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) PyObject *id = (PyObject *)newchannelid(&ChannelIDtype, *cur, 0, &_globals.channels, 0, 0); if (id == NULL) { - Py_DECREF(ids); - ids = NULL; + Py_SETREF(ids, NULL); break; } PyList_SET_ITEM(ids, (Py_ssize_t)i, id); diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index cb7d4c943845b1..9d38589ea3d1b0 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -220,8 +220,7 @@ zoneinfo_new_instance(PyTypeObject *type, PyObject *key) } PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); - Py_DECREF(file_obj); - file_obj = NULL; + Py_SETREF(file_obj, NULL); if (rv == NULL) { goto error; } diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 8b6232695d4c13..1d77fd33ac3b87 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -1463,8 +1463,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, goto errorexit; } - Py_DECREF(cres); - cres = NULL; + Py_SETREF(cres, NULL); if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0) break; diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 83eb338be9b83c..49c0293d4f5ce3 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3152,8 +3152,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start) long i_result = PyLong_AsLongAndOverflow(result, &overflow); /* If this already overflowed, don't even enter the loop. 
*/ if (overflow == 0) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } /* Loop over all the items in the iterable until we finish, we overflow * or we found a non integer element */ @@ -3200,8 +3199,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start) */ if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); if (item == NULL) { @@ -3250,8 +3248,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start) if (item == NULL) { /* error, or end-of-sequence */ if (PyErr_Occurred()) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } break; } diff --git a/Modules/nismodule.c b/Modules/nismodule.c index 39b991162b2761..ec7f6d8031e84b 100644 --- a/Modules/nismodule.c +++ b/Modules/nismodule.c @@ -458,8 +458,7 @@ nis_maps (PyObject *module, PyObject *args, PyObject *kwdict) if (!str || PyList_Append(list, str) < 0) { Py_XDECREF(str); - Py_DECREF(list); - list = NULL; + Py_SETREF(list, NULL); break; } Py_DECREF(str); diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 45e71ee9c0598d..8185517b06b5dd 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4036,14 +4036,12 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) Py_SETREF(v, PyUnicode_EncodeFSDefault(v)); } if (v == NULL) { - Py_DECREF(list); - list = NULL; + Py_SETREF(list, NULL); break; } if (PyList_Append(list, v) != 0) { Py_DECREF(v); - Py_DECREF(list); - list = NULL; + Py_SETREF(list, NULL); break; } Py_DECREF(v); @@ -13131,15 +13129,13 @@ os_listxattr_impl(PyObject *module, path_t *path, int follow_symlinks) PyObject *attribute = PyUnicode_DecodeFSDefaultAndSize(start, trace - start); if (!attribute) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); goto exit; } error = PyList_Append(result, attribute); Py_DECREF(attribute); if (error) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); goto exit; } start = trace + 1; diff --git a/Objects/abstract.c b/Objects/abstract.c index 8aa3fc17c6341b..9dc74fb9c2608c 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -806,8 +806,7 @@ PyObject_Format(PyObject *obj, PyObject *format_spec) PyErr_Format(PyExc_TypeError, "__format__ must return a str, not %.200s", Py_TYPE(result)->tp_name); - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); goto done; } @@ -2791,8 +2790,7 @@ PyObject_GetIter(PyObject *o) "iter() returned non-iterator " "of type '%.100s'", Py_TYPE(res)->tp_name); - Py_DECREF(res); - res = NULL; + Py_SETREF(res, NULL); } return res; } @@ -2812,8 +2810,7 @@ PyObject_GetAIter(PyObject *o) { PyErr_Format(PyExc_TypeError, "aiter() returned not an async iterator of type '%.100s'", Py_TYPE(it)->tp_name); - Py_DECREF(it); - it = NULL; + Py_SETREF(it, NULL); } return it; } diff --git a/Objects/classobject.c b/Objects/classobject.c index eedf8f0e1e1acf..2cb192e725d40d 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -283,8 +283,7 @@ method_repr(PyMethodObject *a) } if (funcname != NULL && !PyUnicode_Check(funcname)) { - Py_DECREF(funcname); - funcname = NULL; + Py_SETREF(funcname, NULL); } /* XXX Shouldn't use repr()/%R here! 
*/ @@ -484,8 +483,7 @@ instancemethod_repr(PyObject *self) return NULL; } if (funcname != NULL && !PyUnicode_Check(funcname)) { - Py_DECREF(funcname); - funcname = NULL; + Py_SETREF(funcname, NULL); } result = PyUnicode_FromFormat("", diff --git a/Objects/descrobject.c b/Objects/descrobject.c index cc204931c3fe3d..c545b90c6283e1 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -906,8 +906,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) descr->d_type = (PyTypeObject*)Py_XNewRef(type); descr->d_name = PyUnicode_InternFromString(name); if (descr->d_name == NULL) { - Py_DECREF(descr); - descr = NULL; + Py_SETREF(descr, NULL); } else { descr->d_qualname = NULL; diff --git a/Objects/fileobject.c b/Objects/fileobject.c index bf56be5f7ea7b0..e99e155f2b8c98 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -67,8 +67,7 @@ PyFile_GetLine(PyObject *f, int n) } if (result != NULL && !PyBytes_Check(result) && !PyUnicode_Check(result)) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); PyErr_SetString(PyExc_TypeError, "object.readline() returned non-string"); } @@ -77,8 +76,7 @@ PyFile_GetLine(PyObject *f, int n) const char *s = PyBytes_AS_STRING(result); Py_ssize_t len = PyBytes_GET_SIZE(result); if (len == 0) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); PyErr_SetString(PyExc_EOFError, "EOF when reading a line"); } @@ -95,8 +93,7 @@ PyFile_GetLine(PyObject *f, int n) if (n < 0 && result != NULL && PyUnicode_Check(result)) { Py_ssize_t len = PyUnicode_GET_LENGTH(result); if (len == 0) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); PyErr_SetString(PyExc_EOFError, "EOF when reading a line"); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 312406993c5b23..ad8a936fa7ce20 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1213,8 +1213,7 @@ type_repr(PyTypeObject *type) if (mod == NULL) PyErr_Clear(); else if (!PyUnicode_Check(mod)) { - Py_DECREF(mod); - mod = NULL; + Py_SETREF(mod, NULL); } name = type_qualname(type, NULL); if (name == NULL) { @@ -1288,8 +1287,7 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) int res = type->tp_init(obj, args, kwds); if (res < 0) { assert(_PyErr_Occurred(tstate)); - Py_DECREF(obj); - obj = NULL; + Py_SETREF(obj, NULL); } else { assert(!_PyErr_Occurred(tstate)); @@ -5007,8 +5005,7 @@ object_repr(PyObject *self) if (mod == NULL) PyErr_Clear(); else if (!PyUnicode_Check(mod)) { - Py_DECREF(mod); - mod = NULL; + Py_SETREF(mod, NULL); } name = type_qualname(type, NULL); if (name == NULL) { @@ -8107,8 +8104,7 @@ slot_tp_hash(PyObject *self) func = lookup_maybe_method(self, &_Py_ID(__hash__), &unbound); if (func == Py_None) { - Py_DECREF(func); - func = NULL; + Py_SETREF(func, NULL); } if (func == NULL) { diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index c2cf79a727f0a8..b3b7e8d6c50530 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -218,8 +218,7 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, "__class__ set to %.200R defining %.200R as %.200R"; PyErr_Format(PyExc_TypeError, msg, cell_cls, name, cls); } - Py_DECREF(cls); - cls = NULL; + Py_SETREF(cls, NULL); goto error; } } @@ -2483,8 +2482,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) long i_result = PyLong_AsLongAndOverflow(result, &overflow); /* If this already overflowed, don't even enter the loop. 
*/ if (overflow == 0) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } while(result == NULL) { item = PyIter_Next(iter); @@ -2534,8 +2532,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); if (item == NULL) { @@ -2582,8 +2579,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (item == NULL) { /* error, or end-of-sequence */ if (PyErr_Occurred()) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } break; } diff --git a/Python/errors.c b/Python/errors.c index 6a42f5912f942a..05ef62246ec0a4 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -37,8 +37,7 @@ _PyErr_Restore(PyThreadState *tstate, PyObject *type, PyObject *value, if (traceback != NULL && !PyTraceBack_Check(traceback)) { /* XXX Should never happen -- fatal error instead? */ /* Well, it could be None. */ - Py_DECREF(traceback); - traceback = NULL; + Py_SETREF(traceback, NULL); } /* Save these in locals to safeguard against recursive diff --git a/Python/marshal.c b/Python/marshal.c index 86cf57daa77025..5f392d9e1ecfff 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1217,8 +1217,7 @@ r_object(RFILE *p) if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for tuple"); - Py_DECREF(v); - v = NULL; + Py_SETREF(v, NULL); break; } PyTuple_SET_ITEM(v, i, v2); @@ -1244,8 +1243,7 @@ r_object(RFILE *p) if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for list"); - Py_DECREF(v); - v = NULL; + Py_SETREF(v, NULL); break; } PyList_SET_ITEM(v, i, v2); @@ -1277,8 +1275,7 @@ r_object(RFILE *p) Py_DECREF(val); } if (PyErr_Occurred()) { - Py_DECREF(v); - v = NULL; + Py_SETREF(v, NULL); } retval = v; break; @@ -1322,8 +1319,7 @@ r_object(RFILE *p) if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for set"); - Py_DECREF(v); - v = NULL; + Py_SETREF(v, NULL); break; } if (PySet_Add(v, v2) == -1) { From 55bad199cf89f5488bcfd50b897d798afd57976e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 23 Nov 2022 15:44:42 +0100 Subject: [PATCH 026/112] gh-79315: Add Include/cpython/memoryobject.h header (#99723) Move non-limited C API from Include/memoryobject.h to a new Include/cpython/memoryobject.h header file. --- Include/cpython/memoryobject.h | 42 ++++++++++++++++++++++++++++ Include/memoryobject.h | 44 ++---------------------------- Makefile.pre.in | 1 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 ++ 5 files changed, 50 insertions(+), 41 deletions(-) create mode 100644 Include/cpython/memoryobject.h diff --git a/Include/cpython/memoryobject.h b/Include/cpython/memoryobject.h new file mode 100644 index 00000000000000..e2a1e168e463b8 --- /dev/null +++ b/Include/cpython/memoryobject.h @@ -0,0 +1,42 @@ +#ifndef Py_CPYTHON_MEMORYOBJECT_H +# error "this header file must not be included directly" +#endif + +PyAPI_DATA(PyTypeObject) _PyManagedBuffer_Type; + +/* The structs are declared here so that macros can work, but they shouldn't + be considered public. Don't access their fields directly, use the macros + and functions instead! 
*/ +#define _Py_MANAGED_BUFFER_RELEASED 0x001 /* access to exporter blocked */ +#define _Py_MANAGED_BUFFER_FREE_FORMAT 0x002 /* free format */ + +typedef struct { + PyObject_HEAD + int flags; /* state flags */ + Py_ssize_t exports; /* number of direct memoryview exports */ + Py_buffer master; /* snapshot buffer obtained from the original exporter */ +} _PyManagedBufferObject; + + +/* memoryview state flags */ +#define _Py_MEMORYVIEW_RELEASED 0x001 /* access to master buffer blocked */ +#define _Py_MEMORYVIEW_C 0x002 /* C-contiguous layout */ +#define _Py_MEMORYVIEW_FORTRAN 0x004 /* Fortran contiguous layout */ +#define _Py_MEMORYVIEW_SCALAR 0x008 /* scalar: ndim = 0 */ +#define _Py_MEMORYVIEW_PIL 0x010 /* PIL-style layout */ + +typedef struct { + PyObject_VAR_HEAD + _PyManagedBufferObject *mbuf; /* managed buffer */ + Py_hash_t hash; /* hash value for read-only views */ + int flags; /* state flags */ + Py_ssize_t exports; /* number of buffer re-exports */ + Py_buffer view; /* private copy of the exporter's view */ + PyObject *weakreflist; + Py_ssize_t ob_array[1]; /* shape, strides, suboffsets */ +} PyMemoryViewObject; + +/* Get a pointer to the memoryview's private copy of the exporter's buffer. */ +#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view) +/* Get a pointer to the exporting object (this may be NULL!). */ +#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj) diff --git a/Include/memoryobject.h b/Include/memoryobject.h index 19aec679a5fad1..2c9146aa2b5b06 100644 --- a/Include/memoryobject.h +++ b/Include/memoryobject.h @@ -6,20 +6,10 @@ extern "C" { #endif -#ifndef Py_LIMITED_API -PyAPI_DATA(PyTypeObject) _PyManagedBuffer_Type; -#endif PyAPI_DATA(PyTypeObject) PyMemoryView_Type; #define PyMemoryView_Check(op) Py_IS_TYPE((op), &PyMemoryView_Type) -#ifndef Py_LIMITED_API -/* Get a pointer to the memoryview's private copy of the exporter's buffer. */ -#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view) -/* Get a pointer to the exporting object (this may be NULL!). */ -#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj) -#endif - PyAPI_FUNC(PyObject *) PyMemoryView_FromObject(PyObject *base); #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject *) PyMemoryView_FromMemory(char *mem, Py_ssize_t size, @@ -32,38 +22,10 @@ PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base, int buffertype, char order); - -/* The structs are declared here so that macros can work, but they shouldn't - be considered public. Don't access their fields directly, use the macros - and functions instead! 
*/ #ifndef Py_LIMITED_API -#define _Py_MANAGED_BUFFER_RELEASED 0x001 /* access to exporter blocked */ -#define _Py_MANAGED_BUFFER_FREE_FORMAT 0x002 /* free format */ -typedef struct { - PyObject_HEAD - int flags; /* state flags */ - Py_ssize_t exports; /* number of direct memoryview exports */ - Py_buffer master; /* snapshot buffer obtained from the original exporter */ -} _PyManagedBufferObject; - - -/* memoryview state flags */ -#define _Py_MEMORYVIEW_RELEASED 0x001 /* access to master buffer blocked */ -#define _Py_MEMORYVIEW_C 0x002 /* C-contiguous layout */ -#define _Py_MEMORYVIEW_FORTRAN 0x004 /* Fortran contiguous layout */ -#define _Py_MEMORYVIEW_SCALAR 0x008 /* scalar: ndim = 0 */ -#define _Py_MEMORYVIEW_PIL 0x010 /* PIL-style layout */ - -typedef struct { - PyObject_VAR_HEAD - _PyManagedBufferObject *mbuf; /* managed buffer */ - Py_hash_t hash; /* hash value for read-only views */ - int flags; /* state flags */ - Py_ssize_t exports; /* number of buffer re-exports */ - Py_buffer view; /* private copy of the exporter's view */ - PyObject *weakreflist; - Py_ssize_t ob_array[1]; /* shape, strides, suboffsets */ -} PyMemoryViewObject; +# define Py_CPYTHON_MEMORYOBJECT_H +# include "cpython/memoryobject.h" +# undef Py_CPYTHON_MEMORYOBJECT_H #endif #ifdef __cplusplus diff --git a/Makefile.pre.in b/Makefile.pre.in index 209a9b376a08c9..5c49af36d86736 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1585,6 +1585,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/cpython/listobject.h \ $(srcdir)/Include/cpython/longintrepr.h \ $(srcdir)/Include/cpython/longobject.h \ + $(srcdir)/Include/cpython/memoryobject.h \ $(srcdir)/Include/cpython/methodobject.h \ $(srcdir)/Include/cpython/modsupport.h \ $(srcdir)/Include/cpython/object.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 3c3ff406bdf0ae..f62434370cfdf7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -155,6 +155,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 3ab7f31dff43f3..f44a1ad8550a38 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -384,6 +384,9 @@ Include + + Include + Include From 71a4a2da983a651bfcbc1be59c6e27508cdd05c6 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 23 Nov 2022 19:50:15 +0000 Subject: [PATCH 027/112] Use faster APIs to calculate paths at startup for Store packaged Python on Windows (GH-99345) --- ...2-11-23-17-17-16.gh-issue-99345.jOa3-f.rst | 2 + PC/python_uwp.cpp | 74 ++++++++++++------- 2 files changed, 51 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2022-11-23-17-17-16.gh-issue-99345.jOa3-f.rst diff --git a/Misc/NEWS.d/next/Windows/2022-11-23-17-17-16.gh-issue-99345.jOa3-f.rst b/Misc/NEWS.d/next/Windows/2022-11-23-17-17-16.gh-issue-99345.jOa3-f.rst new file mode 100644 index 00000000000000..99db0c55a67eed --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-11-23-17-17-16.gh-issue-99345.jOa3-f.rst @@ -0,0 +1,2 @@ +Use faster initialization functions to detect install location for Windows +Store package diff --git a/PC/python_uwp.cpp b/PC/python_uwp.cpp index 88369e8fbfeb38..2beea60e5af1ef 100644 --- a/PC/python_uwp.cpp +++ b/PC/python_uwp.cpp @@ -10,6 +10,7 @@ #include +#include #include #include @@ -28,37 +29,49 @@ const wchar_t *PROGNAME = L"python.exe"; #endif static std::wstring -get_user_base() +get_package_family() { try { - const auto appData = winrt::Windows::Storage::ApplicationData::Current(); - if (appData) { - const 
auto localCache = appData.LocalCacheFolder(); - if (localCache) { - auto path = localCache.Path(); - if (!path.empty()) { - return std::wstring(path) + L"\\local-packages"; - } - } + UINT32 nameLength = MAX_PATH; + std::wstring name; + name.resize(nameLength); + DWORD rc = GetCurrentPackageFamilyName(&nameLength, name.data()); + if (rc == ERROR_SUCCESS) { + name.resize(nameLength - 1); + return name; } - } catch (...) { + else if (rc != ERROR_INSUFFICIENT_BUFFER) { + throw rc; + } + name.resize(nameLength); + rc = GetCurrentPackageFamilyName(&nameLength, name.data()); + if (rc != ERROR_SUCCESS) { + throw rc; + } + name.resize(nameLength - 1); + return name; } + catch (...) { + } + return std::wstring(); } static std::wstring -get_package_family() +get_user_base() { try { - const auto package = winrt::Windows::ApplicationModel::Package::Current(); - if (package) { - const auto id = package.Id(); - if (id) { - return std::wstring(id.FamilyName()); + const auto appData = winrt::Windows::Storage::ApplicationData::Current(); + if (appData) { + const auto localCache = appData.LocalCacheFolder(); + if (localCache) { + std::wstring path { localCache.Path().c_str() }; + if (!path.empty()) { + return path + L"\\local-packages"; + } } } - } - catch (...) { + } catch (...) { } return std::wstring(); @@ -68,13 +81,24 @@ static std::wstring get_package_home() { try { - const auto package = winrt::Windows::ApplicationModel::Package::Current(); - if (package) { - const auto path = package.InstalledLocation(); - if (path) { - return std::wstring(path.Path()); - } + UINT32 pathLength = MAX_PATH; + std::wstring path; + path.resize(pathLength); + DWORD rc = GetCurrentPackagePath(&pathLength, path.data()); + if (rc == ERROR_SUCCESS) { + path.resize(pathLength - 1); + return path; + } + else if (rc != ERROR_INSUFFICIENT_BUFFER) { + throw rc; + } + path.resize(pathLength); + rc = GetCurrentPackagePath(&pathLength, path.data()); + if (rc != ERROR_SUCCESS) { + throw rc; } + path.resize(pathLength - 1); + return path; } catch (...) 
{ } From 57dfb1c4c8d5298494f121d1686a77a11612fd64 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Thu, 24 Nov 2022 00:32:09 +0200 Subject: [PATCH 028/112] Upload NEWS file as artifact (#30419) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Éric --- .github/workflows/doc.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 10e4cf074a590a..44a1f206df1eb9 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -50,9 +50,14 @@ jobs: run: make -C Doc/ venv - name: 'Check documentation' run: make -C Doc/ check + - name: 'Upload NEWS' + uses: actions/upload-artifact@v3 + with: + name: NEWS + path: Doc/build/NEWS - name: 'Build HTML documentation' run: make -C Doc/ SPHINXOPTS="-q" SPHINXERRORHANDLING="-W --keep-going" html - - name: 'Upload' + - name: 'Upload docs' uses: actions/upload-artifact@v3 with: name: doc-html From c69cfcdb116c4907b306e2bd0e263d5ceba48bd5 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 24 Nov 2022 01:47:31 +0300 Subject: [PATCH 029/112] closes gh-99508: fix `TypeError` in `Lib/importlib/_bootstrap_external.py` (GH-99635) --- Lib/importlib/_bootstrap_external.py | 3 ++- .../next/Library/2022-11-21-10-45-54.gh-issue-99508.QqVbby.rst | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-21-10-45-54.gh-issue-99508.QqVbby.rst diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f4dbbebcd224c8..71a16064b8ec0a 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -1144,7 +1144,8 @@ def get_code(self, fullname): source_mtime is not None): if hash_based: if source_hash is None: - source_hash = _imp.source_hash(source_bytes) + source_hash = _imp.source_hash(_RAW_MAGIC_NUMBER, + source_bytes) data = _code_to_hash_pyc(code_object, source_hash, check_source) else: data = _code_to_timestamp_pyc(code_object, source_mtime, diff --git a/Misc/NEWS.d/next/Library/2022-11-21-10-45-54.gh-issue-99508.QqVbby.rst b/Misc/NEWS.d/next/Library/2022-11-21-10-45-54.gh-issue-99508.QqVbby.rst new file mode 100644 index 00000000000000..82720d17bcafd3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-21-10-45-54.gh-issue-99508.QqVbby.rst @@ -0,0 +1,2 @@ +Fix ``TypeError`` in ``Lib/importlib/_bootstrap_external.py`` while calling +``_imp.source_hash()``. From 9dc08361bef67a331d1609c8629314c0ca5a79d5 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Thu, 24 Nov 2022 04:24:09 +0200 Subject: [PATCH 030/112] gh-96828: Add an `ssl.OP_ENABLE_KTLS` option (GH-96830) Expose the constant when OpenSSL defines it. --- Doc/library/ssl.rst | 16 ++++++++++++++++ ...2022-09-14-21-56-15.gh-issue-96828.ZoOY5G.rst | 2 ++ Modules/_ssl.c | 3 +++ 3 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-09-14-21-56-15.gh-issue-96828.ZoOY5G.rst diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index 4e6d06dc38d80c..08824feeb3958f 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -807,6 +807,22 @@ Constants .. versionadded:: 3.10 +.. data:: OP_ENABLE_KTLS + + Enable the use of the kernel TLS. To benefit from the feature, OpenSSL must + have been compiled with support for it, and the negotiated cipher suites and + extensions must be supported by it (a list of supported ones may vary by + platform and kernel version). 
+ + Note that with enabled kernel TLS some cryptographic operations are + performed by the kernel directly and not via any available OpenSSL + Providers. This might be undesirable if, for example, the application + requires all cryptographic operations to be performed by the FIPS provider. + + This option is only available with OpenSSL 3.0.0 and later. + + .. versionadded:: 3.12 + .. data:: HAS_ALPN Whether the OpenSSL library has built-in support for the *Application-Layer diff --git a/Misc/NEWS.d/next/Library/2022-09-14-21-56-15.gh-issue-96828.ZoOY5G.rst b/Misc/NEWS.d/next/Library/2022-09-14-21-56-15.gh-issue-96828.ZoOY5G.rst new file mode 100644 index 00000000000000..d8a448851f4779 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-09-14-21-56-15.gh-issue-96828.ZoOY5G.rst @@ -0,0 +1,2 @@ +Add an :data:`~ssl.OP_ENABLE_KTLS` option for enabling the use of the kernel +TLS (kTLS). Patch by Illia Volochii. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 2826d1593753b7..2885774295b065 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -5864,6 +5864,9 @@ sslmodule_init_constants(PyObject *m) PyModule_AddIntConstant(m, "OP_IGNORE_UNEXPECTED_EOF", SSL_OP_IGNORE_UNEXPECTED_EOF); #endif +#ifdef SSL_OP_ENABLE_KTLS + PyModule_AddIntConstant(m, "OP_ENABLE_KTLS", SSL_OP_ENABLE_KTLS); +#endif #ifdef X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT PyModule_AddIntConstant(m, "HOSTFLAG_ALWAYS_CHECK_SUBJECT", From 0c1fbc17b48f56c6070d335ed291a24c91ed190a Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 24 Nov 2022 09:10:27 +0530 Subject: [PATCH 031/112] GH-66285: fix forking in `asyncio` (#99539) `asyncio` now does not shares event loop and signal wakeupfd in forked processes. --- Lib/asyncio/events.py | 9 ++ Lib/test/test_asyncio/test_unix_events.py | 95 +++++++++++++++++++ ...2-11-17-10-56-47.gh-issue-66285.KvjlaB.rst | 1 + 3 files changed, 105 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index a327ba54a323a8..39a01048b4c8dd 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -17,6 +17,7 @@ import subprocess import sys import threading +import signal from . import format_helpers @@ -665,6 +666,14 @@ class _Local(threading.local): def __init__(self): self._local = self._Local() + if hasattr(os, 'fork'): + def on_fork(): + # Reset the loop and wakeupfd in the forked child process. + self._local = self._Local() + signal.set_wakeup_fd(-1) + + os.register_at_fork(after_in_child=on_fork) + def get_event_loop(self): """Get the event loop for the current context. 
diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py index 93e8611f184d25..4e1dab2f86b4dd 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -11,10 +11,13 @@ import sys import threading import unittest +import time from unittest import mock import warnings +import multiprocessing from test.support import os_helper from test.support import socket_helper +from test.support import wait_process if sys.platform == 'win32': raise unittest.SkipTest('UNIX only') @@ -1867,5 +1870,97 @@ async def runner(): wsock.close() +@unittest.skipUnless(hasattr(os, 'fork'), 'requires os.fork()') +class TestFork(unittest.IsolatedAsyncioTestCase): + + async def test_fork_not_share_event_loop(self): + # The forked process should not share the event loop with the parent + loop = asyncio.get_running_loop() + r, w = os.pipe() + self.addCleanup(os.close, r) + self.addCleanup(os.close, w) + pid = os.fork() + if pid == 0: + # child + try: + loop = asyncio.get_event_loop_policy().get_event_loop() + os.write(w, str(id(loop)).encode()) + finally: + os._exit(0) + else: + # parent + child_loop = int(os.read(r, 100).decode()) + self.assertNotEqual(child_loop, id(loop)) + wait_process(pid, exitcode=0) + + def test_fork_signal_handling(self): + # Sending signal to the forked process should not affect the parent + # process + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + child_started = manager.Event() + child_handled = manager.Event() + parent_handled = manager.Event() + + def child_main(): + signal.signal(signal.SIGTERM, lambda *args: child_handled.set()) + child_started.set() + time.sleep(1) + + async def main(): + loop = asyncio.get_running_loop() + loop.add_signal_handler(signal.SIGTERM, lambda *args: parent_handled.set()) + + process = ctx.Process(target=child_main) + process.start() + child_started.wait() + os.kill(process.pid, signal.SIGTERM) + process.join() + + async def func(): + await asyncio.sleep(0.1) + return 42 + + # Test parent's loop is still functional + self.assertEqual(await asyncio.create_task(func()), 42) + + asyncio.run(main()) + + self.assertFalse(parent_handled.is_set()) + self.assertTrue(child_handled.is_set()) + + def test_fork_asyncio_run(self): + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + result = manager.Value('i', 0) + + async def child_main(): + await asyncio.sleep(0.1) + result.value = 42 + + process = ctx.Process(target=lambda: asyncio.run(child_main())) + process.start() + process.join() + + self.assertEqual(result.value, 42) + + def test_fork_asyncio_subprocess(self): + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + result = manager.Value('i', 1) + + async def child_main(): + proc = await asyncio.create_subprocess_exec(sys.executable, '-c', 'pass') + result.value = await proc.wait() + + process = ctx.Process(target=lambda: asyncio.run(child_main())) + process.start() + process.join() + + self.assertEqual(result.value, 0) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst b/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst new file mode 100644 index 00000000000000..ebd82173882726 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst @@ -0,0 +1 @@ +Fix :mod:`asyncio` to not share 
event loop and signal wakeupfd in forked processes. Patch by Kumar Aditya. From c24397a1080fa496d4e860e3054592ecb3685052 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 24 Nov 2022 12:26:54 +0530 Subject: [PATCH 032/112] add Kumar Aditya as `asyncio` codeowner (GH-99744) Automerge-Triggered-By: GH:kumaraditya303 --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 606639b882f824..5f6d86209b842e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,7 +8,7 @@ .github/** @ezio-melotti # asyncio -**/*asyncio* @1st1 @asvetlov @gvanrossum +**/*asyncio* @1st1 @asvetlov @gvanrossum @kumaraditya303 # Core **/*context* @1st1 From b4d54a332ed593c9fcd0da25684c622a251d03ce Mon Sep 17 00:00:00 2001 From: David Hewitt <1939362+davidhewitt@users.noreply.github.com> Date: Thu, 24 Nov 2022 08:21:59 +0000 Subject: [PATCH 033/112] gh-99706: unicodeobject: Fix padding in `PyASCIIObject.state` (GH-99707) --- Include/cpython/unicodeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 8444507ade1b31..86eeab67275ec8 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -135,7 +135,7 @@ typedef struct { unsigned int ascii:1; /* Padding to ensure that PyUnicode_DATA() is always aligned to 4 bytes (see issue #19537 on m68k). */ - unsigned int :25; + unsigned int :26; } state; } PyASCIIObject; From 4e5f2db6f228d45a655cf22fd587792d56116145 Mon Sep 17 00:00:00 2001 From: SQLPATCH <95843853+SQLPATCH@users.noreply.github.com> Date: Thu, 24 Nov 2022 17:04:19 +0800 Subject: [PATCH 034/112] gh-98872: Fix a possible resource leak in Python 3.11.0 (GH-99047) Issue: #98872 Automerge-Triggered-By: GH:kumaraditya303 --- .../next/Build/2022-11-03-08-10-49.gh-issue-98872.gdsR8X.rst | 1 + Programs/_freeze_module.c | 1 + 2 files changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Build/2022-11-03-08-10-49.gh-issue-98872.gdsR8X.rst diff --git a/Misc/NEWS.d/next/Build/2022-11-03-08-10-49.gh-issue-98872.gdsR8X.rst b/Misc/NEWS.d/next/Build/2022-11-03-08-10-49.gh-issue-98872.gdsR8X.rst new file mode 100644 index 00000000000000..ad4dc496ee0e6b --- /dev/null +++ b/Misc/NEWS.d/next/Build/2022-11-03-08-10-49.gh-issue-98872.gdsR8X.rst @@ -0,0 +1 @@ +Fix a possible fd leak in ``Programs/_freeze_module.c`` introduced in Python 3.11. 
diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index d6d737d48d6070..9e2169f32e9211 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -194,6 +194,7 @@ write_frozen(const char *outpath, const char *inpath, const char *name, if (ferror(outfile)) { fprintf(stderr, "error when writing to '%s'\n", outpath); + fclose(outfile); return -1; } fclose(outfile); From 5f4ae86a639fb84260d622e31468da21dc468265 Mon Sep 17 00:00:00 2001 From: zhanpon Date: Thu, 24 Nov 2022 18:10:38 +0900 Subject: [PATCH 035/112] gh-94808: [coverage] Add an asynchronous generator test where the generator is already running (#97672) --- Lib/test/test_asyncgen.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Lib/test/test_asyncgen.py b/Lib/test/test_asyncgen.py index f6184c0cab4694..0421efdbf9dac9 100644 --- a/Lib/test/test_asyncgen.py +++ b/Lib/test/test_asyncgen.py @@ -378,6 +378,19 @@ async def async_gen_wrapper(): self.compare_generators(sync_gen_wrapper(), async_gen_wrapper()) + def test_async_gen_exception_12(self): + async def gen(): + await anext(me) + yield 123 + + me = gen() + ai = me.__aiter__() + an = ai.__anext__() + + with self.assertRaisesRegex(RuntimeError, + r'anext\(\): asynchronous generator is already running'): + an.__next__() + def test_async_gen_3_arg_deprecation_warning(self): async def gen(): yield 123 From ae185fdcca9d48aef425468de8a8a31300280932 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 24 Nov 2022 10:59:07 +0000 Subject: [PATCH 036/112] gh-99708: fix bug where compiler crashes on if expression with an empty body block (GH-99732) --- Lib/test/test_compile.py | 11 ++++++++++ ...2-11-23-18-16-18.gh-issue-99708.7MuaiR.rst | 1 + Python/compile.c | 20 ++++++++++++++++--- 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-23-18-16-18.gh-issue-99708.7MuaiR.rst diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 791f20bbad032e..998ce57927f1a9 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1146,6 +1146,17 @@ def test_compare_positions(self): with self.subTest(source): self.assertEqual(actual_positions, expected_positions) + def test_if_expression_expression_empty_block(self): + # See regression in gh-99708 + exprs = [ + "assert (False if 1 else True)", + "def f():\n\tif not (False if 1 else True): raise AssertionError", + "def f():\n\tif not (False if 1 else True): return 12", + ] + for expr in exprs: + with self.subTest(expr=expr): + compile(expr, "", "exec") + @requires_debug_ranges() class TestSourcePositions(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-23-18-16-18.gh-issue-99708.7MuaiR.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-23-18-16-18.gh-issue-99708.7MuaiR.rst new file mode 100644 index 00000000000000..47f385c2eefee7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-23-18-16-18.gh-issue-99708.7MuaiR.rst @@ -0,0 +1 @@ +Fix bug where compiler crashes on an if expression with an empty body block. 
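For reference, the crash can be reproduced from pure Python by compiling an ``if`` expression whose branch reduces to an empty block. A minimal sketch, reusing the same source strings exercised by the new ``test_if_expression_expression_empty_block`` regression test above (before the ``compile.c`` changes below, these ``compile()`` calls could crash the compiler):

    # Reproducers mirrored from the regression test added in this patch; each
    # source contains an "if" expression whose body block ends up empty after
    # optimization, which previously crashed the compiler.
    for src in (
        "assert (False if 1 else True)",
        "def f():\n\tif not (False if 1 else True): raise AssertionError",
        "def f():\n\tif not (False if 1 else True): return 12",
    ):
        compile(src, "<test>", "exec")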
diff --git a/Python/compile.c b/Python/compile.c index 366321143a54aa..dd8596defb8efe 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -512,7 +512,7 @@ static int compiler_match(struct compiler *, stmt_ty); static int compiler_pattern_subpattern(struct compiler *, pattern_ty, pattern_context *); -static void remove_redundant_nops(basicblock *bb); +static int remove_redundant_nops(basicblock *bb); static PyCodeObject *assemble(struct compiler *, int addNone); @@ -8666,6 +8666,17 @@ static void propagate_line_numbers(basicblock *entryblock); #ifndef NDEBUG + +static bool +no_redundant_nops(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + if (remove_redundant_nops(b) != 0) { + return false; + } + } + return true; +} + static bool no_redundant_jumps(cfg_builder *g) { for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { @@ -9435,7 +9446,7 @@ inline_small_exit_blocks(basicblock *bb) { return 0; } -static void +static int remove_redundant_nops(basicblock *bb) { /* Remove NOPs when legal to do so. */ int dest = 0; @@ -9483,7 +9494,9 @@ remove_redundant_nops(basicblock *bb) { prev_lineno = lineno; } assert(dest <= bb->b_iused); + int num_removed = bb->b_iused - dest; bb->b_iused = dest; + return num_removed; } static int @@ -9694,10 +9707,11 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) b->b_iused = 0; } } - eliminate_empty_basic_blocks(g); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { remove_redundant_nops(b); } + eliminate_empty_basic_blocks(g); + assert(no_redundant_nops(g)); if (remove_redundant_jumps(g) < 0) { return -1; } From 69f6cc77d0f1664f983a83b6ae707d99a99f5c4f Mon Sep 17 00:00:00 2001 From: colorfulappl Date: Thu, 24 Nov 2022 20:24:18 +0800 Subject: [PATCH 037/112] gh-64490: Fix refcount error when arguments are packed to tuple in argument clinic (#99233) --- Lib/test/clinic.test | 4 +- Lib/test/test_clinic.py | 14 ++++ ...2-11-08-11-18-51.gh-issue-64490.VcBgrN.rst | 1 + Modules/_testclinic.c | 37 ++++++++++ Modules/clinic/_testclinic.c.h | 67 ++++++++++++++++++- Tools/clinic/clinic.py | 2 +- 6 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-08-11-18-51.gh-issue-64490.VcBgrN.rst diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 47e3e02490c816..7b804e8576aedc 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -3793,7 +3793,7 @@ test_vararg_and_posonly(PyObject *module, PyObject *const *args, Py_ssize_t narg a = args[0]; __clinic_args = PyTuple_New(nargs - 1); for (Py_ssize_t i = 0; i < nargs - 1; ++i) { - PyTuple_SET_ITEM(__clinic_args, i, args[1 + i]); + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[1 + i])); } return_value = test_vararg_and_posonly_impl(module, a, __clinic_args); @@ -3804,7 +3804,7 @@ exit: static PyObject * test_vararg_and_posonly_impl(PyObject *module, PyObject *a, PyObject *args) -/*[clinic end generated code: output=548bca3a127c22c1 input=08dc2bf7afbf1613]*/ +/*[clinic end generated code: output=081a953b8cbe7617 input=08dc2bf7afbf1613]*/ /*[clinic input] test_vararg diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 7c1bd1c10d2ab6..a590fa50aab04f 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1211,6 +1211,20 @@ def test_keyword_only_parameter(self): ac_tester.keyword_only_parameter(1) self.assertEqual(ac_tester.keyword_only_parameter(a=1), (1,)) + def test_vararg_and_posonly(self): + with self.assertRaises(TypeError): + 
ac_tester.vararg_and_posonly() + with self.assertRaises(TypeError): + ac_tester.vararg_and_posonly(1, b=2) + self.assertEqual(ac_tester.vararg_and_posonly(1, 2, 3, 4), (1, (2, 3, 4))) + + def test_gh_99233_refcount(self): + arg = '*A unique string is not referenced by anywhere else.*' + arg_refcount_origin = sys.getrefcount(arg) + ac_tester.gh_99233_refcount(arg) + arg_refcount_after = sys.getrefcount(arg) + self.assertEqual(arg_refcount_origin, arg_refcount_after) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-11-08-11-18-51.gh-issue-64490.VcBgrN.rst b/Misc/NEWS.d/next/Library/2022-11-08-11-18-51.gh-issue-64490.VcBgrN.rst new file mode 100644 index 00000000000000..f98c181cc9c54b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-08-11-18-51.gh-issue-64490.VcBgrN.rst @@ -0,0 +1 @@ +Fix refcount error when arguments are packed to tuple in Argument Clinic. diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c index c9858e96445714..a23ece2ae0355b 100644 --- a/Modules/_testclinic.c +++ b/Modules/_testclinic.c @@ -892,6 +892,41 @@ keyword_only_parameter_impl(PyObject *module, PyObject *a) } +/*[clinic input] +vararg_and_posonly + + a: object + *args: object + / + +[clinic start generated code]*/ + +static PyObject * +vararg_and_posonly_impl(PyObject *module, PyObject *a, PyObject *args) +/*[clinic end generated code: output=42792f799465a14d input=defe017b19ba52e8]*/ +{ + return pack_arguments_newref(2, a, args); +} + + +/*[clinic input] +gh_99233_refcount + + *args: object + / + +Proof-of-concept of GH-99233 refcount error bug. + +[clinic start generated code]*/ + +static PyObject * +gh_99233_refcount_impl(PyObject *module, PyObject *args) +/*[clinic end generated code: output=585855abfbca9a7f input=85f5fb47ac91a626]*/ +{ + Py_RETURN_NONE; +} + + static PyMethodDef tester_methods[] = { TEST_EMPTY_FUNCTION_METHODDEF OBJECTS_CONVERTER_METHODDEF @@ -933,6 +968,8 @@ static PyMethodDef tester_methods[] = { POSONLY_KEYWORDS_OPT_KWONLY_OPT_METHODDEF POSONLY_OPT_KEYWORDS_OPT_KWONLY_OPT_METHODDEF KEYWORD_ONLY_PARAMETER_METHODDEF + VARARG_AND_POSONLY_METHODDEF + GH_99233_REFCOUNT_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index b0ac4c2eef8340..eb425821e9cb3f 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -2288,4 +2288,69 @@ keyword_only_parameter(PyObject *module, PyObject *const *args, Py_ssize_t nargs exit: return return_value; } -/*[clinic end generated code: output=a9212f8e6ba18bba input=a9049054013a1b77]*/ + +PyDoc_STRVAR(vararg_and_posonly__doc__, +"vararg_and_posonly($module, a, /, *args)\n" +"--\n" +"\n"); + +#define VARARG_AND_POSONLY_METHODDEF \ + {"vararg_and_posonly", _PyCFunction_CAST(vararg_and_posonly), METH_FASTCALL, vararg_and_posonly__doc__}, + +static PyObject * +vararg_and_posonly_impl(PyObject *module, PyObject *a, PyObject *args); + +static PyObject * +vararg_and_posonly(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *a; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("vararg_and_posonly", nargs, 1, PY_SSIZE_T_MAX)) { + goto exit; + } + a = args[0]; + __clinic_args = PyTuple_New(nargs - 1); + for (Py_ssize_t i = 0; i < nargs - 1; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[1 + i])); + } + return_value = vararg_and_posonly_impl(module, a, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + 
+PyDoc_STRVAR(gh_99233_refcount__doc__, +"gh_99233_refcount($module, /, *args)\n" +"--\n" +"\n" +"Proof-of-concept of GH-99233 refcount error bug."); + +#define GH_99233_REFCOUNT_METHODDEF \ + {"gh_99233_refcount", _PyCFunction_CAST(gh_99233_refcount), METH_FASTCALL, gh_99233_refcount__doc__}, + +static PyObject * +gh_99233_refcount_impl(PyObject *module, PyObject *args); + +static PyObject * +gh_99233_refcount(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("gh_99233_refcount", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = gh_99233_refcount_impl(module, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} +/*[clinic end generated code: output=a5c9f181f3a32d85 input=a9049054013a1b77]*/ diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index a8687e3470a185..94e17ee9c7dfda 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -956,7 +956,7 @@ def parser_body(prototype, *fields, declarations=''): parser_code.append(normalize_snippet(""" %s = PyTuple_New(%s); for (Py_ssize_t i = 0; i < %s; ++i) {{ - PyTuple_SET_ITEM(%s, i, args[%d + i]); + PyTuple_SET_ITEM(%s, i, Py_NewRef(args[%d + i])); }} """ % ( p.converter.parser_name, From 8dbe08eb7c807f484fe9870f5b7f5ae2881fd966 Mon Sep 17 00:00:00 2001 From: colorfulappl Date: Thu, 24 Nov 2022 22:01:26 +0800 Subject: [PATCH 038/112] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) Fix double-free bug mentioned at https://github.com/python/cpython/issues/99240, by moving memory clean up out of "exit" label. 
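From Python, the bug surfaces through the ``_testclinic`` test module; a minimal sketch based on the ``test_gh_99240_double_free`` case added in this patch (it assumes a CPython build that ships the private ``_testclinic`` extension, and the comment about the failure path is an inference from the patch rather than something the commit spells out):

    # The second argument carries an embedded null byte, so its "es"
    # conversion fails after the first argument's encoded buffer was already
    # allocated; presumably that buffer is released on the error path inside
    # argument parsing, so the old PyMem_FREE in the generated "exit" label
    # freed it a second time.
    import _testclinic as ac_tester

    try:
        ac_tester.gh_99240_double_free('a', '\0b')
    except TypeError as exc:
        print(exc)  # argument 2 must be encoded string without null bytes, not str

With the cleanup moved into the new post-parsing section, it only runs after a fully successful parse, so the error path above no longer touches the buffers twice.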
Automerge-Triggered-By: GH:erlend-aasland --- Lib/test/clinic.test | 33 +++----- Lib/test/test_clinic.py | 15 ++++ ...2-11-08-15-54-43.gh-issue-99240.MhYwcz.rst | 2 + Modules/_testclinic.c | 79 +++++++++++++++++++ Modules/clinic/_testclinic.c.h | 72 ++++++++++++++++- Tools/clinic/clinic.py | 25 +++++- 6 files changed, 201 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 7b804e8576aedc..f4842cc962f142 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t goto exit; } return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + /* Post parse cleanup for c */ + PyMem_FREE(c); + /* Post parse cleanup for d */ + PyMem_FREE(d); + /* Post parse cleanup for e */ + PyMem_FREE(e); exit: - /* Cleanup for a */ - if (a) { - PyMem_FREE(a); - } - /* Cleanup for b */ - if (b) { - PyMem_FREE(b); - } - /* Cleanup for c */ - if (c) { - PyMem_FREE(c); - } - /* Cleanup for d */ - if (d) { - PyMem_FREE(d); - } - /* Cleanup for e */ - if (e) { - PyMem_FREE(e); - } - return return_value; } @@ -1770,7 +1759,7 @@ static PyObject * test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, char *d, Py_ssize_t d_length, char *e, Py_ssize_t e_length) -/*[clinic end generated code: output=8acb886a3843f3bc input=eb4c38e1f898f402]*/ +/*[clinic end generated code: output=999c1deecfa15b0a input=eb4c38e1f898f402]*/ /*[clinic input] diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index a590fa50aab04f..890beeb9efe29f 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1045,6 +1045,17 @@ def test_str_converter(self): self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c')) self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c')) + def test_str_converter_encoding(self): + with self.assertRaises(TypeError): + ac_tester.str_converter_encoding(1) + self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c')) + with self.assertRaises(TypeError): + ac_tester.str_converter_encoding('a', b'b\0b', 'c') + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c')) + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])), + ('a', 'b', 'c\x00c')) + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c')) + def test_py_buffer_converter(self): with self.assertRaises(TypeError): ac_tester.py_buffer_converter('a', 'b') @@ -1225,6 +1236,10 @@ def test_gh_99233_refcount(self): arg_refcount_after = sys.getrefcount(arg) self.assertEqual(arg_refcount_origin, arg_refcount_after) + def test_gh_99240_double_free(self): + expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str' + with self.assertRaisesRegex(TypeError, expected_error): + ac_tester.gh_99240_double_free('a', '\0b') if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst new file mode 100644 index 00000000000000..0a4db052755f87 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst @@ -0,0 +1,2 @@ +Fix double-free bug in Argument Clinic ``str_converter`` by +extracting memory clean up to a new ``post_parsing`` section. diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c index a23ece2ae0355b..56eddfd6fdbf2d 100644 --- a/Modules/_testclinic.c +++ b/Modules/_testclinic.c @@ -551,6 +551,64 @@ str_converter_impl(PyObject *module, const char *a, const char *b, } +/*[clinic input] +str_converter_encoding + + a: str(encoding="idna") + b: str(encoding="idna", accept={bytes, bytearray, str}) + c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True) + / + +[clinic start generated code]*/ + +static PyObject * +str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, + Py_ssize_t c_length) +/*[clinic end generated code: output=af68766049248a1c input=0c5cf5159d0e870d]*/ +{ + assert(!PyErr_Occurred()); + PyObject *out[3] = {NULL,}; + int i = 0; + PyObject *arg; + + arg = PyUnicode_FromString(a); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + arg = PyUnicode_FromString(b); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + arg = PyUnicode_FromStringAndSize(c, c_length); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + PyObject *tuple = PyTuple_New(3); + if (!tuple) { + goto error; + } + for (int j = 0; j < 3; j++) { + PyTuple_SET_ITEM(tuple, j, out[j]); + } + return tuple; + +error: + for (int j = 0; j < i; j++) { + Py_DECREF(out[j]); + } + return NULL; +} + + static PyObject * bytes_from_buffer(Py_buffer *buf) { @@ -927,6 +985,25 @@ gh_99233_refcount_impl(PyObject *module, PyObject *args) } +/*[clinic input] +gh_99240_double_free + + a: str(encoding="idna") + b: str(encoding="idna") + / + +Proof-of-concept of GH-99240 double-free bug. 
+ +[clinic start generated code]*/ + +static PyObject * +gh_99240_double_free_impl(PyObject *module, char *a, char *b) +/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/ +{ + Py_RETURN_NONE; +} + + static PyMethodDef tester_methods[] = { TEST_EMPTY_FUNCTION_METHODDEF OBJECTS_CONVERTER_METHODDEF @@ -951,6 +1028,7 @@ static PyMethodDef tester_methods[] = { DOUBLE_CONVERTER_METHODDEF PY_COMPLEX_CONVERTER_METHODDEF STR_CONVERTER_METHODDEF + STR_CONVERTER_ENCODING_METHODDEF PY_BUFFER_CONVERTER_METHODDEF KEYWORDS_METHODDEF KEYWORDS_KWONLY_METHODDEF @@ -970,6 +1048,7 @@ static PyMethodDef tester_methods[] = { KEYWORD_ONLY_PARAMETER_METHODDEF VARARG_AND_POSONLY_METHODDEF GH_99233_REFCOUNT_METHODDEF + GH_99240_DOUBLE_FREE_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index eb425821e9cb3f..9aad44566bcde0 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -1165,6 +1165,43 @@ str_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(str_converter_encoding__doc__, +"str_converter_encoding($module, a, b, c, /)\n" +"--\n" +"\n"); + +#define STR_CONVERTER_ENCODING_METHODDEF \ + {"str_converter_encoding", _PyCFunction_CAST(str_converter_encoding), METH_FASTCALL, str_converter_encoding__doc__}, + +static PyObject * +str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, + Py_ssize_t c_length); + +static PyObject * +str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + char *a = NULL; + char *b = NULL; + char *c = NULL; + Py_ssize_t c_length; + + if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding", + "idna", &a, "idna", &b, "idna", &c, &c_length)) { + goto exit; + } + return_value = str_converter_encoding_impl(module, a, b, c, c_length); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + /* Post parse cleanup for c */ + PyMem_FREE(c); + +exit: + return return_value; +} + PyDoc_STRVAR(py_buffer_converter__doc__, "py_buffer_converter($module, a, b, /)\n" "--\n" @@ -2353,4 +2390,37 @@ gh_99233_refcount(PyObject *module, PyObject *const *args, Py_ssize_t nargs) Py_XDECREF(__clinic_args); return return_value; } -/*[clinic end generated code: output=a5c9f181f3a32d85 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(gh_99240_double_free__doc__, +"gh_99240_double_free($module, a, b, /)\n" +"--\n" +"\n" +"Proof-of-concept of GH-99240 double-free bug."); + +#define GH_99240_DOUBLE_FREE_METHODDEF \ + {"gh_99240_double_free", _PyCFunction_CAST(gh_99240_double_free), METH_FASTCALL, gh_99240_double_free__doc__}, + +static PyObject * +gh_99240_double_free_impl(PyObject *module, char *a, char *b); + +static PyObject * +gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + char *a = NULL; + char *b = NULL; + + if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free", + "idna", &a, "idna", &b)) { + goto exit; + } + return_value = gh_99240_double_free_impl(module, a, b); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + +exit: + return return_value; +} +/*[clinic end generated code: output=49dced2c99bcd0fb input=a9049054013a1b77]*/ diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 94e17ee9c7dfda..0117a50725da58 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ 
-348,6 +348,12 @@ def __init__(self): # "goto exit" if there are any. self.return_conversion = [] + # The C statements required to do some operations + # after the end of parsing but before cleaning up. + # These operations may be, for example, memory deallocations which + # can only be done without any error happening during argument parsing. + self.post_parsing = [] + # The C statements required to clean up after the impl call. self.cleanup = [] @@ -820,6 +826,7 @@ def parser_body(prototype, *fields, declarations=''): {modifications} {return_value} = {c_basename}_impl({impl_arguments}); {return_conversion} + {post_parsing} {exit_label} {cleanup} @@ -1460,6 +1467,7 @@ def render_function(self, clinic, f): template_dict['impl_parameters'] = ", ".join(data.impl_parameters) template_dict['impl_arguments'] = ", ".join(data.impl_arguments) template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip()) + template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip()) template_dict['cleanup'] = format_escape("".join(data.cleanup)) template_dict['return_value'] = data.return_value @@ -1484,6 +1492,7 @@ def render_function(self, clinic, f): return_conversion=template_dict['return_conversion'], initializers=template_dict['initializers'], modifications=template_dict['modifications'], + post_parsing=template_dict['post_parsing'], cleanup=template_dict['cleanup'], ) @@ -2725,6 +2734,10 @@ def _render_non_self(self, parameter, data): # parse_arguments self.parse_argument(data.parse_arguments) + # post_parsing + if post_parsing := self.post_parsing(): + data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n') + # cleanup cleanup = self.cleanup() if cleanup: @@ -2820,6 +2833,14 @@ def modify(self): """ return "" + def post_parsing(self): + """ + The C statements required to do some operations after the end of parsing but before cleaning up. + Return a string containing this code indented at column 0. + If no operation is necessary, return an empty string. + """ + return "" + def cleanup(self): """ The C statements required to clean up after this variable. @@ -3416,10 +3437,10 @@ def converter_init(self, *, accept={str}, encoding=None, zeroes=False): if NoneType in accept and self.c_default == "Py_None": self.c_default = "NULL" - def cleanup(self): + def post_parsing(self): if self.encoding: name = self.name - return "".join(["if (", name, ") {\n PyMem_FREE(", name, ");\n}\n"]) + return f"PyMem_FREE({name});\n" def parse_arg(self, argname, displayname): if self.format_unit == 's': From 5d09d11aa0b89aeba187f4f520728ccaf4fc5ac1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 07:32:58 -0800 Subject: [PATCH 039/112] GH-79033: Fix asyncio.Server.wait_closed() (#98582) It was a no-op when used as recommended (after close()). I had to debug one test (test__sock_sendfile_native_failure) -- the cleanup sequence for the test fixture was botched. Hopefully that's not a portend of problems in user code -- this has never worked so people may well be doing this wrong. 
:-( Co-authored-by: kumar aditya --- Lib/asyncio/base_events.py | 2 +- Lib/test/test_asyncio/test_base_events.py | 2 +- Lib/test/test_asyncio/test_server.py | 27 +++++++++++++++++++ ...2-11-22-19-31-26.gh-issue-79033.MW6kHq.rst | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-22-19-31-26.gh-issue-79033.MW6kHq.rst diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 91d32e3939dcd3..f2f93758c3a817 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -377,7 +377,7 @@ async def serve_forever(self): self._serving_forever_fut = None async def wait_closed(self): - if self._sockets is None or self._waiters is None: + if self._waiters is None or self._active_count == 0: return waiter = self._loop.create_future() self._waiters.append(waiter) diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index 2dcb20c1cec7f9..7421d18dc636c8 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -2052,11 +2052,11 @@ def prepare(self): def cleanup(): server.close() - self.run_loop(server.wait_closed()) sock.close() if proto.transport is not None: proto.transport.close() self.run_loop(proto.wait_closed()) + self.run_loop(server.wait_closed()) self.addCleanup(cleanup) diff --git a/Lib/test/test_asyncio/test_server.py b/Lib/test/test_asyncio/test_server.py index 860d62d52ef1ea..06d8b60f219f1a 100644 --- a/Lib/test/test_asyncio/test_server.py +++ b/Lib/test/test_asyncio/test_server.py @@ -120,6 +120,33 @@ async def main(srv): self.loop.run_until_complete(srv.serve_forever()) +class TestServer2(unittest.IsolatedAsyncioTestCase): + + async def test_wait_closed(self): + async def serve(*args): + pass + + srv = await asyncio.start_server(serve, socket_helper.HOSTv4, 0) + + # active count = 0 + task1 = asyncio.create_task(srv.wait_closed()) + await asyncio.sleep(0) + self.assertTrue(task1.done()) + + # active count != 0 + srv._attach() + task2 = asyncio.create_task(srv.wait_closed()) + await asyncio.sleep(0) + self.assertFalse(task2.done()) + + srv.close() + await asyncio.sleep(0) + self.assertFalse(task2.done()) + + srv._detach() + await task2 + + @unittest.skipUnless(hasattr(asyncio, 'ProactorEventLoop'), 'Windows only') class ProactorStartServerTests(BaseStartServer, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2022-11-22-19-31-26.gh-issue-79033.MW6kHq.rst b/Misc/NEWS.d/next/Library/2022-11-22-19-31-26.gh-issue-79033.MW6kHq.rst new file mode 100644 index 00000000000000..4b12fd9c8d798f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-22-19-31-26.gh-issue-79033.MW6kHq.rst @@ -0,0 +1 @@ +Fix :func:`asyncio.Server.wait_closed` to actually do what the docs promise -- wait for all existing connections to complete, after closing the server. From 679d963fc896a3328a6440a1af1bc2a38bf6a5e9 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 24 Nov 2022 21:36:06 +0530 Subject: [PATCH 040/112] GH-66285: skip asyncio fork tests for platforms without md5 hash (#99745) Such buildbots (at the time of writing, only "AMD64 RHEL8 FIPS Only Blake2 Builtin Hash 3.x") cannot use multiprocessing with a fork server, so just skip the test there. 
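The guard itself is just a decorator on each fork-based test; a minimal sketch of the shape applied in the diff below (the reasoning in the comment, that ``multiprocessing``'s connection authentication wants an MD5-based digest on those builds, is an assumption rather than something stated in this commit):

    import unittest
    from test.support import hashlib_helper

    class TestFork(unittest.IsolatedAsyncioTestCase):
        # Skipped when hashlib has no md5 (e.g. FIPS-only Blake2 builds),
        # presumably because the multiprocessing Manager handshake used by
        # these tests relies on an md5 digest.
        @hashlib_helper.requires_hashdigest('md5')
        def test_fork_asyncio_run(self):
            ...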
--- Lib/test/test_asyncio/test_unix_events.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py index 4e1dab2f86b4dd..e71e242a5f9f17 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -18,6 +18,7 @@ from test.support import os_helper from test.support import socket_helper from test.support import wait_process +from test.support import hashlib_helper if sys.platform == 'win32': raise unittest.SkipTest('UNIX only') @@ -1893,6 +1894,7 @@ async def test_fork_not_share_event_loop(self): self.assertNotEqual(child_loop, id(loop)) wait_process(pid, exitcode=0) + @hashlib_helper.requires_hashdigest('md5') def test_fork_signal_handling(self): # Sending signal to the forked process should not affect the parent # process @@ -1930,6 +1932,7 @@ async def func(): self.assertFalse(parent_handled.is_set()) self.assertTrue(child_handled.is_set()) + @hashlib_helper.requires_hashdigest('md5') def test_fork_asyncio_run(self): ctx = multiprocessing.get_context('fork') manager = ctx.Manager() @@ -1946,6 +1949,7 @@ async def child_main(): self.assertEqual(result.value, 42) + @hashlib_helper.requires_hashdigest('md5') def test_fork_asyncio_subprocess(self): ctx = multiprocessing.get_context('fork') manager = ctx.Manager() From b5b3904f05e77f044f158307bc6bdd2bc1b670a2 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Thu, 24 Nov 2022 10:18:40 -0600 Subject: [PATCH 041/112] Fix rendering of audioop license in Doc/license.rst (GH-99752) Also some cosmetic blank line additions for consistency with the formatting of the rest of the file. --- Doc/license.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/license.rst b/Doc/license.rst index 4c2b52ec94a4e9..a934c60698f0b3 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -383,6 +383,7 @@ Project, https://www.wide.ad.jp/. :: OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Asynchronous socket services ---------------------------- @@ -988,9 +989,12 @@ https://www.w3.org/TR/xml-c14n2-testcases/ and is distributed under the (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Audioop ------- + The audioop module uses the code base in g771.c file of the SoX project:: + Programming the AdLib/Sound Blaster FM Music Chips Version 2.0 (24 Feb 1992) From 351842b46a7cb3c3f23b200d532cf29e4557ad4b Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 24 Nov 2022 23:04:21 +0530 Subject: [PATCH 042/112] GH-66285: Revert "fix forking in asyncio" (#99756) --- Lib/asyncio/events.py | 9 -- Lib/test/test_asyncio/test_unix_events.py | 99 ------------------- ...2-11-17-10-56-47.gh-issue-66285.KvjlaB.rst | 1 - 3 files changed, 109 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 39a01048b4c8dd..a327ba54a323a8 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -17,7 +17,6 @@ import subprocess import sys import threading -import signal from . import format_helpers @@ -666,14 +665,6 @@ class _Local(threading.local): def __init__(self): self._local = self._Local() - if hasattr(os, 'fork'): - def on_fork(): - # Reset the loop and wakeupfd in the forked child process. 
- self._local = self._Local() - signal.set_wakeup_fd(-1) - - os.register_at_fork(after_in_child=on_fork) - def get_event_loop(self): """Get the event loop for the current context. diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py index e71e242a5f9f17..93e8611f184d25 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -11,14 +11,10 @@ import sys import threading import unittest -import time from unittest import mock import warnings -import multiprocessing from test.support import os_helper from test.support import socket_helper -from test.support import wait_process -from test.support import hashlib_helper if sys.platform == 'win32': raise unittest.SkipTest('UNIX only') @@ -1871,100 +1867,5 @@ async def runner(): wsock.close() -@unittest.skipUnless(hasattr(os, 'fork'), 'requires os.fork()') -class TestFork(unittest.IsolatedAsyncioTestCase): - - async def test_fork_not_share_event_loop(self): - # The forked process should not share the event loop with the parent - loop = asyncio.get_running_loop() - r, w = os.pipe() - self.addCleanup(os.close, r) - self.addCleanup(os.close, w) - pid = os.fork() - if pid == 0: - # child - try: - loop = asyncio.get_event_loop_policy().get_event_loop() - os.write(w, str(id(loop)).encode()) - finally: - os._exit(0) - else: - # parent - child_loop = int(os.read(r, 100).decode()) - self.assertNotEqual(child_loop, id(loop)) - wait_process(pid, exitcode=0) - - @hashlib_helper.requires_hashdigest('md5') - def test_fork_signal_handling(self): - # Sending signal to the forked process should not affect the parent - # process - ctx = multiprocessing.get_context('fork') - manager = ctx.Manager() - self.addCleanup(manager.shutdown) - child_started = manager.Event() - child_handled = manager.Event() - parent_handled = manager.Event() - - def child_main(): - signal.signal(signal.SIGTERM, lambda *args: child_handled.set()) - child_started.set() - time.sleep(1) - - async def main(): - loop = asyncio.get_running_loop() - loop.add_signal_handler(signal.SIGTERM, lambda *args: parent_handled.set()) - - process = ctx.Process(target=child_main) - process.start() - child_started.wait() - os.kill(process.pid, signal.SIGTERM) - process.join() - - async def func(): - await asyncio.sleep(0.1) - return 42 - - # Test parent's loop is still functional - self.assertEqual(await asyncio.create_task(func()), 42) - - asyncio.run(main()) - - self.assertFalse(parent_handled.is_set()) - self.assertTrue(child_handled.is_set()) - - @hashlib_helper.requires_hashdigest('md5') - def test_fork_asyncio_run(self): - ctx = multiprocessing.get_context('fork') - manager = ctx.Manager() - self.addCleanup(manager.shutdown) - result = manager.Value('i', 0) - - async def child_main(): - await asyncio.sleep(0.1) - result.value = 42 - - process = ctx.Process(target=lambda: asyncio.run(child_main())) - process.start() - process.join() - - self.assertEqual(result.value, 42) - - @hashlib_helper.requires_hashdigest('md5') - def test_fork_asyncio_subprocess(self): - ctx = multiprocessing.get_context('fork') - manager = ctx.Manager() - self.addCleanup(manager.shutdown) - result = manager.Value('i', 1) - - async def child_main(): - proc = await asyncio.create_subprocess_exec(sys.executable, '-c', 'pass') - result.value = await proc.wait() - - process = ctx.Process(target=lambda: asyncio.run(child_main())) - process.start() - process.join() - - self.assertEqual(result.value, 0) - if __name__ == '__main__': unittest.main() 
diff --git a/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst b/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst deleted file mode 100644 index ebd82173882726..00000000000000 --- a/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst +++ /dev/null @@ -1 +0,0 @@ -Fix :mod:`asyncio` to not share event loop and signal wakeupfd in forked processes. Patch by Kumar Aditya. From 0da728387c99fe6c127b070f2d250dc5bdd62ee5 Mon Sep 17 00:00:00 2001 From: colorfulappl Date: Fri, 25 Nov 2022 03:56:50 +0800 Subject: [PATCH 043/112] gh-64490: Fix bugs in argument clinic varargs processing (#32092) --- .../pycore_global_objects_fini_generated.h | 5 + Include/internal/pycore_global_strings.h | 5 + .../internal/pycore_runtime_init_generated.h | 5 + .../internal/pycore_unicodeobject_generated.h | 10 + Lib/test/clinic.test | 7 +- Lib/test/test_clinic.py | 44 ++ ...2-08-11-09-58-15.gh-issue-64490.PjwhM4.rst | 7 + Modules/_testclinic.c | 119 ++++++ Modules/clinic/_testclinic.c.h | 390 +++++++++++++++++- Python/getargs.c | 20 +- Tools/clinic/clinic.py | 11 +- 11 files changed, 612 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2022-08-11-09-58-15.gh-issue-64490.PjwhM4.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 494bcf293cdb7b..9951fa9951e67a 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -986,6 +986,9 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keyfile)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keys)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kind)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lambda)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_node)); @@ -1084,6 +1087,8 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pid)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(policy)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(print_file_and_line)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index b0cb8365933e77..12144b02f45574 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -472,6 +472,9 @@ struct _Py_global_strings { STRUCT_FOR_ID(keyfile) STRUCT_FOR_ID(keys) STRUCT_FOR_ID(kind) + STRUCT_FOR_ID(kw) + STRUCT_FOR_ID(kw1) + STRUCT_FOR_ID(kw2) STRUCT_FOR_ID(lambda) STRUCT_FOR_ID(last) STRUCT_FOR_ID(last_node) @@ -570,6 +573,8 @@ struct _Py_global_strings { STRUCT_FOR_ID(pid) STRUCT_FOR_ID(policy) STRUCT_FOR_ID(pos) + STRUCT_FOR_ID(pos1) + STRUCT_FOR_ID(pos2) STRUCT_FOR_ID(print_file_and_line) STRUCT_FOR_ID(priority) STRUCT_FOR_ID(progress) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 
4b128da54555b7..87b0f2ed8dfa8c 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -978,6 +978,9 @@ extern "C" { INIT_ID(keyfile), \ INIT_ID(keys), \ INIT_ID(kind), \ + INIT_ID(kw), \ + INIT_ID(kw1), \ + INIT_ID(kw2), \ INIT_ID(lambda), \ INIT_ID(last), \ INIT_ID(last_node), \ @@ -1076,6 +1079,8 @@ extern "C" { INIT_ID(pid), \ INIT_ID(policy), \ INIT_ID(pos), \ + INIT_ID(pos1), \ + INIT_ID(pos2), \ INIT_ID(print_file_and_line), \ INIT_ID(priority), \ INIT_ID(progress), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 7ef1f7e94ddead..80be342b5b3b44 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -850,6 +850,12 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(kind); PyUnicode_InternInPlace(&string); + string = &_Py_ID(kw); + PyUnicode_InternInPlace(&string); + string = &_Py_ID(kw1); + PyUnicode_InternInPlace(&string); + string = &_Py_ID(kw2); + PyUnicode_InternInPlace(&string); string = &_Py_ID(lambda); PyUnicode_InternInPlace(&string); string = &_Py_ID(last); @@ -1046,6 +1052,10 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(pos); PyUnicode_InternInPlace(&string); + string = &_Py_ID(pos1); + PyUnicode_InternInPlace(&string); + string = &_Py_ID(pos2); + PyUnicode_InternInPlace(&string); string = &_Py_ID(print_file_and_line); PyUnicode_InternInPlace(&string); string = &_Py_ID(priority); diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index f4842cc962f142..0d844234d9d1f6 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -3845,7 +3845,6 @@ test_vararg(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject }; #undef KWTUPLE PyObject *argsbuf[2]; - Py_ssize_t noptargs = 0 + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *a; PyObject *__clinic_args = NULL; @@ -3864,7 +3863,7 @@ exit: static PyObject * test_vararg_impl(PyObject *module, PyObject *a, PyObject *args) -/*[clinic end generated code: output=6661f3ca97d85e8c input=81d33815ad1bae6e]*/ +/*[clinic end generated code: output=880365c61ae205d7 input=81d33815ad1bae6e]*/ /*[clinic input] test_vararg_with_default @@ -3918,7 +3917,7 @@ test_vararg_with_default(PyObject *module, PyObject *const *args, Py_ssize_t nar }; #undef KWTUPLE PyObject *argsbuf[3]; - Py_ssize_t noptargs = 0 + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_ssize_t noptargs = Py_MIN(nargs, 1) + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *a; PyObject *__clinic_args = NULL; int b = 0; @@ -3947,7 +3946,7 @@ exit: static PyObject * test_vararg_with_default_impl(PyObject *module, PyObject *a, PyObject *args, int b) -/*[clinic end generated code: output=5fe3cfccb1bef781 input=6e110b54acd9b22d]*/ +/*[clinic end generated code: output=291e9a5a09831128 input=6e110b54acd9b22d]*/ /*[clinic input] test_vararg_with_only_defaults diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 890beeb9efe29f..4abf739cf52ca3 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -730,6 +730,15 @@ def test_parameters_not_permitted_after_slash_for_now(self): x: int """) + def test_parameters_no_more_than_one_vararg(self): + s = self.parse_function_should_fail(""" +module foo +foo.bar + *vararg1: object + *vararg2: object +""") + self.assertEqual(s, "Error on line 0:\nToo many var args\n") + def test_function_not_at_column_0(self): function = self.parse_function(""" module foo @@ -1222,6 +1231,13 @@ def test_keyword_only_parameter(self): ac_tester.keyword_only_parameter(1) self.assertEqual(ac_tester.keyword_only_parameter(a=1), (1,)) + def test_posonly_vararg(self): + with self.assertRaises(TypeError): + ac_tester.posonly_vararg() + self.assertEqual(ac_tester.posonly_vararg(1, 2), (1, 2, ())) + self.assertEqual(ac_tester.posonly_vararg(1, b=2), (1, 2, ())) + self.assertEqual(ac_tester.posonly_vararg(1, 2, 3, 4), (1, 2, (3, 4))) + def test_vararg_and_posonly(self): with self.assertRaises(TypeError): ac_tester.vararg_and_posonly() @@ -1229,6 +1245,33 @@ def test_vararg_and_posonly(self): ac_tester.vararg_and_posonly(1, b=2) self.assertEqual(ac_tester.vararg_and_posonly(1, 2, 3, 4), (1, (2, 3, 4))) + def test_vararg(self): + with self.assertRaises(TypeError): + ac_tester.vararg() + with self.assertRaises(TypeError): + ac_tester.vararg(1, b=2) + self.assertEqual(ac_tester.vararg(1, 2, 3, 4), (1, (2, 3, 4))) + + def test_vararg_with_default(self): + with self.assertRaises(TypeError): + ac_tester.vararg_with_default() + self.assertEqual(ac_tester.vararg_with_default(1, b=False), (1, (), False)) + self.assertEqual(ac_tester.vararg_with_default(1, 2, 3, 4), (1, (2, 3, 4), False)) + self.assertEqual(ac_tester.vararg_with_default(1, 2, 3, 4, b=True), (1, (2, 3, 4), True)) + + def test_vararg_with_only_defaults(self): + self.assertEqual(ac_tester.vararg_with_only_defaults(), ((), None)) + self.assertEqual(ac_tester.vararg_with_only_defaults(b=2), ((), 2)) + self.assertEqual(ac_tester.vararg_with_only_defaults(1, b=2), ((1, ), 2)) + self.assertEqual(ac_tester.vararg_with_only_defaults(1, 2, 3, 4), ((1, 2, 3, 4), None)) + self.assertEqual(ac_tester.vararg_with_only_defaults(1, 2, 3, 4, b=5), ((1, 2, 3, 4), 5)) + + def test_gh_32092_oob(self): + ac_tester.gh_32092_oob(1, 2, 3, 4, kw1=5, kw2=6) + + def test_gh_32092_kw_pass(self): + ac_tester.gh_32092_kw_pass(1, 2, 3) + def test_gh_99233_refcount(self): arg = '*A unique string is not referenced by anywhere else.*' arg_refcount_origin = sys.getrefcount(arg) @@ -1241,5 +1284,6 @@ def test_gh_99240_double_free(self): with self.assertRaisesRegex(TypeError, expected_error): ac_tester.gh_99240_double_free('a', '\0b') + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Tools-Demos/2022-08-11-09-58-15.gh-issue-64490.PjwhM4.rst b/Misc/NEWS.d/next/Tools-Demos/2022-08-11-09-58-15.gh-issue-64490.PjwhM4.rst new file mode 100644 index 00000000000000..4a308a9306055c --- /dev/null +++ 
b/Misc/NEWS.d/next/Tools-Demos/2022-08-11-09-58-15.gh-issue-64490.PjwhM4.rst @@ -0,0 +1,7 @@ +Argument Clinic varargs bugfixes + +* Fix out-of-bounds error in :c:func:`!_PyArg_UnpackKeywordsWithVararg`. +* Fix incorrect check which allowed more than one varargs in clinic.py. +* Fix miscalculation of ``noptargs`` in generated code. +* Do not generate ``noptargs`` when there is a vararg argument and no optional argument. + diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c index 56eddfd6fdbf2d..91fdee24d328d9 100644 --- a/Modules/_testclinic.c +++ b/Modules/_testclinic.c @@ -950,6 +950,25 @@ keyword_only_parameter_impl(PyObject *module, PyObject *a) } +/*[clinic input] +posonly_vararg + + a: object + / + b: object + *args: object + +[clinic start generated code]*/ + +static PyObject * +posonly_vararg_impl(PyObject *module, PyObject *a, PyObject *b, + PyObject *args) +/*[clinic end generated code: output=ee6713acda6b954e input=783427fe7ec2b67a]*/ +{ + return pack_arguments_newref(3, a, b, args); +} + + /*[clinic input] vararg_and_posonly @@ -967,6 +986,100 @@ vararg_and_posonly_impl(PyObject *module, PyObject *a, PyObject *args) } +/*[clinic input] +vararg + + a: object + *args: object + +[clinic start generated code]*/ + +static PyObject * +vararg_impl(PyObject *module, PyObject *a, PyObject *args) +/*[clinic end generated code: output=91ab7a0efc52dd5e input=02c0f772d05f591e]*/ +{ + return pack_arguments_newref(2, a, args); +} + + +/*[clinic input] +vararg_with_default + + a: object + *args: object + b: bool = False + +[clinic start generated code]*/ + +static PyObject * +vararg_with_default_impl(PyObject *module, PyObject *a, PyObject *args, + int b) +/*[clinic end generated code: output=182c01035958ce92 input=68cafa6a79f89e36]*/ +{ + PyObject *obj_b = b ? Py_True : Py_False; + return pack_arguments_newref(3, a, args, obj_b); +} + + +/*[clinic input] +vararg_with_only_defaults + + *args: object + b: object = None + +[clinic start generated code]*/ + +static PyObject * +vararg_with_only_defaults_impl(PyObject *module, PyObject *args, PyObject *b) +/*[clinic end generated code: output=c06b1826d91f2f7b input=678c069bc67550e1]*/ +{ + return pack_arguments_newref(2, args, b); +} + + + +/*[clinic input] +gh_32092_oob + + pos1: object + pos2: object + *varargs: object + kw1: object = None + kw2: object = None + +Proof-of-concept of GH-32092 OOB bug. + +[clinic start generated code]*/ + +static PyObject * +gh_32092_oob_impl(PyObject *module, PyObject *pos1, PyObject *pos2, + PyObject *varargs, PyObject *kw1, PyObject *kw2) +/*[clinic end generated code: output=ee259c130054653f input=46d15c881608f8ff]*/ +{ + Py_RETURN_NONE; +} + + +/*[clinic input] +gh_32092_kw_pass + + pos: object + *args: object + kw: object = None + +Proof-of-concept of GH-32092 keyword args passing bug. 
+ +[clinic start generated code]*/ + +static PyObject * +gh_32092_kw_pass_impl(PyObject *module, PyObject *pos, PyObject *args, + PyObject *kw) +/*[clinic end generated code: output=4a2bbe4f7c8604e9 input=5c0bd5b9079a0cce]*/ +{ + Py_RETURN_NONE; +} + + /*[clinic input] gh_99233_refcount @@ -1046,7 +1159,13 @@ static PyMethodDef tester_methods[] = { POSONLY_KEYWORDS_OPT_KWONLY_OPT_METHODDEF POSONLY_OPT_KEYWORDS_OPT_KWONLY_OPT_METHODDEF KEYWORD_ONLY_PARAMETER_METHODDEF + POSONLY_VARARG_METHODDEF VARARG_AND_POSONLY_METHODDEF + VARARG_METHODDEF + VARARG_WITH_DEFAULT_METHODDEF + VARARG_WITH_ONLY_DEFAULTS_METHODDEF + GH_32092_OOB_METHODDEF + GH_32092_KW_PASS_METHODDEF GH_99233_REFCOUNT_METHODDEF GH_99240_DOUBLE_FREE_METHODDEF {NULL, NULL} diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index 9aad44566bcde0..21bde529470294 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -2326,6 +2326,66 @@ keyword_only_parameter(PyObject *module, PyObject *const *args, Py_ssize_t nargs return return_value; } +PyDoc_STRVAR(posonly_vararg__doc__, +"posonly_vararg($module, a, /, b, *args)\n" +"--\n" +"\n"); + +#define POSONLY_VARARG_METHODDEF \ + {"posonly_vararg", _PyCFunction_CAST(posonly_vararg), METH_FASTCALL|METH_KEYWORDS, posonly_vararg__doc__}, + +static PyObject * +posonly_vararg_impl(PyObject *module, PyObject *a, PyObject *b, + PyObject *args); + +static PyObject * +posonly_vararg(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(b), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "b", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "posonly_vararg", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + PyObject *a; + PyObject *b; + PyObject *__clinic_args = NULL; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, 2, argsbuf); + if (!args) { + goto exit; + } + a = args[0]; + b = args[1]; + __clinic_args = args[2]; + return_value = posonly_vararg_impl(module, a, b, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + PyDoc_STRVAR(vararg_and_posonly__doc__, "vararg_and_posonly($module, a, /, *args)\n" "--\n" @@ -2359,6 +2419,334 @@ vararg_and_posonly(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(vararg__doc__, +"vararg($module, /, a, *args)\n" +"--\n" +"\n"); + +#define VARARG_METHODDEF \ + {"vararg", _PyCFunction_CAST(vararg), METH_FASTCALL|METH_KEYWORDS, vararg__doc__}, + +static PyObject * +vararg_impl(PyObject *module, PyObject *a, PyObject *args); + +static PyObject * +vararg(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = 
{ &_Py_ID(a), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"a", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "vararg", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *a; + PyObject *__clinic_args = NULL; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, 1, argsbuf); + if (!args) { + goto exit; + } + a = args[0]; + __clinic_args = args[1]; + return_value = vararg_impl(module, a, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(vararg_with_default__doc__, +"vararg_with_default($module, /, a, *args, b=False)\n" +"--\n" +"\n"); + +#define VARARG_WITH_DEFAULT_METHODDEF \ + {"vararg_with_default", _PyCFunction_CAST(vararg_with_default), METH_FASTCALL|METH_KEYWORDS, vararg_with_default__doc__}, + +static PyObject * +vararg_with_default_impl(PyObject *module, PyObject *a, PyObject *args, + int b); + +static PyObject * +vararg_with_default(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(a), &_Py_ID(b), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"a", "b", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "vararg_with_default", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = Py_MIN(nargs, 1) + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + PyObject *a; + PyObject *__clinic_args = NULL; + int b = 0; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, 1, argsbuf); + if (!args) { + goto exit; + } + a = args[0]; + __clinic_args = args[1]; + if (!noptargs) { + goto skip_optional_kwonly; + } + b = PyObject_IsTrue(args[2]); + if (b < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = vararg_with_default_impl(module, a, __clinic_args, b); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(vararg_with_only_defaults__doc__, +"vararg_with_only_defaults($module, /, *args, b=None)\n" +"--\n" +"\n"); + +#define VARARG_WITH_ONLY_DEFAULTS_METHODDEF \ + {"vararg_with_only_defaults", _PyCFunction_CAST(vararg_with_only_defaults), METH_FASTCALL|METH_KEYWORDS, vararg_with_only_defaults__doc__}, + +static PyObject * +vararg_with_only_defaults_impl(PyObject *module, PyObject *args, PyObject *b); + +static PyObject * +vararg_with_only_defaults(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(b), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"b", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "vararg_with_only_defaults", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = 0 + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *__clinic_args = NULL; + PyObject *b = Py_None; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, 0, argsbuf); + if (!args) { + goto exit; + } + __clinic_args = args[0]; + if (!noptargs) { + goto skip_optional_kwonly; + } + b = args[1]; +skip_optional_kwonly: + return_value = vararg_with_only_defaults_impl(module, __clinic_args, b); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(gh_32092_oob__doc__, +"gh_32092_oob($module, /, pos1, pos2, *varargs, kw1=None, kw2=None)\n" +"--\n" +"\n" +"Proof-of-concept of GH-32092 OOB bug."); + +#define GH_32092_OOB_METHODDEF \ + {"gh_32092_oob", _PyCFunction_CAST(gh_32092_oob), METH_FASTCALL|METH_KEYWORDS, gh_32092_oob__doc__}, + +static PyObject * +gh_32092_oob_impl(PyObject *module, PyObject *pos1, PyObject *pos2, + PyObject *varargs, PyObject *kw1, PyObject *kw2); + +static PyObject * +gh_32092_oob(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(pos1), &_Py_ID(pos2), &_Py_ID(kw1), &_Py_ID(kw2), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pos1", "pos2", "kw1", "kw2", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "gh_32092_oob", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = Py_MIN(nargs, 2) + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 2; + PyObject *pos1; + PyObject *pos2; + PyObject *varargs = NULL; + PyObject *kw1 = Py_None; + PyObject *kw2 = Py_None; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, 2, argsbuf); + if (!args) { + goto exit; + } + pos1 = args[0]; + pos2 = args[1]; + varargs = args[2]; + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[3]) { + kw1 = args[3]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + kw2 = args[4]; +skip_optional_kwonly: + return_value = gh_32092_oob_impl(module, pos1, pos2, varargs, kw1, kw2); + +exit: + Py_XDECREF(varargs); + return return_value; +} + +PyDoc_STRVAR(gh_32092_kw_pass__doc__, +"gh_32092_kw_pass($module, /, pos, *args, kw=None)\n" +"--\n" +"\n" +"Proof-of-concept of GH-32092 keyword args passing bug."); + +#define GH_32092_KW_PASS_METHODDEF \ + {"gh_32092_kw_pass", _PyCFunction_CAST(gh_32092_kw_pass), METH_FASTCALL|METH_KEYWORDS, gh_32092_kw_pass__doc__}, + +static PyObject * +gh_32092_kw_pass_impl(PyObject *module, PyObject *pos, PyObject *args, + PyObject *kw); + +static PyObject * +gh_32092_kw_pass(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(pos), &_Py_ID(kw), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pos", "kw", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "gh_32092_kw_pass", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = Py_MIN(nargs, 1) + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + PyObject *pos; + PyObject *__clinic_args = NULL; + PyObject *kw = Py_None; + + args = _PyArg_UnpackKeywordsWithVararg(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, 1, argsbuf); + if (!args) { + goto exit; + } + pos = args[0]; + __clinic_args = args[1]; + if (!noptargs) { + goto skip_optional_kwonly; + } + kw = args[2]; +skip_optional_kwonly: + return_value = gh_32092_kw_pass_impl(module, pos, __clinic_args, kw); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + PyDoc_STRVAR(gh_99233_refcount__doc__, "gh_99233_refcount($module, /, *args)\n" "--\n" @@ -2423,4 +2811,4 @@ gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=49dced2c99bcd0fb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9a5ca5909c087102 input=a9049054013a1b77]*/ diff --git a/Python/getargs.c b/Python/getargs.c index 748209d7d713f8..0167dd753d88f7 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -2598,7 +2598,25 @@ _PyArg_UnpackKeywordsWithVararg(PyObject *const *args, Py_ssize_t nargs, current_arg = NULL; } - buf[i + vararg + 1] = current_arg; + /* If an arguments is passed in as a keyword argument, + * it should be placed before `buf[vararg]`. + * + * For example: + * def f(a, /, b, *args): + * pass + * f(1, b=2) + * + * This `buf` array should be: [1, 2, NULL]. + * In this case, nargs < vararg. + * + * Otherwise, we leave a place at `buf[vararg]` for vararg tuple + * so the index is `i + 1`. 
*/ + if (nargs < vararg) { + buf[i] = current_arg; + } + else { + buf[i + 1] = current_arg; + } if (current_arg) { --nkwargs; diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 0117a50725da58..0ece814e8f1883 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -719,7 +719,7 @@ def output_templates(self, f): vararg = NO_VARARG pos_only = min_pos = max_pos = min_kw_only = pseudo_args = 0 for i, p in enumerate(parameters, 1): - if p.is_keyword_only() or vararg != NO_VARARG: + if p.is_keyword_only(): assert not p.is_positional_only() if not p.is_optional(): min_kw_only = i - max_pos @@ -1016,13 +1016,14 @@ def parser_body(prototype, *fields, declarations=''): parser_definition = parser_body(parser_prototype, *parser_code) else: - has_optional_kw = (max(pos_only, min_pos) + min_kw_only < len(converters)) + has_optional_kw = (max(pos_only, min_pos) + min_kw_only < len(converters) - int(vararg != NO_VARARG)) if vararg == NO_VARARG: args_declaration = "_PyArg_UnpackKeywords", "%s, %s, %s" % ( min_pos, max_pos, min_kw_only ) + nargs = "nargs" else: args_declaration = "_PyArg_UnpackKeywordsWithVararg", "%s, %s, %s, %s" % ( min_pos, @@ -1030,6 +1031,7 @@ def parser_body(prototype, *fields, declarations=''): min_kw_only, vararg ) + nargs = f"Py_MIN(nargs, {max_pos})" if max_pos else "0" if not new_or_init: flags = "METH_FASTCALL|METH_KEYWORDS" parser_prototype = parser_prototype_fastcall_keywords @@ -1037,8 +1039,7 @@ def parser_body(prototype, *fields, declarations=''): declarations = declare_parser(f) declarations += "\nPyObject *argsbuf[%s];" % len(converters) if has_optional_kw: - pre_buffer = "0" if vararg != NO_VARARG else "nargs" - declarations += "\nPy_ssize_t noptargs = %s + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - %d;" % (pre_buffer, min_pos + min_kw_only) + declarations += "\nPy_ssize_t noptargs = %s + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - %d;" % (nargs, min_pos + min_kw_only) parser_code = [normalize_snippet(""" args = %s(args, nargs, NULL, kwnames, &_parser, %s, argsbuf); if (!args) {{ @@ -1055,7 +1056,7 @@ def parser_body(prototype, *fields, declarations=''): declarations += "\nPyObject * const *fastargs;" declarations += "\nPy_ssize_t nargs = PyTuple_GET_SIZE(args);" if has_optional_kw: - declarations += "\nPy_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - %d;" % (min_pos + min_kw_only) + declarations += "\nPy_ssize_t noptargs = %s + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - %d;" % (nargs, min_pos + min_kw_only) parser_code = [normalize_snippet(""" fastargs = %s(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, %s, argsbuf); if (!fastargs) {{ From 3a803bcaacece466e9c137fb4a3c6389780377d6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 24 Nov 2022 22:17:33 +0100 Subject: [PATCH 044/112] Revert "gh-98724: Fix Py_CLEAR() macro side effects" (#99737) Revert "gh-98724: Fix Py_CLEAR() macro side effects (#99100)" This reverts commit c03e05c2e72f3ea5e797389e7d1042eef85ad37a. 
--- Doc/c-api/refcounting.rst | 46 +-------------------- Doc/whatsnew/3.12.rst | 5 --- Include/cpython/object.h | 44 +++++++++----------- Include/object.h | 19 ++++----- Modules/_testcapimodule.c | 87 --------------------------------------- 5 files changed, 29 insertions(+), 172 deletions(-) diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst index d8e9c2da6f3ff3..cd1f2ef7076836 100644 --- a/Doc/c-api/refcounting.rst +++ b/Doc/c-api/refcounting.rst @@ -7,8 +7,8 @@ Reference Counting ****************** -The functions and macros in this section are used for managing reference counts -of Python objects. +The macros in this section are used for managing reference counts of Python +objects. .. c:function:: Py_ssize_t Py_REFCNT(PyObject *o) @@ -129,11 +129,6 @@ of Python objects. It is a good idea to use this macro whenever decrementing the reference count of an object that might be traversed during garbage collection. - .. versionchanged:: 3.12 - The macro argument is now only evaluated once. If the argument has side - effects, these are no longer duplicated. - - .. c:function:: void Py_IncRef(PyObject *o) Increment the reference count for object *o*. A function version of :c:func:`Py_XINCREF`. @@ -144,40 +139,3 @@ of Python objects. Decrement the reference count for object *o*. A function version of :c:func:`Py_XDECREF`. It can be used for runtime dynamic embedding of Python. - - -.. c:macro:: Py_SETREF(dst, src) - - Macro safely decrementing the `dst` reference count and setting `dst` to - `src`. - - As in case of :c:func:`Py_CLEAR`, "the obvious" code can be deadly:: - - Py_DECREF(dst); - dst = src; - - The safe way is:: - - Py_SETREF(dst, src); - - That arranges to set `dst` to `src` _before_ decrementing reference count of - *dst* old value, so that any code triggered as a side-effect of `dst` - getting torn down no longer believes `dst` points to a valid object. - - .. versionadded:: 3.6 - - .. versionchanged:: 3.12 - The macro arguments are now only evaluated once. If an argument has side - effects, these are no longer duplicated. - - -.. c:macro:: Py_XSETREF(dst, src) - - Variant of :c:macro:`Py_SETREF` macro that uses :c:func:`Py_XDECREF` instead - of :c:func:`Py_DECREF`. - - .. versionadded:: 3.6 - - .. versionchanged:: 3.12 - The macro arguments are now only evaluated once. If an argument has side - effects, these are no longer duplicated. diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index a9b69c2ebf43bf..dff4de621b4c49 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -822,11 +822,6 @@ Porting to Python 3.12 :class:`bytes` type is accepted for bytes strings. (Contributed by Victor Stinner in :gh:`98393`.) -* The :c:macro:`Py_CLEAR`, :c:macro:`Py_SETREF` and :c:macro:`Py_XSETREF` - macros now only evaluate their argument once. If the argument has side - effects, these side effects are no longer duplicated. - (Contributed by Victor Stinner in :gh:`98724`.) - Deprecated ---------- diff --git a/Include/cpython/object.h b/Include/cpython/object.h index f4755a7b2fb852..3abfcb7d44f0fb 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -305,41 +305,37 @@ _PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, PyAPI_FUNC(PyObject *) _PyObject_FunctionStr(PyObject *); -/* Safely decref `dst` and set `dst` to `src`. +/* Safely decref `op` and set `op` to `op2`. 
* * As in case of Py_CLEAR "the obvious" code can be deadly: * - * Py_DECREF(dst); - * dst = src; + * Py_DECREF(op); + * op = op2; * * The safe way is: * - * Py_SETREF(dst, src); + * Py_SETREF(op, op2); * - * That arranges to set `dst` to `src` _before_ decref'ing, so that any code - * triggered as a side-effect of `dst` getting torn down no longer believes - * `dst` points to a valid object. + * That arranges to set `op` to `op2` _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. * - * gh-98724: Use the _tmp_dst_ptr variable to evaluate the 'dst' macro argument - * exactly once, to prevent the duplication of side effects in this macro. + * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of + * Py_DECREF. */ -#define Py_SETREF(dst, src) \ - do { \ - PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ - PyObject *_tmp_dst = (*_tmp_dst_ptr); \ - *_tmp_dst_ptr = _PyObject_CAST(src); \ - Py_DECREF(_tmp_dst); \ + +#define Py_SETREF(op, op2) \ + do { \ + PyObject *_py_tmp = _PyObject_CAST(op); \ + (op) = (op2); \ + Py_DECREF(_py_tmp); \ } while (0) -/* Py_XSETREF() is a variant of Py_SETREF() that uses Py_XDECREF() instead of - * Py_DECREF(). - */ -#define Py_XSETREF(dst, src) \ - do { \ - PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ - PyObject *_tmp_dst = (*_tmp_dst_ptr); \ - *_tmp_dst_ptr = _PyObject_CAST(src); \ - Py_XDECREF(_tmp_dst); \ +#define Py_XSETREF(op, op2) \ + do { \ + PyObject *_py_tmp = _PyObject_CAST(op); \ + (op) = (op2); \ + Py_XDECREF(_py_tmp); \ } while (0) diff --git a/Include/object.h b/Include/object.h index a2ed0bd2349f2a..75624fe8c77a51 100644 --- a/Include/object.h +++ b/Include/object.h @@ -598,21 +598,16 @@ static inline void Py_DECREF(PyObject *op) * one of those can't cause problems -- but in part that relies on that * Python integers aren't currently weakly referencable. Best practice is * to use Py_CLEAR() even if you can't think of a reason for why you need to. - * - * gh-98724: Use the _py_tmp_ptr variable to evaluate the macro argument - * exactly once, to prevent the duplication of side effects in this macro. 
*/ -#define Py_CLEAR(op) \ - do { \ - PyObject **_py_tmp_ptr = _Py_CAST(PyObject**, &(op)); \ - if (*_py_tmp_ptr != NULL) { \ - PyObject* _py_tmp = (*_py_tmp_ptr); \ - *_py_tmp_ptr = NULL; \ - Py_DECREF(_py_tmp); \ - } \ +#define Py_CLEAR(op) \ + do { \ + PyObject *_py_tmp = _PyObject_CAST(op); \ + if (_py_tmp != NULL) { \ + (op) = NULL; \ + Py_DECREF(_py_tmp); \ + } \ } while (0) - /* Function to use in case the object pointer can be NULL: */ static inline void Py_XINCREF(PyObject *op) { diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 83eef73a875d9d..3617fafe9b4fdd 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2589,91 +2589,6 @@ test_set_type_size(PyObject *self, PyObject *Py_UNUSED(ignored)) } -// Test Py_CLEAR() macro -static PyObject* -test_py_clear(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - // simple case with a variable - PyObject *obj = PyList_New(0); - if (obj == NULL) { - return NULL; - } - Py_CLEAR(obj); - assert(obj == NULL); - - // gh-98724: complex case, Py_CLEAR() argument has a side effect - PyObject* array[1]; - array[0] = PyList_New(0); - if (array[0] == NULL) { - return NULL; - } - - PyObject **p = array; - Py_CLEAR(*p++); - assert(array[0] == NULL); - assert(p == array + 1); - - Py_RETURN_NONE; -} - - -// Test Py_SETREF() and Py_XSETREF() macros, similar to test_py_clear() -static PyObject* -test_py_setref(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - // Py_SETREF() simple case with a variable - PyObject *obj = PyList_New(0); - if (obj == NULL) { - return NULL; - } - Py_SETREF(obj, NULL); - assert(obj == NULL); - - // Py_XSETREF() simple case with a variable - PyObject *obj2 = PyList_New(0); - if (obj2 == NULL) { - return NULL; - } - Py_XSETREF(obj2, NULL); - assert(obj2 == NULL); - // test Py_XSETREF() when the argument is NULL - Py_XSETREF(obj2, NULL); - assert(obj2 == NULL); - - // gh-98724: complex case, Py_SETREF() argument has a side effect - PyObject* array[1]; - array[0] = PyList_New(0); - if (array[0] == NULL) { - return NULL; - } - - PyObject **p = array; - Py_SETREF(*p++, NULL); - assert(array[0] == NULL); - assert(p == array + 1); - - // gh-98724: complex case, Py_XSETREF() argument has a side effect - PyObject* array2[1]; - array2[0] = PyList_New(0); - if (array2[0] == NULL) { - return NULL; - } - - PyObject **p2 = array2; - Py_XSETREF(*p2++, NULL); - assert(array2[0] == NULL); - assert(p2 == array2 + 1); - - // test Py_XSETREF() when the argument is NULL - p2 = array2; - Py_XSETREF(*p2++, NULL); - assert(array2[0] == NULL); - assert(p2 == array2 + 1); - - Py_RETURN_NONE; -} - - #define TEST_REFCOUNT() \ do { \ PyObject *obj = PyList_New(0); \ @@ -3337,8 +3252,6 @@ static PyMethodDef TestMethods[] = { {"pynumber_tobase", pynumber_tobase, METH_VARARGS}, {"without_gc", without_gc, METH_O}, {"test_set_type_size", test_set_type_size, METH_NOARGS}, - {"test_py_clear", test_py_clear, METH_NOARGS}, - {"test_py_setref", test_py_setref, METH_NOARGS}, {"test_refcount_macros", test_refcount_macros, METH_NOARGS}, {"test_refcount_funcs", test_refcount_funcs, METH_NOARGS}, {"test_py_is_macros", test_py_is_macros, METH_NOARGS}, From b1dcdefc3abf496a3e37e12b85dd9959f5b70341 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Fri, 25 Nov 2022 01:21:25 -0800 Subject: [PATCH 045/112] bpo-41260: C impl of datetime.date.strftime() takes different keyword arg (GH-21712) --- Lib/datetime.py | 4 ++-- Lib/test/datetimetester.py | 3 +++ .../next/Library/2020-08-02-23-46-22.bpo-41260.Q2BNzY.rst | 2 ++ 3 files changed, 7 
insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-08-02-23-46-22.bpo-41260.Q2BNzY.rst diff --git a/Lib/datetime.py b/Lib/datetime.py index 01742680a95bb6..1b0c5cb2d1c6ff 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1032,13 +1032,13 @@ def ctime(self): _MONTHNAMES[self._month], self._day, self._year) - def strftime(self, fmt): + def strftime(self, format): """ Format using strftime(). Example: "%d/%m/%Y, %H:%M:%S" """ - return _wrap_strftime(self, fmt, self.timetuple()) + return _wrap_strftime(self, format, self.timetuple()) def __format__(self, fmt): if not isinstance(fmt, str): diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index bba96698e9e2eb..121d973b6d5f20 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1489,6 +1489,9 @@ def test_strftime(self): #check that this standard extension works t.strftime("%f") + # bpo-41260: The parameter was named "fmt" in the pure python impl. + t.strftime(format="%f") + def test_strftime_trailing_percent(self): # bpo-35066: Make sure trailing '%' doesn't cause datetime's strftime to # complain. Different libcs have different handling of trailing diff --git a/Misc/NEWS.d/next/Library/2020-08-02-23-46-22.bpo-41260.Q2BNzY.rst b/Misc/NEWS.d/next/Library/2020-08-02-23-46-22.bpo-41260.Q2BNzY.rst new file mode 100644 index 00000000000000..ae2fdd9b84a00e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-08-02-23-46-22.bpo-41260.Q2BNzY.rst @@ -0,0 +1,2 @@ +Rename the *fmt* parameter of the pure Python implementation of +:meth:`datetime.date.strftime` to *format*. From d386115039e75c332c8471c239cf7dc5dee791a7 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Fri, 25 Nov 2022 04:55:26 -0800 Subject: [PATCH 046/112] bpo-38031: Fix a possible assertion failure in _io.FileIO() (#GH-5688) --- Lib/test/test_io.py | 8 ++++++++ .../2019-09-04-19-09-49.bpo-38031.Yq4L72.rst | 2 ++ Modules/_io/fileio.c | 6 +++++- 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-09-04-19-09-49.bpo-38031.Yq4L72.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index bc6071febe6144..c927f15aafef72 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -888,6 +888,14 @@ def badopener(fname, flags): open('non-existent', 'r', opener=badopener) self.assertEqual(str(cm.exception), 'opener returned -2') + def test_opener_invalid_fd(self): + # Check that OSError is raised with error code EBADF if the + # opener returns an invalid file descriptor (see gh-82212). + fd = os_helper.make_bad_fd() + with self.assertRaises(OSError) as cm: + self.open('foo', opener=lambda name, flags: fd) + self.assertEqual(cm.exception.errno, errno.EBADF) + def test_fileio_closefd(self): # Issue #4841 with self.open(__file__, 'rb') as f1, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-09-04-19-09-49.bpo-38031.Yq4L72.rst b/Misc/NEWS.d/next/Core and Builtins/2019-09-04-19-09-49.bpo-38031.Yq4L72.rst new file mode 100644 index 00000000000000..b5964375962f66 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-09-04-19-09-49.bpo-38031.Yq4L72.rst @@ -0,0 +1,2 @@ +Fix a possible assertion failure in :class:`io.FileIO` when the opener +returns an invalid file descriptor. 
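As a usage-level illustration of the case being fixed (not part of the patch; the file name is arbitrary and the helper is a simplified stand-in for ``test.support.os_helper.make_bad_fd()``), an *opener* that returns an already-closed descriptor now fails with an ordinary ``OSError``::

    import errno
    import os

    def bad_opener(path, flags):
        fd = os.open(os.devnull, os.O_RDONLY)
        os.close(fd)      # the descriptor is no longer valid
        return fd

    try:
        open("example.txt", opener=bad_opener)
    except OSError as exc:
        # EBADF rather than the possible assertion failure described above
        print(exc.errno == errno.EBADF)   # True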
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 00859978e8cd6c..659297ef1b1d30 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -485,8 +485,12 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, ret = -1; if (!fd_is_own) self->fd = -1; - if (self->fd >= 0) + if (self->fd >= 0) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); internal_close(self); + _PyErr_ChainExceptions(exc, val, tb); + } done: #ifdef MS_WINDOWS From 8749121b07f48994ea47f2e7ff75fb13c13953f6 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 25 Nov 2022 14:07:28 +0100 Subject: [PATCH 047/112] gh-96168: Add sqlite3 row factory how-to (#99507) Co-authored-by: C.A.M. Gerlach Co-authored-by: Ezio Melotti --- Doc/library/sqlite3.rst | 160 ++++++++++++++++++++++++++++++---------- 1 file changed, 119 insertions(+), 41 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 1681fc49e9f1e0..0dac2312b2feb1 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -239,6 +239,7 @@ inserted data and retrieved values from it in multiple ways. * :ref:`sqlite3-adapters` * :ref:`sqlite3-converters` * :ref:`sqlite3-connection-context-manager` + * :ref:`sqlite3-howto-row-factory` * :ref:`sqlite3-explanation` for in-depth background on transaction control. @@ -1316,31 +1317,14 @@ Connection objects .. attribute:: row_factory - A callable that accepts two arguments, - a :class:`Cursor` object and the raw row results as a :class:`tuple`, - and returns a custom object representing an SQLite row. - - Example: + The initial :attr:`~Cursor.row_factory` + for :class:`Cursor` objects created from this connection. + Assigning to this attribute does not affect the :attr:`!row_factory` + of existing cursors belonging to this connection, only new ones. + Is ``None`` by default, + meaning each row is returned as a :class:`tuple`. - .. doctest:: - - >>> def dict_factory(cursor, row): - ... col_names = [col[0] for col in cursor.description] - ... return {key: value for key, value in zip(col_names, row)} - >>> con = sqlite3.connect(":memory:") - >>> con.row_factory = dict_factory - >>> for row in con.execute("SELECT 1 AS a, 2 AS b"): - ... print(row) - {'a': 1, 'b': 2} - - If returning a tuple doesn't suffice and you want name-based access to - columns, you should consider setting :attr:`row_factory` to the - highly optimized :class:`sqlite3.Row` type. :class:`Row` provides both - index-based and case-insensitive name-based access to columns with almost no - memory overhead. It will probably be better than your own custom - dictionary-based approach or even a db_row based solution. - - .. XXX what's a db_row-based solution? + See :ref:`sqlite3-howto-row-factory` for more details. .. attribute:: text_factory @@ -1497,7 +1481,7 @@ Cursor objects .. method:: fetchone() - If :attr:`~Connection.row_factory` is ``None``, + If :attr:`~Cursor.row_factory` is ``None``, return the next row query result set as a :class:`tuple`. Else, pass it to the row factory and return its result. Return ``None`` if no more data is available. @@ -1591,6 +1575,22 @@ Cursor objects including :abbr:`CTE (Common Table Expression)` queries. It is only updated by the :meth:`execute` and :meth:`executemany` methods. + .. attribute:: row_factory + + Control how a row fetched from this :class:`!Cursor` is represented. + If ``None``, a row is represented as a :class:`tuple`. 
+ Can be set to the included :class:`sqlite3.Row`; + or a :term:`callable` that accepts two arguments, + a :class:`Cursor` object and the :class:`!tuple` of row values, + and returns a custom object representing an SQLite row. + + Defaults to what :attr:`Connection.row_factory` was set to + when the :class:`!Cursor` was created. + Assigning to this attribute does not affect + :attr:`Connection.row_factory` of the parent connection. + + See :ref:`sqlite3-howto-row-factory` for more details. + .. The sqlite3.Row example used to be a how-to. It has now been incorporated into the Row reference. We keep the anchor here in order not to break @@ -1609,7 +1609,10 @@ Row objects It supports iteration, equality testing, :func:`len`, and :term:`mapping` access by column name and index. - Two row objects compare equal if have equal columns and equal members. + Two :class:`!Row` objects compare equal + if they have identical column names and values. + + See :ref:`sqlite3-howto-row-factory` for more details. .. method:: keys @@ -1620,21 +1623,6 @@ Row objects .. versionchanged:: 3.5 Added support of slicing. - Example: - - .. doctest:: - - >>> con = sqlite3.connect(":memory:") - >>> con.row_factory = sqlite3.Row - >>> res = con.execute("SELECT 'Earth' AS name, 6378 AS radius") - >>> row = res.fetchone() - >>> row.keys() - ['name', 'radius'] - >>> row[0], row["name"] # Access by index and name. - ('Earth', 'Earth') - >>> row["RADIUS"] # Column names are case-insensitive. - 6378 - .. _sqlite3-blob-objects: @@ -2358,6 +2346,96 @@ can be found in the `SQLite URI documentation`_. .. _SQLite URI documentation: https://www.sqlite.org/uri.html +.. _sqlite3-howto-row-factory: + +How to create and use row factories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, :mod:`!sqlite3` represents each row as a :class:`tuple`. +If a :class:`!tuple` does not suit your needs, +you can use the :class:`sqlite3.Row` class +or a custom :attr:`~Cursor.row_factory`. + +While :attr:`!row_factory` exists as an attribute both on the +:class:`Cursor` and the :class:`Connection`, +it is recommended to set :class:`Connection.row_factory`, +so all cursors created from the connection will use the same row factory. + +:class:`!Row` provides indexed and case-insensitive named access to columns, +with minimal memory overhead and performance impact over a :class:`!tuple`. +To use :class:`!Row` as a row factory, +assign it to the :attr:`!row_factory` attribute: + +.. doctest:: + + >>> con = sqlite3.connect(":memory:") + >>> con.row_factory = sqlite3.Row + +Queries now return :class:`!Row` objects: + +.. doctest:: + + >>> res = con.execute("SELECT 'Earth' AS name, 6378 AS radius") + >>> row = res.fetchone() + >>> row.keys() + ['name', 'radius'] + >>> row[0] # Access by index. + 'Earth' + >>> row["name"] # Access by name. + 'Earth' + >>> row["RADIUS"] # Column names are case-insensitive. + 6378 + +You can create a custom :attr:`~Cursor.row_factory` +that returns each row as a :class:`dict`, with column names mapped to values: + +.. testcode:: + + def dict_factory(cursor, row): + fields = [column[0] for column in cursor.description] + return {key: value for key, value in zip(fields, row)} + +Using it, queries now return a :class:`!dict` instead of a :class:`!tuple`: + +.. doctest:: + + >>> con = sqlite3.connect(":memory:") + >>> con.row_factory = dict_factory + >>> for row in con.execute("SELECT 1 AS a, 2 AS b"): + ... print(row) + {'a': 1, 'b': 2} + +The following row factory returns a :term:`named tuple`: + +.. 
testcode:: + + from collections import namedtuple + + def namedtuple_factory(cursor, row): + fields = [column[0] for column in cursor.description] + cls = namedtuple("Row", fields) + return cls._make(row) + +:func:`!namedtuple_factory` can be used as follows: + +.. doctest:: + + >>> con = sqlite3.connect(":memory:") + >>> con.row_factory = namedtuple_factory + >>> cur = con.execute("SELECT 1 AS a, 2 AS b") + >>> row = cur.fetchone() + >>> row + Row(a=1, b=2) + >>> row[0] # Indexed access. + 1 + >>> row.b # Attribute access. + 2 + +With some adjustments, the above recipe can be adapted to use a +:class:`~dataclasses.dataclass`, or any other custom class, +instead of a :class:`~collections.namedtuple`. + + .. _sqlite3-explanation: Explanation From 85c128e34daec7625b74746e127afa25888ccde1 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Fri, 25 Nov 2022 09:39:48 -0800 Subject: [PATCH 048/112] bpo-40882: Fix a memory leak in SharedMemory on Windows (GH-20684) In multiprocessing.shared_memory.SharedMemory(), the temporary view returned by MapViewOfFile() should be unmapped when it is no longer needed. --- Lib/multiprocessing/shared_memory.py | 5 +++- .../2020-06-06-15-10-37.bpo-40882.UvNbdj.rst | 2 ++ Modules/_winapi.c | 25 +++++++++++++++++ Modules/clinic/_winapi.c.h | 28 ++++++++++++++++++- 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2020-06-06-15-10-37.bpo-40882.UvNbdj.rst diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 881f2001dd5980..9a1e5aa17b87a2 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -173,7 +173,10 @@ def __init__(self, name=None, create=False, size=0): ) finally: _winapi.CloseHandle(h_map) - size = _winapi.VirtualQuerySize(p_buf) + try: + size = _winapi.VirtualQuerySize(p_buf) + finally: + _winapi.UnmapViewOfFile(p_buf) self._mmap = mmap.mmap(-1, size, tagname=name) self._size = size diff --git a/Misc/NEWS.d/next/Windows/2020-06-06-15-10-37.bpo-40882.UvNbdj.rst b/Misc/NEWS.d/next/Windows/2020-06-06-15-10-37.bpo-40882.UvNbdj.rst new file mode 100644 index 00000000000000..2670aeef9a2525 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2020-06-06-15-10-37.bpo-40882.UvNbdj.rst @@ -0,0 +1,2 @@ +Fix a memory leak in :class:`multiprocessing.shared_memory.SharedMemory` on +Windows. 
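The leak was on the attach path (``create=False``), where ``SharedMemory`` briefly maps an existing block just to discover its size before re-mapping it with ``mmap``. A minimal usage sketch of that path (illustrative only; the block name is made up)::

    from multiprocessing import shared_memory

    owner = shared_memory.SharedMemory(name="demo_block", create=True, size=1024)
    try:
        # Attaching by name is the code path patched above; on Windows it
        # previously leaked the temporary view used to query the size.
        view = shared_memory.SharedMemory(name="demo_block")
        view.buf[:5] = b"hello"
        view.close()
    finally:
        owner.close()
        owner.unlink()    # no-op on Windows, required on POSIX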
diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 7a9bedb1887a06..bb4514c36bc7d0 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1393,6 +1393,30 @@ _winapi_MapViewOfFile_impl(PyObject *module, HANDLE file_map, return address; } +/*[clinic input] +_winapi.UnmapViewOfFile + + address: LPCVOID + / +[clinic start generated code]*/ + +static PyObject * +_winapi_UnmapViewOfFile_impl(PyObject *module, LPCVOID address) +/*[clinic end generated code: output=4f7e18ac75d19744 input=8c4b6119ad9288a3]*/ +{ + BOOL success; + + Py_BEGIN_ALLOW_THREADS + success = UnmapViewOfFile(address); + Py_END_ALLOW_THREADS + + if (!success) { + return PyErr_SetFromWindowsErr(0); + } + + Py_RETURN_NONE; +} + /*[clinic input] _winapi.OpenFileMapping -> HANDLE @@ -2062,6 +2086,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_READFILE_METHODDEF _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF _WINAPI_TERMINATEPROCESS_METHODDEF + _WINAPI_UNMAPVIEWOFFILE_METHODDEF _WINAPI_VIRTUALQUERYSIZE_METHODDEF _WINAPI_WAITNAMEDPIPE_METHODDEF _WINAPI_WAITFORMULTIPLEOBJECTS_METHODDEF diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index cc1a5881e0bfd6..13bf8b482cd69e 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -742,6 +742,32 @@ _winapi_MapViewOfFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_winapi_UnmapViewOfFile__doc__, +"UnmapViewOfFile($module, address, /)\n" +"--\n" +"\n"); + +#define _WINAPI_UNMAPVIEWOFFILE_METHODDEF \ + {"UnmapViewOfFile", (PyCFunction)_winapi_UnmapViewOfFile, METH_O, _winapi_UnmapViewOfFile__doc__}, + +static PyObject * +_winapi_UnmapViewOfFile_impl(PyObject *module, LPCVOID address); + +static PyObject * +_winapi_UnmapViewOfFile(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + LPCVOID address; + + if (!PyArg_Parse(arg, "" F_POINTER ":UnmapViewOfFile", &address)) { + goto exit; + } + return_value = _winapi_UnmapViewOfFile_impl(module, address); + +exit: + return return_value; +} + PyDoc_STRVAR(_winapi_OpenFileMapping__doc__, "OpenFileMapping($module, desired_access, inherit_handle, name, /)\n" "--\n" @@ -1345,4 +1371,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=83c4a3f0e70e7775 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=23ea9e176d86e026 input=a9049054013a1b77]*/ From 5d4d83130c1538586e559a64e3a2341794da92d9 Mon Sep 17 00:00:00 2001 From: Gary Donovan Date: Sat, 26 Nov 2022 05:03:20 +1100 Subject: [PATCH 049/112] Fix typo on inline comment for email.generator (GH-98210) Trivial change to comment - no issue or new entry necessary --- Lib/email/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/generator.py b/Lib/email/generator.py index c9b121624e08d5..885e6ba98540a7 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -170,7 +170,7 @@ def _write(self, msg): # parameter. # # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a buffer. The we write the + # is to cache any subpart writes into a buffer. Then we write the # headers and the buffer contents. That way, subpart handlers can # Do The Right Thing, and can still modify the Content-Type: header if # necessary. 
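The strategy that comment describes can be summarised with a small standalone sketch (simplified, not the actual ``email.generator`` implementation)::

    from io import StringIO

    def flatten_sketch(headers, render_body):
        # Render the body into a scratch buffer first; the body pass may
        # still adjust the headers (e.g. set a multipart boundary).
        buf = StringIO()
        render_body(buf.write)
        # Only then write the headers, so they reflect any changes,
        # followed by the buffered body.
        head = "".join(f"{name}: {value}\n" for name, value in headers.items())
        return head + "\n" + buf.getvalue()

    headers = {"MIME-Version": "1.0"}

    def render(write):
        headers["Content-Type"] = 'multipart/mixed; boundary="sketch"'
        write("--sketch\nContent-Type: text/plain\n\nhello\n--sketch--\n")

    print(flatten_sketch(headers, render))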
From 7d2dcc53d09fe903329926bf7bbfe460b1465dab Mon Sep 17 00:00:00 2001 From: Stanley <46876382+slateny@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:10:22 -0800 Subject: [PATCH 050/112] gh-64019: Have attribute table in `inspect` docs link to module attributes instead of listing them (GH-98116) Co-authored-by: Michael Anckaert --- Doc/library/inspect.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 44f1ae04c9e39e..9cb7a6f94e49cd 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -32,7 +32,7 @@ The :func:`getmembers` function retrieves the members of an object such as a class or module. The functions whose names begin with "is" are mainly provided as convenient choices for the second argument to :func:`getmembers`. They also help you determine when you can expect to find the following special -attributes: +attributes (see :ref:`import-mod-attrs` for module attributes): .. this function name is too big to fit in the ascii-art table below .. |coroutine-origin-link| replace:: :func:`sys.set_coroutine_origin_tracking_depth` @@ -40,11 +40,6 @@ attributes: +-----------+-------------------+---------------------------+ | Type | Attribute | Description | +===========+===================+===========================+ -| module | __doc__ | documentation string | -+-----------+-------------------+---------------------------+ -| | __file__ | filename (missing for | -| | | built-in modules) | -+-----------+-------------------+---------------------------+ | class | __doc__ | documentation string | +-----------+-------------------+---------------------------+ | | __name__ | name with which this | From ae234fbc5ce045066448f2f0cda2f1c3c7ddebea Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 25 Nov 2022 19:15:57 +0000 Subject: [PATCH 051/112] gh-99029: Fix handling of `PureWindowsPath('C:\').relative_to('C:')` (GH-99031) `relative_to()` now treats naked drive paths as relative. This brings its behaviour in line with other parts of pathlib, and with `ntpath.relpath()`, and so allows us to factor out the pathlib-specific implementation. --- Lib/pathlib.py | 56 +++++-------------- Lib/test/test_pathlib.py | 14 ++--- ...2-11-02-23-47-07.gh-issue-99029.7uCiIB.rst | 2 + 3 files changed, 20 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-02-23-47-07.gh-issue-99029.7uCiIB.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index bc57ae60e725b2..f31eb3010368d5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -632,57 +632,27 @@ def relative_to(self, *other, walk_up=False): The *walk_up* parameter controls whether `..` may be used to resolve the path. 
""" - # For the purpose of this method, drive and root are considered - # separate parts, i.e.: - # Path('c:/').relative_to('c:') gives Path('/') - # Path('c:/').relative_to('/') raise ValueError if not other: raise TypeError("need at least one argument") - parts = self._parts - drv = self._drv - root = self._root - if root: - abs_parts = [drv, root] + parts[1:] - else: - abs_parts = parts - other_drv, other_root, other_parts = self._parse_args(other) - if other_root: - other_abs_parts = [other_drv, other_root] + other_parts[1:] - else: - other_abs_parts = other_parts - num_parts = len(other_abs_parts) - casefold = self._flavour.casefold_parts - num_common_parts = 0 - for part, other_part in zip(casefold(abs_parts), casefold(other_abs_parts)): - if part != other_part: + path_cls = type(self) + other = path_cls(*other) + for step, path in enumerate([other] + list(other.parents)): + if self.is_relative_to(path): break - num_common_parts += 1 - if walk_up: - failure = root != other_root - if drv or other_drv: - failure = casefold([drv]) != casefold([other_drv]) or (failure and num_parts > 1) - error_message = "{!r} is not on the same drive as {!r}" - up_parts = (num_parts-num_common_parts)*['..'] else: - failure = (root or drv) if num_parts == 0 else num_common_parts != num_parts - error_message = "{!r} is not in the subpath of {!r}" - up_parts = [] - error_message += " OR one path is relative and the other is absolute." - if failure: - formatted = self._format_parsed_parts(other_drv, other_root, other_parts) - raise ValueError(error_message.format(str(self), str(formatted))) - path_parts = up_parts + abs_parts[num_common_parts:] - new_root = root if num_common_parts == 1 else '' - return self._from_parsed_parts('', new_root, path_parts) + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") + if step and not walk_up: + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") + parts = ('..',) * step + self.parts[len(path.parts):] + return path_cls(*parts) def is_relative_to(self, *other): """Return True if the path is relative to another path or False. 
""" - try: - self.relative_to(*other) - return True - except ValueError: - return False + if not other: + raise TypeError("need at least one argument") + other = type(self)(*other) + return other == self or other in self.parents @property def parts(self): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 94401e5429cdf2..1d01d3cbd91d14 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1183,10 +1183,6 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) p = P('C:/Foo/Bar') - self.assertEqual(p.relative_to(P('c:')), P('/Foo/Bar')) - self.assertEqual(p.relative_to('c:'), P('/Foo/Bar')) - self.assertEqual(str(p.relative_to(P('c:'))), '\\Foo\\Bar') - self.assertEqual(str(p.relative_to('c:')), '\\Foo\\Bar') self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) @@ -1194,10 +1190,6 @@ def test_relative_to(self): self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) self.assertEqual(p.relative_to('c:/foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('/Foo/Bar')) - self.assertEqual(p.relative_to('c:', walk_up=True), P('/Foo/Bar')) - self.assertEqual(str(p.relative_to(P('c:'), walk_up=True)), '\\Foo\\Bar') - self.assertEqual(str(p.relative_to('c:', walk_up=True)), '\\Foo\\Bar') self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) @@ -1209,6 +1201,8 @@ def test_relative_to(self): self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, 'c:') + self.assertRaises(ValueError, p.relative_to, P('c:')) self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) @@ -1218,6 +1212,8 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('/')) self.assertRaises(ValueError, p.relative_to, P('/Foo')) self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) @@ -1275,13 +1271,13 @@ def test_is_relative_to(self): self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) p = P('C:/Foo/Bar') - self.assertTrue(p.is_relative_to('c:')) self.assertTrue(p.is_relative_to(P('c:/'))) self.assertTrue(p.is_relative_to(P('c:/foO'))) self.assertTrue(p.is_relative_to('c:/foO/')) self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) self.assertTrue(p.is_relative_to('c:/foO/baR')) # Unrelated paths. 
+ self.assertFalse(p.is_relative_to('c:')) self.assertFalse(p.is_relative_to(P('C:/Baz'))) self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) diff --git a/Misc/NEWS.d/next/Library/2022-11-02-23-47-07.gh-issue-99029.7uCiIB.rst b/Misc/NEWS.d/next/Library/2022-11-02-23-47-07.gh-issue-99029.7uCiIB.rst new file mode 100644 index 00000000000000..0bfba5e1e32662 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-02-23-47-07.gh-issue-99029.7uCiIB.rst @@ -0,0 +1,2 @@ +:meth:`pathlib.PurePath.relative_to()` now treats naked Windows drive paths +as relative. This brings its behaviour in line with other parts of pathlib. From 5556d3e02ca841b82b1eb42cc3974e0a3bbffaac Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 26 Nov 2022 00:30:37 +0100 Subject: [PATCH 052/112] gh-98724: Fix warnings on Py_SETREF() usage (#99781) Cast argument to the expected type. --- Modules/_curses_panel.c | 2 +- Objects/longobject.c | 2 +- Objects/typeobject.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/_curses_panel.c b/Modules/_curses_panel.c index cd408d6aa35994..2144345de01ba3 100644 --- a/Modules/_curses_panel.c +++ b/Modules/_curses_panel.c @@ -424,7 +424,7 @@ _curses_panel_panel_replace_impl(PyCursesPanelObject *self, PyErr_SetString(state->PyCursesError, "replace_panel() returned ERR"); return NULL; } - Py_SETREF(po->wo, Py_NewRef(win)); + Py_SETREF(po->wo, (PyCursesWindowObject*)Py_NewRef(win)); Py_RETURN_NONE; } diff --git a/Objects/longobject.c b/Objects/longobject.c index f4bd981e4b9870..c84b4d3f316d5d 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4775,7 +4775,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) * because we're primarily trying to cut overhead for small powers. */ assert(bi); /* else there is no significant bit */ - Py_SETREF(z, Py_NewRef(a)); + Py_SETREF(z, (PyLongObject*)Py_NewRef(a)); for (bit = 2; ; bit <<= 1) { if (bit > bi) { /* found the first bit */ assert((bi & bit) == 0); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ad8a936fa7ce20..b993aa405f6b6a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -9593,7 +9593,7 @@ super_init_impl(PyObject *self, PyTypeObject *type, PyObject *obj) { return -1; Py_INCREF(obj); } - Py_XSETREF(su->type, Py_NewRef(type)); + Py_XSETREF(su->type, (PyTypeObject*)Py_NewRef(type)); Py_XSETREF(su->obj, obj); Py_XSETREF(su->obj_type, obj_type); return 0; From a86d8545221b16e714ffe3bda5afafc1d4748d13 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Fri, 25 Nov 2022 19:03:16 -0500 Subject: [PATCH 053/112] Fix typo in `__match_args__` doc (#99785) A opy of #98549, whose author (@icecream17) uses a school computer that blocks the CLA site. I did not mention this in commit comment above so CLA bot does not pick up the name and request the CLA again. --- Doc/reference/datamodel.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 301f41f3952c96..fd682fcff02003 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2823,7 +2823,7 @@ Customizing positional arguments in class pattern matching When using a class name in a pattern, positional arguments in the pattern are not allowed by default, i.e. ``case MyClass(x, y)`` is typically invalid without special -support in ``MyClass``. To be able to use that kind of patterns, the class needs to +support in ``MyClass``. 
To be able to use that kind of pattern, the class needs to define a *__match_args__* attribute. .. data:: object.__match_args__ From ec2b76aa8b7c6313293ff9c6814e8bc31e08fcaf Mon Sep 17 00:00:00 2001 From: TheShermanTanker <32636402+TheShermanTanker@users.noreply.github.com> Date: Sat, 26 Nov 2022 17:31:42 +0800 Subject: [PATCH 054/112] GH-95896: posixmodule.c: fix osdefs.h inclusion to not depend on compiler (#95897) Co-authored-by: Steve Dower --- Modules/posixmodule.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 8185517b06b5dd..95ecf1c7c4b28c 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16,6 +16,9 @@ #ifdef MS_WINDOWS # include # include +# include // UNLEN +# include "osdefs.h" // SEP +# define HAVE_SYMLINK #endif #ifdef __VXWORKS__ @@ -426,18 +429,7 @@ extern char *ctermid_r(char *); # ifdef HAVE_PROCESS_H # include # endif -# ifndef IO_REPARSE_TAG_SYMLINK -# define IO_REPARSE_TAG_SYMLINK (0xA000000CL) -# endif -# ifndef IO_REPARSE_TAG_MOUNT_POINT -# define IO_REPARSE_TAG_MOUNT_POINT (0xA0000003L) -# endif -# include "osdefs.h" // SEP # include -# include -# include // ShellExecute() -# include // UNLEN -# define HAVE_SYMLINK #endif /* _MSC_VER */ #ifndef MAXPATHLEN From 47d673d81fc315069c14f9438ebe61fb70ef1ccc Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 26 Nov 2022 12:33:48 +0300 Subject: [PATCH 055/112] gh-99502: mention bytes-like objects as input in `secrets.compare_digest` (GH-99512) Now it is in sync with https://docs.python.org/3/library/hmac.html#hmac.compare_digest It is the same function, just re-exported. So, I guess they should mention the same input types. --- Doc/library/secrets.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/secrets.rst b/Doc/library/secrets.rst index dc8e5f46fb581e..4405dfc0535973 100644 --- a/Doc/library/secrets.rst +++ b/Doc/library/secrets.rst @@ -128,7 +128,9 @@ Other functions .. function:: compare_digest(a, b) - Return ``True`` if strings *a* and *b* are equal, otherwise ``False``, + Return ``True`` if strings or + :term:`bytes-like objects ` + *a* and *b* are equal, otherwise ``False``, using a "constant-time compare" to reduce the risk of `timing attacks `_. See :func:`hmac.compare_digest` for additional details. From e35ca417fe81a64985c2b29e863ce418ae75b96e Mon Sep 17 00:00:00 2001 From: Sam James Date: Sat, 26 Nov 2022 13:08:49 +0000 Subject: [PATCH 056/112] gh-99086: Fix -Wstrict-prototypes, -Wimplicit-function-declaration warnings in configure.ac (#99406) Follow up to 12078e78f6e4a21f344e4eaff529e1ff3b97734f. --- ...2-11-24-02-58-10.gh-issue-99086.DV_4Br.rst | 1 + configure | 52 ++++++++++--------- configure.ac | 52 ++++++++++--------- 3 files changed, 57 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2022-11-24-02-58-10.gh-issue-99086.DV_4Br.rst diff --git a/Misc/NEWS.d/next/Build/2022-11-24-02-58-10.gh-issue-99086.DV_4Br.rst b/Misc/NEWS.d/next/Build/2022-11-24-02-58-10.gh-issue-99086.DV_4Br.rst new file mode 100644 index 00000000000000..2dace165ca1ada --- /dev/null +++ b/Misc/NEWS.d/next/Build/2022-11-24-02-58-10.gh-issue-99086.DV_4Br.rst @@ -0,0 +1 @@ +Fix ``-Wimplicit-int``, ``-Wstrict-prototypes``, and ``-Wimplicit-function-declaration`` compiler warnings in :program:`configure` checks. 
diff --git a/configure b/configure index 047fd94219d5be..3f8daf9dad5fd8 100755 --- a/configure +++ b/configure @@ -6755,7 +6755,7 @@ if test "x$enable_profiling" = xyes; then CC="$CC -pg" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -int main() { return 0; } +int main(void) { return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : @@ -9220,7 +9220,7 @@ else void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -9275,7 +9275,7 @@ else void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -9324,7 +9324,7 @@ else void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -9373,7 +9373,7 @@ else void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -12233,7 +12233,7 @@ else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -int main() +int main(void) { char s[16]; int i, *p1, *p2; @@ -15042,6 +15042,7 @@ $as_echo_n "checking for pthread_create in -lpthread... " >&6; } /* end confdefs.h. */ #include +#include #include void * start_routine (void *arg) { exit (0); } @@ -15352,7 +15353,7 @@ else void *foo(void *parm) { return NULL; } - int main() { + int main(void) { pthread_attr_t attr; pthread_t id; if (pthread_attr_init(&attr)) return (-1); @@ -16954,7 +16955,7 @@ else #include #include -int main(int argc, char*argv[]) +int main(int argc, char *argv[]) { if(chflags(argv[0], 0) != 0) return 1; @@ -17003,7 +17004,7 @@ else #include #include -int main(int argc, char*argv[]) +int main(int argc, char *argv[]) { if(lchflags(argv[0], 0) != 0) return 1; @@ -19711,7 +19712,7 @@ else #include #include -int main() +int main(void) { int passive, gaierr, inet4 = 0, inet6 = 0; struct addrinfo hints, *ai, *aitop; @@ -20908,7 +20909,7 @@ else #include #include -int main() { +int main(void) { volatile double x, y, z; /* 1./(1-2**-53) -> 1+2**-52 (correct), 1.0 (double rounding) */ x = 0.99999999999999989; /* 1-2**-53 */ @@ -21687,7 +21688,7 @@ else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -int main() +int main(void) { return (((-1)>>3 == -1) ? 0 : 1); } @@ -22589,7 +22590,7 @@ else #include #include -int main() +int main(void) { int val1 = nice(1); if (val1 != -1 && val1 == nice(2)) @@ -22631,7 +22632,7 @@ else #include #include -int main() +int main(void) { struct pollfd poll_struct = { 42, POLLIN|POLLPRI|POLLOUT, 0 }; int poll_test; @@ -22688,7 +22689,7 @@ else extern char *tzname[]; #endif -int main() +int main(void) { /* Note that we need to ensure that not only does tzset(3) do 'something' with localtime, but it works as documented @@ -24350,9 +24351,10 @@ else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ +#include #include -#include -int main() { +#include +int main(void) { size_t len = -1; const char *str = "text"; len = mbstowcs(NULL, str, 0); @@ -24552,7 +24554,7 @@ else #include #include void foo(void *p, void *q) { memmove(p, q, 19); } -int main() { +int main(void) { char a[32] = "123456789000000000"; foo(&a[9], a); if (strcmp(a, "123456789123456789000000000") != 0) @@ -24607,7 +24609,7 @@ else ); return r; } - int main() { + int main(void) { int p = 8; if ((foo(&p) ? 
: p) != 6) return 1; @@ -24650,7 +24652,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext #include atomic_int int_var; atomic_uintptr_t uintptr_var; - int main() { + int main(void) { atomic_store_explicit(&int_var, 5, memory_order_relaxed); atomic_store_explicit(&uintptr_var, 0, memory_order_relaxed); int loaded_value = atomic_load_explicit(&int_var, memory_order_seq_cst); @@ -24691,7 +24693,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext int val; - int main() { + int main(void) { __atomic_store_n(&val, 1, __ATOMIC_SEQ_CST); (void)__atomic_load_n(&val, __ATOMIC_SEQ_CST); return 0; @@ -24767,7 +24769,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext #include - int main() { + int main(void) { struct dirent entry; return entry.d_type == DT_UNKNOWN; } @@ -24805,11 +24807,12 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ + #include #include #include #include - int main() { + int main(void) { char buffer[1]; const size_t buflen = sizeof(buffer); const int flags = GRND_NONBLOCK; @@ -24852,9 +24855,10 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ + #include #include - int main() { + int main(void) { char buffer[1]; const size_t buflen = sizeof(buffer); const int flags = 0; diff --git a/configure.ac b/configure.ac index 19f12e8d5402fb..734a4db8389915 100644 --- a/configure.ac +++ b/configure.ac @@ -1427,7 +1427,7 @@ AC_ARG_ENABLE(profiling, if test "x$enable_profiling" = xyes; then ac_save_cc="$CC" CC="$CC -pg" - AC_LINK_IFELSE([AC_LANG_SOURCE([[int main() { return 0; }]])], + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void) { return 0; }]])], [], [enable_profiling=no]) CC="$ac_save_cc" @@ -2553,7 +2553,7 @@ AC_CACHE_CHECK([whether pthreads are available without options], void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -2586,7 +2586,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -2613,7 +2613,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -2640,7 +2640,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ void* routine(void* p){return NULL;} -int main(){ +int main(void){ pthread_t p; if(pthread_create(&p,NULL,routine,NULL)!=0) return 1; @@ -3578,7 +3578,7 @@ esac # check for systems that require aligned memory access AC_CACHE_CHECK([aligned memory access is required], [ac_cv_aligned_required], [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -int main() +int main(void) { char s[16]; int i, *p1, *p2; @@ -4292,6 +4292,7 @@ yes AC_MSG_CHECKING([for pthread_create in -lpthread]) AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #include +#include #include void * start_routine (void *arg) { exit (0); }]], [[ @@ -4361,7 +4362,7 @@ if test "$posix_threads" = "yes"; then void *foo(void *parm) { return NULL; } - int main() { + int main(void) { pthread_attr_t attr; pthread_t id; if (pthread_attr_init(&attr)) return (-1); @@ -4898,7 +4899,7 @@ AC_CACHE_CHECK([for chflags], [ac_cv_have_chflags], [dnl AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include -int main(int argc, char*argv[]) +int main(int argc, char *argv[]) { if(chflags(argv[0], 0) != 0) return 1; @@ -4920,7 +4921,7 @@ AC_CACHE_CHECK([for lchflags], [ac_cv_have_lchflags], [dnl AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include -int main(int argc, char*argv[]) +int main(int argc, char *argv[]) { if(lchflags(argv[0], 
0) != 0) return 1; @@ -5196,7 +5197,7 @@ AS_VAR_IF([ac_cv_func_getaddrinfo], [yes], [ #include #include -int main() +int main(void) { int passive, gaierr, inet4 = 0, inet6 = 0; struct addrinfo hints, *ai, *aitop; @@ -5612,7 +5613,7 @@ CC="$CC $BASECFLAGS" AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include -int main() { +int main(void) { volatile double x, y, z; /* 1./(1-2**-53) -> 1+2**-52 (correct), 1.0 (double rounding) */ x = 0.99999999999999989; /* 1-2**-53 */ @@ -5919,7 +5920,7 @@ fi], # or fills with zeros (like the Cray J90, according to Tim Peters). AC_CACHE_CHECK([whether right shift extends the sign bit], [ac_cv_rshift_extends_sign], [ AC_RUN_IFELSE([AC_LANG_SOURCE([[ -int main() +int main(void) { return (((-1)>>3 == -1) ? 0 : 1); } @@ -6118,7 +6119,7 @@ AC_CACHE_CHECK([for broken nice()], [ac_cv_broken_nice], [ AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include -int main() +int main(void) { int val1 = nice(1); if (val1 != -1 && val1 == nice(2)) @@ -6140,7 +6141,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include -int main() +int main(void) { struct pollfd poll_struct = { 42, POLLIN|POLLPRI|POLLOUT, 0 }; int poll_test; @@ -6176,7 +6177,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ extern char *tzname[]; #endif -int main() +int main(void) { /* Note that we need to ensure that not only does tzset(3) do 'something' with localtime, but it works as documented @@ -6517,9 +6518,10 @@ AC_CHECK_TYPE(socklen_t,, AC_CACHE_CHECK([for broken mbstowcs], [ac_cv_broken_mbstowcs], AC_RUN_IFELSE([AC_LANG_SOURCE([[ +#include #include -#include -int main() { +#include +int main(void) { size_t len = -1; const char *str = "text"; len = mbstowcs(NULL, str, 0); @@ -6646,7 +6648,7 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include #include void foo(void *p, void *q) { memmove(p, q, 19); } -int main() { +int main(void) { char a[32] = "123456789000000000"; foo(&a[9], a); if (strcmp(a, "123456789123456789000000000") != 0) @@ -6687,7 +6689,7 @@ if test "$ac_cv_gcc_asm_for_x87" = yes; then ); return r; } - int main() { + int main(void) { int p = 8; if ((foo(&p) ? : p) != 6) return 1; @@ -6715,7 +6717,7 @@ AC_LINK_IFELSE( #include atomic_int int_var; atomic_uintptr_t uintptr_var; - int main() { + int main(void) { atomic_store_explicit(&int_var, 5, memory_order_relaxed); atomic_store_explicit(&uintptr_var, 0, memory_order_relaxed); int loaded_value = atomic_load_explicit(&int_var, memory_order_seq_cst); @@ -6736,7 +6738,7 @@ AC_LINK_IFELSE( [ AC_LANG_SOURCE([[ int val; - int main() { + int main(void) { __atomic_store_n(&val, 1, __ATOMIC_SEQ_CST); (void)__atomic_load_n(&val, __ATOMIC_SEQ_CST); return 0; @@ -6777,7 +6779,7 @@ AC_LINK_IFELSE( AC_LANG_SOURCE([[ #include - int main() { + int main(void) { struct dirent entry; return entry.d_type == DT_UNKNOWN; } @@ -6795,11 +6797,12 @@ AC_CACHE_CHECK([for the Linux getrandom() syscall], [ac_cv_getrandom_syscall], [ AC_LINK_IFELSE( [ AC_LANG_SOURCE([[ + #include #include #include #include - int main() { + int main(void) { char buffer[1]; const size_t buflen = sizeof(buffer); const int flags = GRND_NONBLOCK; @@ -6822,9 +6825,10 @@ AC_CACHE_CHECK([for the getrandom() function], [ac_cv_func_getrandom], [ AC_LINK_IFELSE( [ AC_LANG_SOURCE([[ + #include #include - int main() { + int main(void) { char buffer[1]; const size_t buflen = sizeof(buffer); const int flags = 0; From dc063a25d29840d863b15c86fdab15b4a1894c73 Mon Sep 17 00:00:00 2001 From: "Jason R. 
Coombs" Date: Sat, 26 Nov 2022 08:28:49 -0500 Subject: [PATCH 057/112] gh-97966: Restore prior expectation that uname_result._fields and ._asdict would include the processor. (gh-98343) --- Lib/platform.py | 6 ++++-- Lib/test/test_platform.py | 8 ++++++++ .../Library/2022-10-16-18-52-00.gh-issue-97966.humlhz.rst | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-10-16-18-52-00.gh-issue-97966.humlhz.rst diff --git a/Lib/platform.py b/Lib/platform.py index 9f5b317287530b..6745321e31c279 100755 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -847,6 +847,8 @@ class uname_result( except when needed. """ + _fields = ('system', 'node', 'release', 'version', 'machine', 'processor') + @functools.cached_property def processor(self): return _unknown_as_blank(_Processor.get()) @@ -860,7 +862,7 @@ def __iter__(self): @classmethod def _make(cls, iterable): # override factory to affect length check - num_fields = len(cls._fields) + num_fields = len(cls._fields) - 1 result = cls.__new__(cls, *iterable) if len(result) != num_fields + 1: msg = f'Expected {num_fields} arguments, got {len(result)}' @@ -874,7 +876,7 @@ def __len__(self): return len(tuple(iter(self))) def __reduce__(self): - return uname_result, tuple(self)[:len(self._fields)] + return uname_result, tuple(self)[:len(self._fields) - 1] _uname_cache = None diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 9c03a89fd57d07..3992faf8e5cd5b 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -277,6 +277,14 @@ def test_uname_slices(self): self.assertEqual(res[:], expected) self.assertEqual(res[:5], expected[:5]) + def test_uname_fields(self): + self.assertIn('processor', platform.uname()._fields) + + def test_uname_asdict(self): + res = platform.uname()._asdict() + self.assertEqual(len(res), 6) + self.assertIn('processor', res) + @unittest.skipIf(sys.platform in ['win32', 'OpenVMS'], "uname -p not used") @support.requires_subprocess() def test_uname_processor(self): diff --git a/Misc/NEWS.d/next/Library/2022-10-16-18-52-00.gh-issue-97966.humlhz.rst b/Misc/NEWS.d/next/Library/2022-10-16-18-52-00.gh-issue-97966.humlhz.rst new file mode 100644 index 00000000000000..b725465ae4f0ef --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-16-18-52-00.gh-issue-97966.humlhz.rst @@ -0,0 +1,2 @@ +On ``uname_result``, restored expectation that ``_fields`` and ``_asdict`` +would include all six properties including ``processor``. From 7796d3179b71536dd1d2ca7fdbc1255bdb8cfb52 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 26 Nov 2022 09:44:13 -0500 Subject: [PATCH 058/112] gh-98098: Create packages from zipfile and test_zipfile (gh-98103) * gh-98098: Move zipfile into a package. * Moved test_zipfile to a package * Extracted module for test_path. * Add blurb * Add jaraco as owner of zipfile.Path. * Synchronize with minor changes found at jaraco/zipp@d9e7f4352d. 
--- .github/CODEOWNERS | 3 + Lib/test/test_zipfile/__init__.py | 5 + .../test_core.py} | 420 +---------------- Lib/test/test_zipfile/test_path.py | 423 ++++++++++++++++++ Lib/{zipfile.py => zipfile/__init__.py} | 387 +--------------- Lib/zipfile/__main__.py | 77 ++++ Lib/zipfile/_path.py | 315 +++++++++++++ ...2-10-08-15-41-00.gh-issue-98098.DugpWi.rst | 2 + 8 files changed, 834 insertions(+), 798 deletions(-) create mode 100644 Lib/test/test_zipfile/__init__.py rename Lib/test/{test_zipfile.py => test_zipfile/test_core.py} (90%) create mode 100644 Lib/test/test_zipfile/test_path.py rename Lib/{zipfile.py => zipfile/__init__.py} (88%) create mode 100644 Lib/zipfile/__main__.py create mode 100644 Lib/zipfile/_path.py create mode 100644 Misc/NEWS.d/next/Library/2022-10-08-15-41-00.gh-issue-98098.DugpWi.rst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5f6d86209b842e..5d30c0928e5ab8 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -154,3 +154,6 @@ Lib/ast.py @isidentical # pathlib **/*pathlib* @brettcannon + +# zipfile.Path +**/*zipfile/*_path.py @jaraco diff --git a/Lib/test/test_zipfile/__init__.py b/Lib/test/test_zipfile/__init__.py new file mode 100644 index 00000000000000..4b16ecc31156a5 --- /dev/null +++ b/Lib/test/test_zipfile/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile/test_core.py similarity index 90% rename from Lib/test/test_zipfile.py rename to Lib/test/test_zipfile/test_core.py index 6f6f4bc26b0d40..bb0f1467735bcf 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile/test_core.py @@ -6,7 +6,6 @@ import os import pathlib import posixpath -import string import struct import subprocess import sys @@ -14,7 +13,6 @@ import unittest import unittest.mock as mock import zipfile -import functools from tempfile import TemporaryFile @@ -2715,13 +2713,13 @@ def tearDown(self): class ZipInfoTests(unittest.TestCase): def test_from_file(self): zi = zipfile.ZipInfo.from_file(__file__) - self.assertEqual(posixpath.basename(zi.filename), 'test_zipfile.py') + self.assertEqual(posixpath.basename(zi.filename), 'test_core.py') self.assertFalse(zi.is_dir()) self.assertEqual(zi.file_size, os.path.getsize(__file__)) def test_from_file_pathlike(self): zi = zipfile.ZipInfo.from_file(pathlib.Path(__file__)) - self.assertEqual(posixpath.basename(zi.filename), 'test_zipfile.py') + self.assertEqual(posixpath.basename(zi.filename), 'test_core.py') self.assertFalse(zi.is_dir()) self.assertEqual(zi.file_size, os.path.getsize(__file__)) @@ -2867,420 +2865,6 @@ def test_execute_zip64(self): self.assertIn(b'number in executable: 5', output) -# Poor man's technique to consume a (smallish) iterable. -consume = tuple - - -# from jaraco.itertools 5.0 -class jaraco: - class itertools: - class Counter: - def __init__(self, i): - self.count = 0 - self._orig_iter = iter(i) - - def __iter__(self): - return self - - def __next__(self): - result = next(self._orig_iter) - self.count += 1 - return result - - -def add_dirs(zf): - """ - Given a writable zip file zf, inject directory entries for - any directories implied by the presence of children. - """ - for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()): - zf.writestr(name, b"") - return zf - - -def build_alpharep_fixture(): - """ - Create a zip file with this structure: - - . 
- ├── a.txt - ├── b - │ ├── c.txt - │ ├── d - │ │ └── e.txt - │ └── f.txt - └── g - └── h - └── i.txt - - This fixture has the following key characteristics: - - - a file at the root (a) - - a file two levels deep (b/d/e) - - multiple files in a directory (b/c, b/f) - - a directory containing only a directory (g/h) - - "alpha" because it uses alphabet - "rep" because it's a representative example - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr("a.txt", b"content of a") - zf.writestr("b/c.txt", b"content of c") - zf.writestr("b/d/e.txt", b"content of e") - zf.writestr("b/f.txt", b"content of f") - zf.writestr("g/h/i.txt", b"content of i") - zf.filename = "alpharep.zip" - return zf - - -def pass_alpharep(meth): - """ - Given a method, wrap it in a for loop that invokes method - with each subtest. - """ - - @functools.wraps(meth) - def wrapper(self): - for alpharep in self.zipfile_alpharep(): - meth(self, alpharep=alpharep) - - return wrapper - - -class TestPath(unittest.TestCase): - def setUp(self): - self.fixtures = contextlib.ExitStack() - self.addCleanup(self.fixtures.close) - - def zipfile_alpharep(self): - with self.subTest(): - yield build_alpharep_fixture() - with self.subTest(): - yield add_dirs(build_alpharep_fixture()) - - def zipfile_ondisk(self, alpharep): - tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) - buffer = alpharep.fp - alpharep.close() - path = tmpdir / alpharep.filename - with path.open("wb") as strm: - strm.write(buffer.getvalue()) - return path - - @pass_alpharep - def test_iterdir_and_types(self, alpharep): - root = zipfile.Path(alpharep) - assert root.is_dir() - a, b, g = root.iterdir() - assert a.is_file() - assert b.is_dir() - assert g.is_dir() - c, f, d = b.iterdir() - assert c.is_file() and f.is_file() - (e,) = d.iterdir() - assert e.is_file() - (h,) = g.iterdir() - (i,) = h.iterdir() - assert i.is_file() - - @pass_alpharep - def test_is_file_missing(self, alpharep): - root = zipfile.Path(alpharep) - assert not root.joinpath('missing.txt').is_file() - - @pass_alpharep - def test_iterdir_on_file(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - with self.assertRaises(ValueError): - a.iterdir() - - @pass_alpharep - def test_subdir_is_dir(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'b').is_dir() - assert (root / 'b/').is_dir() - assert (root / 'g').is_dir() - assert (root / 'g/').is_dir() - - @pass_alpharep - def test_open(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - with a.open(encoding="utf-8") as strm: - data = strm.read() - assert data == "content of a" - - def test_open_write(self): - """ - If the zipfile is open for write, it should be possible to - write bytes or text to it. - """ - zf = zipfile.Path(zipfile.ZipFile(io.BytesIO(), mode='w')) - with zf.joinpath('file.bin').open('wb') as strm: - strm.write(b'binary contents') - with zf.joinpath('file.txt').open('w', encoding="utf-8") as strm: - strm.write('text file') - - def test_open_extant_directory(self): - """ - Attempting to open a directory raises IsADirectoryError. 
- """ - zf = zipfile.Path(add_dirs(build_alpharep_fixture())) - with self.assertRaises(IsADirectoryError): - zf.joinpath('b').open() - - @pass_alpharep - def test_open_binary_invalid_args(self, alpharep): - root = zipfile.Path(alpharep) - with self.assertRaises(ValueError): - root.joinpath('a.txt').open('rb', encoding='utf-8') - with self.assertRaises(ValueError): - root.joinpath('a.txt').open('rb', 'utf-8') - - def test_open_missing_directory(self): - """ - Attempting to open a missing directory raises FileNotFoundError. - """ - zf = zipfile.Path(add_dirs(build_alpharep_fixture())) - with self.assertRaises(FileNotFoundError): - zf.joinpath('z').open() - - @pass_alpharep - def test_read(self, alpharep): - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - assert a.read_text(encoding="utf-8") == "content of a" - assert a.read_bytes() == b"content of a" - - @pass_alpharep - def test_joinpath(self, alpharep): - root = zipfile.Path(alpharep) - a = root.joinpath("a.txt") - assert a.is_file() - e = root.joinpath("b").joinpath("d").joinpath("e.txt") - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_joinpath_multiple(self, alpharep): - root = zipfile.Path(alpharep) - e = root.joinpath("b", "d", "e.txt") - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_traverse_truediv(self, alpharep): - root = zipfile.Path(alpharep) - a = root / "a.txt" - assert a.is_file() - e = root / "b" / "d" / "e.txt" - assert e.read_text(encoding="utf-8") == "content of e" - - @pass_alpharep - def test_traverse_simplediv(self, alpharep): - """ - Disable the __future__.division when testing traversal. - """ - code = compile( - source="zipfile.Path(alpharep) / 'a'", - filename="(test)", - mode="eval", - dont_inherit=True, - ) - eval(code) - - @pass_alpharep - def test_pathlike_construction(self, alpharep): - """ - zipfile.Path should be constructable from a path-like object - """ - zipfile_ondisk = self.zipfile_ondisk(alpharep) - pathlike = pathlib.Path(str(zipfile_ondisk)) - zipfile.Path(pathlike) - - @pass_alpharep - def test_traverse_pathlike(self, alpharep): - root = zipfile.Path(alpharep) - root / pathlib.Path("a") - - @pass_alpharep - def test_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'a').parent.at == '' - assert (root / 'a' / 'b').parent.at == 'a/' - - @pass_alpharep - def test_dir_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'b').parent.at == '' - assert (root / 'b/').parent.at == '' - - @pass_alpharep - def test_missing_dir_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert (root / 'missing dir/').parent.at == '' - - @pass_alpharep - def test_mutability(self, alpharep): - """ - If the underlying zipfile is changed, the Path object should - reflect that change. 
- """ - root = zipfile.Path(alpharep) - a, b, g = root.iterdir() - alpharep.writestr('foo.txt', 'foo') - alpharep.writestr('bar/baz.txt', 'baz') - assert any(child.name == 'foo.txt' for child in root.iterdir()) - assert (root / 'foo.txt').read_text(encoding="utf-8") == 'foo' - (baz,) = (root / 'bar').iterdir() - assert baz.read_text(encoding="utf-8") == 'baz' - - HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13 - - def huge_zipfile(self): - """Create a read-only zipfile with a huge number of entries entries.""" - strm = io.BytesIO() - zf = zipfile.ZipFile(strm, "w") - for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)): - zf.writestr(entry, entry) - zf.mode = 'r' - return zf - - def test_joinpath_constant_time(self): - """ - Ensure joinpath on items in zipfile is linear time. - """ - root = zipfile.Path(self.huge_zipfile()) - entries = jaraco.itertools.Counter(root.iterdir()) - for entry in entries: - entry.joinpath('suffix') - # Check the file iterated all items - assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES - - # @func_timeout.func_set_timeout(3) - def test_implied_dirs_performance(self): - data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] - zipfile.CompleteDirs._implied_dirs(data) - - @pass_alpharep - def test_read_does_not_close(self, alpharep): - alpharep = self.zipfile_ondisk(alpharep) - with zipfile.ZipFile(alpharep) as file: - for rep in range(2): - zipfile.Path(file, 'a.txt').read_text(encoding="utf-8") - - @pass_alpharep - def test_subclass(self, alpharep): - class Subclass(zipfile.Path): - pass - - root = Subclass(alpharep) - assert isinstance(root / 'b', Subclass) - - @pass_alpharep - def test_filename(self, alpharep): - root = zipfile.Path(alpharep) - assert root.filename == pathlib.Path('alpharep.zip') - - @pass_alpharep - def test_root_name(self, alpharep): - """ - The name of the root should be the name of the zipfile - """ - root = zipfile.Path(alpharep) - assert root.name == 'alpharep.zip' == root.filename.name - - @pass_alpharep - def test_suffix(self, alpharep): - """ - The suffix of the root should be the suffix of the zipfile. - The suffix of each nested file is the final component's last suffix, if any. - Includes the leading period, just like pathlib.Path. - """ - root = zipfile.Path(alpharep) - assert root.suffix == '.zip' == root.filename.suffix - - b = root / "b.txt" - assert b.suffix == ".txt" - - c = root / "c" / "filename.tar.gz" - assert c.suffix == ".gz" - - d = root / "d" - assert d.suffix == "" - - @pass_alpharep - def test_suffixes(self, alpharep): - """ - The suffix of the root should be the suffix of the zipfile. - The suffix of each nested file is the final component's last suffix, if any. - Includes the leading period, just like pathlib.Path. 
- """ - root = zipfile.Path(alpharep) - assert root.suffixes == ['.zip'] == root.filename.suffixes - - b = root / 'b.txt' - assert b.suffixes == ['.txt'] - - c = root / 'c' / 'filename.tar.gz' - assert c.suffixes == ['.tar', '.gz'] - - d = root / 'd' - assert d.suffixes == [] - - e = root / '.hgrc' - assert e.suffixes == [] - - @pass_alpharep - def test_stem(self, alpharep): - """ - The final path component, without its suffix - """ - root = zipfile.Path(alpharep) - assert root.stem == 'alpharep' == root.filename.stem - - b = root / "b.txt" - assert b.stem == "b" - - c = root / "c" / "filename.tar.gz" - assert c.stem == "filename.tar" - - d = root / "d" - assert d.stem == "d" - - @pass_alpharep - def test_root_parent(self, alpharep): - root = zipfile.Path(alpharep) - assert root.parent == pathlib.Path('.') - root.root.filename = 'foo/bar.zip' - assert root.parent == pathlib.Path('foo') - - @pass_alpharep - def test_root_unnamed(self, alpharep): - """ - It is an error to attempt to get the name - or parent of an unnamed zipfile. - """ - alpharep.filename = None - root = zipfile.Path(alpharep) - with self.assertRaises(TypeError): - root.name - with self.assertRaises(TypeError): - root.parent - - # .name and .parent should still work on subs - sub = root / "b" - assert sub.name == "b" - assert sub.parent - - @pass_alpharep - def test_inheritance(self, alpharep): - cls = type('PathChild', (zipfile.Path,), {}) - for alpharep in self.zipfile_alpharep(): - file = cls(alpharep).joinpath('some dir').parent - assert isinstance(file, cls) - - class EncodedMetadataTests(unittest.TestCase): file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three' file_content = [ diff --git a/Lib/test/test_zipfile/test_path.py b/Lib/test/test_zipfile/test_path.py new file mode 100644 index 00000000000000..3c62e9a0b0e65d --- /dev/null +++ b/Lib/test/test_zipfile/test_path.py @@ -0,0 +1,423 @@ +import io +import zipfile +import contextlib +import pathlib +import unittest +import string +import functools + +from test.support.os_helper import temp_dir + + +# Poor man's technique to consume a (smallish) iterable. +consume = tuple + + +# from jaraco.itertools 5.0 +class jaraco: + class itertools: + class Counter: + def __init__(self, i): + self.count = 0 + self._orig_iter = iter(i) + + def __iter__(self): + return self + + def __next__(self): + result = next(self._orig_iter) + self.count += 1 + return result + + +def add_dirs(zf): + """ + Given a writable zip file zf, inject directory entries for + any directories implied by the presence of children. + """ + for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()): + zf.writestr(name, b"") + return zf + + +def build_alpharep_fixture(): + """ + Create a zip file with this structure: + + . 
+ ├── a.txt + ├── b + │ ├── c.txt + │ ├── d + │ │ └── e.txt + │ └── f.txt + └── g + └── h + └── i.txt + + This fixture has the following key characteristics: + + - a file at the root (a) + - a file two levels deep (b/d/e) + - multiple files in a directory (b/c, b/f) + - a directory containing only a directory (g/h) + + "alpha" because it uses alphabet + "rep" because it's a representative example + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("a.txt", b"content of a") + zf.writestr("b/c.txt", b"content of c") + zf.writestr("b/d/e.txt", b"content of e") + zf.writestr("b/f.txt", b"content of f") + zf.writestr("g/h/i.txt", b"content of i") + zf.filename = "alpharep.zip" + return zf + + +def pass_alpharep(meth): + """ + Given a method, wrap it in a for loop that invokes method + with each subtest. + """ + + @functools.wraps(meth) + def wrapper(self): + for alpharep in self.zipfile_alpharep(): + meth(self, alpharep=alpharep) + + return wrapper + + +class TestPath(unittest.TestCase): + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + def zipfile_alpharep(self): + with self.subTest(): + yield build_alpharep_fixture() + with self.subTest(): + yield add_dirs(build_alpharep_fixture()) + + def zipfile_ondisk(self, alpharep): + tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) + buffer = alpharep.fp + alpharep.close() + path = tmpdir / alpharep.filename + with path.open("wb") as strm: + strm.write(buffer.getvalue()) + return path + + @pass_alpharep + def test_iterdir_and_types(self, alpharep): + root = zipfile.Path(alpharep) + assert root.is_dir() + a, b, g = root.iterdir() + assert a.is_file() + assert b.is_dir() + assert g.is_dir() + c, f, d = b.iterdir() + assert c.is_file() and f.is_file() + (e,) = d.iterdir() + assert e.is_file() + (h,) = g.iterdir() + (i,) = h.iterdir() + assert i.is_file() + + @pass_alpharep + def test_is_file_missing(self, alpharep): + root = zipfile.Path(alpharep) + assert not root.joinpath('missing.txt').is_file() + + @pass_alpharep + def test_iterdir_on_file(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + with self.assertRaises(ValueError): + a.iterdir() + + @pass_alpharep + def test_subdir_is_dir(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'b').is_dir() + assert (root / 'b/').is_dir() + assert (root / 'g').is_dir() + assert (root / 'g/').is_dir() + + @pass_alpharep + def test_open(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + with a.open(encoding="utf-8") as strm: + data = strm.read() + assert data == "content of a" + + def test_open_write(self): + """ + If the zipfile is open for write, it should be possible to + write bytes or text to it. + """ + zf = zipfile.Path(zipfile.ZipFile(io.BytesIO(), mode='w')) + with zf.joinpath('file.bin').open('wb') as strm: + strm.write(b'binary contents') + with zf.joinpath('file.txt').open('w', encoding="utf-8") as strm: + strm.write('text file') + + def test_open_extant_directory(self): + """ + Attempting to open a directory raises IsADirectoryError. 
+ """ + zf = zipfile.Path(add_dirs(build_alpharep_fixture())) + with self.assertRaises(IsADirectoryError): + zf.joinpath('b').open() + + @pass_alpharep + def test_open_binary_invalid_args(self, alpharep): + root = zipfile.Path(alpharep) + with self.assertRaises(ValueError): + root.joinpath('a.txt').open('rb', encoding='utf-8') + with self.assertRaises(ValueError): + root.joinpath('a.txt').open('rb', 'utf-8') + + def test_open_missing_directory(self): + """ + Attempting to open a missing directory raises FileNotFoundError. + """ + zf = zipfile.Path(add_dirs(build_alpharep_fixture())) + with self.assertRaises(FileNotFoundError): + zf.joinpath('z').open() + + @pass_alpharep + def test_read(self, alpharep): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + assert a.read_text(encoding="utf-8") == "content of a" + assert a.read_bytes() == b"content of a" + + @pass_alpharep + def test_joinpath(self, alpharep): + root = zipfile.Path(alpharep) + a = root.joinpath("a.txt") + assert a.is_file() + e = root.joinpath("b").joinpath("d").joinpath("e.txt") + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_joinpath_multiple(self, alpharep): + root = zipfile.Path(alpharep) + e = root.joinpath("b", "d", "e.txt") + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_traverse_truediv(self, alpharep): + root = zipfile.Path(alpharep) + a = root / "a.txt" + assert a.is_file() + e = root / "b" / "d" / "e.txt" + assert e.read_text(encoding="utf-8") == "content of e" + + @pass_alpharep + def test_traverse_simplediv(self, alpharep): + """ + Disable the __future__.division when testing traversal. + """ + code = compile( + source="zipfile.Path(alpharep) / 'a'", + filename="(test)", + mode="eval", + dont_inherit=True, + ) + eval(code) + + @pass_alpharep + def test_pathlike_construction(self, alpharep): + """ + zipfile.Path should be constructable from a path-like object + """ + zipfile_ondisk = self.zipfile_ondisk(alpharep) + pathlike = pathlib.Path(str(zipfile_ondisk)) + zipfile.Path(pathlike) + + @pass_alpharep + def test_traverse_pathlike(self, alpharep): + root = zipfile.Path(alpharep) + root / pathlib.Path("a") + + @pass_alpharep + def test_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'a').parent.at == '' + assert (root / 'a' / 'b').parent.at == 'a/' + + @pass_alpharep + def test_dir_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'b').parent.at == '' + assert (root / 'b/').parent.at == '' + + @pass_alpharep + def test_missing_dir_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert (root / 'missing dir/').parent.at == '' + + @pass_alpharep + def test_mutability(self, alpharep): + """ + If the underlying zipfile is changed, the Path object should + reflect that change. 
+ """ + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() + alpharep.writestr('foo.txt', 'foo') + alpharep.writestr('bar/baz.txt', 'baz') + assert any(child.name == 'foo.txt' for child in root.iterdir()) + assert (root / 'foo.txt').read_text(encoding="utf-8") == 'foo' + (baz,) = (root / 'bar').iterdir() + assert baz.read_text(encoding="utf-8") == 'baz' + + HUGE_ZIPFILE_NUM_ENTRIES = 2**13 + + def huge_zipfile(self): + """Create a read-only zipfile with a huge number of entries entries.""" + strm = io.BytesIO() + zf = zipfile.ZipFile(strm, "w") + for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)): + zf.writestr(entry, entry) + zf.mode = 'r' + return zf + + def test_joinpath_constant_time(self): + """ + Ensure joinpath on items in zipfile is linear time. + """ + root = zipfile.Path(self.huge_zipfile()) + entries = jaraco.itertools.Counter(root.iterdir()) + for entry in entries: + entry.joinpath('suffix') + # Check the file iterated all items + assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES + + # @func_timeout.func_set_timeout(3) + def test_implied_dirs_performance(self): + data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] + zipfile.CompleteDirs._implied_dirs(data) + + @pass_alpharep + def test_read_does_not_close(self, alpharep): + alpharep = self.zipfile_ondisk(alpharep) + with zipfile.ZipFile(alpharep) as file: + for rep in range(2): + zipfile.Path(file, 'a.txt').read_text(encoding="utf-8") + + @pass_alpharep + def test_subclass(self, alpharep): + class Subclass(zipfile.Path): + pass + + root = Subclass(alpharep) + assert isinstance(root / 'b', Subclass) + + @pass_alpharep + def test_filename(self, alpharep): + root = zipfile.Path(alpharep) + assert root.filename == pathlib.Path('alpharep.zip') + + @pass_alpharep + def test_root_name(self, alpharep): + """ + The name of the root should be the name of the zipfile + """ + root = zipfile.Path(alpharep) + assert root.name == 'alpharep.zip' == root.filename.name + + @pass_alpharep + def test_suffix(self, alpharep): + """ + The suffix of the root should be the suffix of the zipfile. + The suffix of each nested file is the final component's last suffix, if any. + Includes the leading period, just like pathlib.Path. + """ + root = zipfile.Path(alpharep) + assert root.suffix == '.zip' == root.filename.suffix + + b = root / "b.txt" + assert b.suffix == ".txt" + + c = root / "c" / "filename.tar.gz" + assert c.suffix == ".gz" + + d = root / "d" + assert d.suffix == "" + + @pass_alpharep + def test_suffixes(self, alpharep): + """ + The suffix of the root should be the suffix of the zipfile. + The suffix of each nested file is the final component's last suffix, if any. + Includes the leading period, just like pathlib.Path. 
+ """ + root = zipfile.Path(alpharep) + assert root.suffixes == ['.zip'] == root.filename.suffixes + + b = root / 'b.txt' + assert b.suffixes == ['.txt'] + + c = root / 'c' / 'filename.tar.gz' + assert c.suffixes == ['.tar', '.gz'] + + d = root / 'd' + assert d.suffixes == [] + + e = root / '.hgrc' + assert e.suffixes == [] + + @pass_alpharep + def test_stem(self, alpharep): + """ + The final path component, without its suffix + """ + root = zipfile.Path(alpharep) + assert root.stem == 'alpharep' == root.filename.stem + + b = root / "b.txt" + assert b.stem == "b" + + c = root / "c" / "filename.tar.gz" + assert c.stem == "filename.tar" + + d = root / "d" + assert d.stem == "d" + + @pass_alpharep + def test_root_parent(self, alpharep): + root = zipfile.Path(alpharep) + assert root.parent == pathlib.Path('.') + root.root.filename = 'foo/bar.zip' + assert root.parent == pathlib.Path('foo') + + @pass_alpharep + def test_root_unnamed(self, alpharep): + """ + It is an error to attempt to get the name + or parent of an unnamed zipfile. + """ + alpharep.filename = None + root = zipfile.Path(alpharep) + with self.assertRaises(TypeError): + root.name + with self.assertRaises(TypeError): + root.parent + + # .name and .parent should still work on subs + sub = root / "b" + assert sub.name == "b" + assert sub.parent + + @pass_alpharep + def test_inheritance(self, alpharep): + cls = type('PathChild', (zipfile.Path,), {}) + for alpharep in self.zipfile_alpharep(): + file = cls(alpharep).joinpath('some dir').parent + assert isinstance(file, cls) diff --git a/Lib/zipfile.py b/Lib/zipfile/__init__.py similarity index 88% rename from Lib/zipfile.py rename to Lib/zipfile/__init__.py index 77b643caf9fc91..8f834267b28c2e 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile/__init__.py @@ -6,17 +6,13 @@ import binascii import importlib.util import io -import itertools import os -import posixpath import shutil import stat import struct import sys import threading import time -import contextlib -import pathlib try: import zlib # We may need its compression method @@ -2186,381 +2182,12 @@ def _compile(file, optimize=-1): return (fname, archivename) -def _parents(path): - """ - Given a path with elements separated by - posixpath.sep, generate all parents of that path. - - >>> list(_parents('b/d')) - ['b'] - >>> list(_parents('/b/d/')) - ['/b'] - >>> list(_parents('b/d/f/')) - ['b/d', 'b'] - >>> list(_parents('b')) - [] - >>> list(_parents('')) - [] - """ - return itertools.islice(_ancestry(path), 1, None) - - -def _ancestry(path): - """ - Given a path with elements separated by - posixpath.sep, generate all elements of that path - - >>> list(_ancestry('b/d')) - ['b/d', 'b'] - >>> list(_ancestry('/b/d/')) - ['/b/d', '/b'] - >>> list(_ancestry('b/d/f/')) - ['b/d/f', 'b/d', 'b'] - >>> list(_ancestry('b')) - ['b'] - >>> list(_ancestry('')) - [] - """ - path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: - yield path - path, tail = posixpath.split(path) - - -_dedupe = dict.fromkeys -"""Deduplicate an iterable in original order""" - - -def _difference(minuend, subtrahend): - """ - Return items in minuend not in subtrahend, retaining order - with O(1) lookup. - """ - return itertools.filterfalse(set(subtrahend).__contains__, minuend) - - -class CompleteDirs(ZipFile): - """ - A ZipFile subclass that ensures that implied directories - are always included in the namelist. 
- """ - - @staticmethod - def _implied_dirs(names): - parents = itertools.chain.from_iterable(map(_parents, names)) - as_dirs = (p + posixpath.sep for p in parents) - return _dedupe(_difference(as_dirs, names)) - - def namelist(self): - names = super(CompleteDirs, self).namelist() - return names + list(self._implied_dirs(names)) - - def _name_set(self): - return set(self.namelist()) - - def resolve_dir(self, name): - """ - If the name represents a directory, return that name - as a directory (with the trailing slash). - """ - names = self._name_set() - dirname = name + '/' - dir_match = name not in names and dirname in names - return dirname if dir_match else name - - @classmethod - def make(cls, source): - """ - Given a source (filename or zipfile), return an - appropriate CompleteDirs subclass. - """ - if isinstance(source, CompleteDirs): - return source - - if not isinstance(source, ZipFile): - return cls(source) - - # Only allow for FastLookup when supplied zipfile is read-only - if 'r' not in source.mode: - cls = CompleteDirs - - source.__class__ = cls - return source - - -class FastLookup(CompleteDirs): - """ - ZipFile subclass to ensure implicit - dirs exist and are resolved rapidly. - """ - - def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super(FastLookup, self).namelist() - return self.__names - - def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super(FastLookup, self)._name_set() - return self.__lookup - - -class Path: - """ - A pathlib-compatible interface for zip files. - - Consider a zip file with this structure:: - - . - ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt - - >>> data = io.BytesIO() - >>> zf = ZipFile(data, 'w') - >>> zf.writestr('a.txt', 'content of a') - >>> zf.writestr('b/c.txt', 'content of c') - >>> zf.writestr('b/d/e.txt', 'content of e') - >>> zf.filename = 'mem/abcde.zip' - - Path accepts the zipfile object itself or a filename - - >>> root = Path(zf) - - From there, several path operations are available. - - Directory iteration (including the zip file itself): +from ._path import ( # noqa: E402 + Path, - >>> a, b = root.iterdir() - >>> a - Path('mem/abcde.zip', 'a.txt') - >>> b - Path('mem/abcde.zip', 'b/') + # used privately for tests + CompleteDirs, # noqa: F401 +) - name property: - - >>> b.name - 'b' - - join with divide operator: - - >>> c = b / 'c.txt' - >>> c - Path('mem/abcde.zip', 'b/c.txt') - >>> c.name - 'c.txt' - - Read text: - - >>> c.read_text() - 'content of c' - - existence: - - >>> c.exists() - True - >>> (b / 'missing.txt').exists() - False - - Coercion to string: - - >>> import os - >>> str(c).replace(os.sep, posixpath.sep) - 'mem/abcde.zip/b/c.txt' - - At the root, ``name``, ``filename``, and ``parent`` - resolve to the zipfile. Note these attributes are not - valid and will raise a ``ValueError`` if the zipfile - has no filename. - - >>> root.name - 'abcde.zip' - >>> str(root.filename).replace(os.sep, posixpath.sep) - 'mem/abcde.zip' - >>> str(root.parent) - 'mem' - """ - - __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" - - def __init__(self, root, at=""): - """ - Construct a Path from a ZipFile or filename. - - Note: When the source is an existing ZipFile object, - its type (__class__) will be mutated to a - specialized type. If the caller wishes to retain the - original type, the caller should either create a - separate ZipFile object or pass a filename. 
- """ - self.root = FastLookup.make(root) - self.at = at - - def open(self, mode='r', *args, pwd=None, **kwargs): - """ - Open this entry as text or binary following the semantics - of ``pathlib.Path.open()`` by passing arguments through - to io.TextIOWrapper(). - """ - if self.is_dir(): - raise IsADirectoryError(self) - zip_mode = mode[0] - if not self.exists() and zip_mode == 'r': - raise FileNotFoundError(self) - stream = self.root.open(self.at, zip_mode, pwd=pwd) - if 'b' in mode: - if args or kwargs: - raise ValueError("encoding args invalid for binary operation") - return stream - else: - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - return io.TextIOWrapper(stream, *args, **kwargs) - - @property - def name(self): - return pathlib.Path(self.at).name or self.filename.name - - @property - def suffix(self): - return pathlib.Path(self.at).suffix or self.filename.suffix - - @property - def suffixes(self): - return pathlib.Path(self.at).suffixes or self.filename.suffixes - - @property - def stem(self): - return pathlib.Path(self.at).stem or self.filename.stem - - @property - def filename(self): - return pathlib.Path(self.root.filename).joinpath(self.at) - - def read_text(self, *args, **kwargs): - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - with self.open('r', *args, **kwargs) as strm: - return strm.read() - - def read_bytes(self): - with self.open('rb') as strm: - return strm.read() - - def _is_child(self, path): - return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") - - def _next(self, at): - return self.__class__(self.root, at) - - def is_dir(self): - return not self.at or self.at.endswith("/") - - def is_file(self): - return self.exists() and not self.is_dir() - - def exists(self): - return self.at in self.root._name_set() - - def iterdir(self): - if not self.is_dir(): - raise ValueError("Can't listdir a file") - subs = map(self._next, self.root.namelist()) - return filter(self._is_child, subs) - - def __str__(self): - return posixpath.join(self.root.filename, self.at) - - def __repr__(self): - return self.__repr.format(self=self) - - def joinpath(self, *other): - next = posixpath.join(self.at, *other) - return self._next(self.root.resolve_dir(next)) - - __truediv__ = joinpath - - @property - def parent(self): - if not self.at: - return self.filename.parent - parent_at = posixpath.dirname(self.at.rstrip('/')) - if parent_at: - parent_at += '/' - return self._next(parent_at) - - -def main(args=None): - import argparse - - description = 'A simple command-line interface for zipfile module.' 
- parser = argparse.ArgumentParser(description=description) - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-l', '--list', metavar='', - help='Show listing of a zipfile') - group.add_argument('-e', '--extract', nargs=2, - metavar=('', ''), - help='Extract zipfile into target dir') - group.add_argument('-c', '--create', nargs='+', - metavar=('', ''), - help='Create zipfile from sources') - group.add_argument('-t', '--test', metavar='', - help='Test if a zipfile is valid') - parser.add_argument('--metadata-encoding', metavar='', - help='Specify encoding of member names for -l, -e and -t') - args = parser.parse_args(args) - - encoding = args.metadata_encoding - - if args.test is not None: - src = args.test - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - badfile = zf.testzip() - if badfile: - print("The following enclosed file is corrupted: {!r}".format(badfile)) - print("Done testing") - - elif args.list is not None: - src = args.list - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - zf.printdir() - - elif args.extract is not None: - src, curdir = args.extract - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - zf.extractall(curdir) - - elif args.create is not None: - if encoding: - print("Non-conforming encodings not supported with -c.", - file=sys.stderr) - sys.exit(1) - - zip_name = args.create.pop(0) - files = args.create - - def addToZip(zf, path, zippath): - if os.path.isfile(path): - zf.write(path, zippath, ZIP_DEFLATED) - elif os.path.isdir(path): - if zippath: - zf.write(path, zippath) - for nm in sorted(os.listdir(path)): - addToZip(zf, - os.path.join(path, nm), os.path.join(zippath, nm)) - # else: ignore - - with ZipFile(zip_name, 'w') as zf: - for path in files: - zippath = os.path.basename(path) - if not zippath: - zippath = os.path.basename(os.path.dirname(path)) - if zippath in ('', os.curdir, os.pardir): - zippath = '' - addToZip(zf, path, zippath) - - -if __name__ == "__main__": - main() +# used privately for tests +from .__main__ import main # noqa: F401, E402 diff --git a/Lib/zipfile/__main__.py b/Lib/zipfile/__main__.py new file mode 100644 index 00000000000000..a9e5fb1b8d72c4 --- /dev/null +++ b/Lib/zipfile/__main__.py @@ -0,0 +1,77 @@ +import sys +import os +from . import ZipFile, ZIP_DEFLATED + + +def main(args=None): + import argparse + + description = 'A simple command-line interface for zipfile module.' 
+ parser = argparse.ArgumentParser(description=description) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-l', '--list', metavar='', + help='Show listing of a zipfile') + group.add_argument('-e', '--extract', nargs=2, + metavar=('', ''), + help='Extract zipfile into target dir') + group.add_argument('-c', '--create', nargs='+', + metavar=('', ''), + help='Create zipfile from sources') + group.add_argument('-t', '--test', metavar='', + help='Test if a zipfile is valid') + parser.add_argument('--metadata-encoding', metavar='', + help='Specify encoding of member names for -l, -e and -t') + args = parser.parse_args(args) + + encoding = args.metadata_encoding + + if args.test is not None: + src = args.test + with ZipFile(src, 'r', metadata_encoding=encoding) as zf: + badfile = zf.testzip() + if badfile: + print("The following enclosed file is corrupted: {!r}".format(badfile)) + print("Done testing") + + elif args.list is not None: + src = args.list + with ZipFile(src, 'r', metadata_encoding=encoding) as zf: + zf.printdir() + + elif args.extract is not None: + src, curdir = args.extract + with ZipFile(src, 'r', metadata_encoding=encoding) as zf: + zf.extractall(curdir) + + elif args.create is not None: + if encoding: + print("Non-conforming encodings not supported with -c.", + file=sys.stderr) + sys.exit(1) + + zip_name = args.create.pop(0) + files = args.create + + def addToZip(zf, path, zippath): + if os.path.isfile(path): + zf.write(path, zippath, ZIP_DEFLATED) + elif os.path.isdir(path): + if zippath: + zf.write(path, zippath) + for nm in sorted(os.listdir(path)): + addToZip(zf, + os.path.join(path, nm), os.path.join(zippath, nm)) + # else: ignore + + with ZipFile(zip_name, 'w') as zf: + for path in files: + zippath = os.path.basename(path) + if not zippath: + zippath = os.path.basename(os.path.dirname(path)) + if zippath in ('', os.curdir, os.pardir): + zippath = '' + addToZip(zf, path, zippath) + + +if __name__ == "__main__": + main() diff --git a/Lib/zipfile/_path.py b/Lib/zipfile/_path.py new file mode 100644 index 00000000000000..67ef07a130d1ad --- /dev/null +++ b/Lib/zipfile/_path.py @@ -0,0 +1,315 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import pathlib + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = dict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. 
+ """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class CompleteDirs(zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. + """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(source) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. 
If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). + """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + else: + kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) + return io.TextIOWrapper(stream, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) + with self.open('r', *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *other) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/Misc/NEWS.d/next/Library/2022-10-08-15-41-00.gh-issue-98098.DugpWi.rst b/Misc/NEWS.d/next/Library/2022-10-08-15-41-00.gh-issue-98098.DugpWi.rst new file mode 100644 index 00000000000000..202275e16ea081 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-08-15-41-00.gh-issue-98098.DugpWi.rst @@ -0,0 +1,2 @@ +Created packages from zipfile and test_zipfile modules, separating +``zipfile.Path`` functionality. 
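As a quick, illustrative check (not part of the patch) that the public surface is unchanged by the split, ``zipfile.Path`` and ``ZipFile`` still come from the top-level package even though the traversal code now lives in ``zipfile._path``:

    import io
    import zipfile

    # Build a small in-memory archive to traverse.
    data = io.BytesIO()
    with zipfile.ZipFile(data, "w") as zf:
        zf.writestr("pkg/readme.txt", "hello from the archive")

    # The pathlib-style interface keeps working exactly as before the split.
    root = zipfile.Path(zipfile.ZipFile(data))
    print([entry.name for entry in root.iterdir()])    # ['pkg']
    print((root / "pkg" / "readme.txt").read_text())   # 'hello from the archive'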
From 78365b8e283c78e23725748500f48dd2c2ca1161 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Sat, 26 Nov 2022 17:57:05 +0000 Subject: [PATCH 059/112] gh-91078: Return None from TarFile.next when the tarfile is empty (GH-91850) Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Lib/tarfile.py | 2 ++ Lib/test/test_tarfile.py | 12 ++++++++++++ .../2022-04-23-03-46-37.gh-issue-91078.87-hkp.rst | 1 + 3 files changed, 15 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-23-03-46-37.gh-issue-91078.87-hkp.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 42100e9a39436e..b47015f5cb6be5 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2339,6 +2339,8 @@ def next(self): # Advance the file pointer. if self.offset != self.fileobj.tell(): + if self.offset == 0: + return None self.fileobj.seek(self.offset - 1) if not self.fileobj.read(1): raise ReadError("unexpected end of data") diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 0868d5d6e90915..213932069201b9 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -734,6 +734,18 @@ def test_zlib_error_does_not_leak(self): with self.assertRaises(tarfile.ReadError): tarfile.open(self.tarname) + def test_next_on_empty_tarfile(self): + fd = io.BytesIO() + tf = tarfile.open(fileobj=fd, mode="w") + tf.close() + + fd.seek(0) + with tarfile.open(fileobj=fd, mode="r|") as tf: + self.assertEqual(tf.next(), None) + + fd.seek(0) + with tarfile.open(fileobj=fd, mode="r") as tf: + self.assertEqual(tf.next(), None) class MiscReadTest(MiscReadTestBase, unittest.TestCase): test_fail_comp = None diff --git a/Misc/NEWS.d/next/Library/2022-04-23-03-46-37.gh-issue-91078.87-hkp.rst b/Misc/NEWS.d/next/Library/2022-04-23-03-46-37.gh-issue-91078.87-hkp.rst new file mode 100644 index 00000000000000..e05d5e2a13146c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-23-03-46-37.gh-issue-91078.87-hkp.rst @@ -0,0 +1 @@ +:meth:`TarFile.next` now returns ``None`` when called on an empty tarfile. From 003f341e99234cf6088341e746ffef15e12ccda2 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 26 Nov 2022 13:00:05 -0500 Subject: [PATCH 060/112] Fix zipfile packaging after GH-98103 (GH-99797) * Add zipfile and test_zipfile to list of packages. Fixes regression introduced in #98103. * Restore support for py -m test.test_zipfile --- Lib/test/test_zipfile/__main__.py | 7 +++++++ Makefile.pre.in | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 Lib/test/test_zipfile/__main__.py diff --git a/Lib/test/test_zipfile/__main__.py b/Lib/test/test_zipfile/__main__.py new file mode 100644 index 00000000000000..e25ac946edffe4 --- /dev/null +++ b/Lib/test/test_zipfile/__main__.py @@ -0,0 +1,7 @@ +import unittest + +from . 
import load_tests # noqa: F401 + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 5c49af36d86736..f6df7a620deaed 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1978,6 +1978,7 @@ LIBSUBDIRS= asyncio \ wsgiref \ $(XMLLIBSUBDIRS) \ xmlrpc \ + zipfile \ zoneinfo \ __phello__ TESTSUBDIRS= idlelib/idle_test \ @@ -2051,6 +2052,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tools \ test/test_ttk \ test/test_warnings test/test_warnings/data \ + test/test_zipfile \ test/test_zoneinfo test/test_zoneinfo/data \ test/test_unittest test/test_unittest/testmock \ test/tracedmodules \ From 5f8898216e7b67b7de6b0b1aad9277e88bcebfdb Mon Sep 17 00:00:00 2001 From: busywhitespace Date: Sat, 26 Nov 2022 19:01:08 +0100 Subject: [PATCH 061/112] gh-99795: Fix typo in importlib.resources.abc (GH-99796) Changing TraversableReader to TraversableResources at one place of the documentation. See #99795 for more details. --- Doc/library/importlib.resources.abc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/importlib.resources.abc.rst b/Doc/library/importlib.resources.abc.rst index 57fffe0d905cbe..7747e89a833c02 100644 --- a/Doc/library/importlib.resources.abc.rst +++ b/Doc/library/importlib.resources.abc.rst @@ -145,7 +145,7 @@ :class:`importlib.resources.abc.ResourceReader` and provides concrete implementations of the :class:`importlib.resources.abc.ResourceReader`'s abstract methods. Therefore, any loader supplying - :class:`importlib.abc.TraversableReader` also supplies ResourceReader. + :class:`importlib.abc.TraversableResources` also supplies ResourceReader. Loaders that wish to support resource reading are expected to implement this interface. From 93f22d30eb7bf579d511b1866674bc1c2513dde9 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 26 Nov 2022 13:05:41 -0500 Subject: [PATCH 062/112] gh-98108: Add limited pickleability to zipfile.Path (GH-98109) * gh-98098: Move zipfile into a package. * Moved test_zipfile to a package * Extracted module for test_path. * Add blurb * Add jaraco as owner of zipfile.Path. * Synchronize with minor changes found at jaraco/zipp@d9e7f4352d. * gh-98108: Sync with zipp 3.9.1 adding pickleability. 
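An illustrative round trip of the new behaviour (the archive name below is invented for the example; any picklable constructor arguments will do):

    import pickle
    import zipfile

    # Throwaway archive created just for the demonstration.
    with zipfile.ZipFile("example.zip", "w") as zf:
        zf.writestr("b/c.txt", "content of c")

    p = zipfile.Path("example.zip", at="b/c.txt")
    restored = pickle.loads(pickle.dumps(p))
    print(restored.read_text())    # 'content of c'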
--- Lib/test/test_zipfile/_functools.py | 9 ++++ Lib/test/test_zipfile/_itertools.py | 12 +++++ Lib/test/test_zipfile/_test_params.py | 39 +++++++++++++++ Lib/test/test_zipfile/test_path.py | 50 +++++++++++-------- Lib/zipfile/_path.py | 20 +++++++- ...2-10-08-19-20-33.gh-issue-98108.WUObqM.rst | 2 + 6 files changed, 110 insertions(+), 22 deletions(-) create mode 100644 Lib/test/test_zipfile/_functools.py create mode 100644 Lib/test/test_zipfile/_itertools.py create mode 100644 Lib/test/test_zipfile/_test_params.py create mode 100644 Misc/NEWS.d/next/Library/2022-10-08-19-20-33.gh-issue-98108.WUObqM.rst diff --git a/Lib/test/test_zipfile/_functools.py b/Lib/test/test_zipfile/_functools.py new file mode 100644 index 00000000000000..75f2b20e06d77f --- /dev/null +++ b/Lib/test/test_zipfile/_functools.py @@ -0,0 +1,9 @@ +import functools + + +# from jaraco.functools 3.5.2 +def compose(*funcs): + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) diff --git a/Lib/test/test_zipfile/_itertools.py b/Lib/test/test_zipfile/_itertools.py new file mode 100644 index 00000000000000..559f3f111b88a3 --- /dev/null +++ b/Lib/test/test_zipfile/_itertools.py @@ -0,0 +1,12 @@ +# from more_itertools v8.13.0 +def always_iterable(obj, base_type=(str, bytes)): + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/Lib/test/test_zipfile/_test_params.py b/Lib/test/test_zipfile/_test_params.py new file mode 100644 index 00000000000000..bc95b4ebf4a168 --- /dev/null +++ b/Lib/test/test_zipfile/_test_params.py @@ -0,0 +1,39 @@ +import types +import functools + +from ._itertools import always_iterable + + +def parameterize(names, value_groups): + """ + Decorate a test method to run it as a set of subtests. + + Modeled after pytest.parametrize. + """ + + def decorator(func): + @functools.wraps(func) + def wrapped(self): + for values in value_groups: + resolved = map(Invoked.eval, always_iterable(values)) + params = dict(zip(always_iterable(names), resolved)) + with self.subTest(**params): + func(self, **params) + + return wrapped + + return decorator + + +class Invoked(types.SimpleNamespace): + """ + Wrap a function to be invoked for each usage. + """ + + @classmethod + def wrap(cls, func): + return cls(func=func) + + @classmethod + def eval(cls, cand): + return cand.func() if isinstance(cand, cls) else cand diff --git a/Lib/test/test_zipfile/test_path.py b/Lib/test/test_zipfile/test_path.py index 3c62e9a0b0e65d..02253c59e959fb 100644 --- a/Lib/test/test_zipfile/test_path.py +++ b/Lib/test/test_zipfile/test_path.py @@ -4,7 +4,12 @@ import pathlib import unittest import string -import functools +import pickle +import itertools + +from ._test_params import parameterize, Invoked +from ._functools import compose + from test.support.os_helper import temp_dir @@ -76,18 +81,12 @@ def build_alpharep_fixture(): return zf -def pass_alpharep(meth): - """ - Given a method, wrap it in a for loop that invokes method - with each subtest. 
- """ - - @functools.wraps(meth) - def wrapper(self): - for alpharep in self.zipfile_alpharep(): - meth(self, alpharep=alpharep) +alpharep_generators = [ + Invoked.wrap(build_alpharep_fixture), + Invoked.wrap(compose(add_dirs, build_alpharep_fixture)), +] - return wrapper +pass_alpharep = parameterize(['alpharep'], alpharep_generators) class TestPath(unittest.TestCase): @@ -95,12 +94,6 @@ def setUp(self): self.fixtures = contextlib.ExitStack() self.addCleanup(self.fixtures.close) - def zipfile_alpharep(self): - with self.subTest(): - yield build_alpharep_fixture() - with self.subTest(): - yield add_dirs(build_alpharep_fixture()) - def zipfile_ondisk(self, alpharep): tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) buffer = alpharep.fp @@ -418,6 +411,21 @@ def test_root_unnamed(self, alpharep): @pass_alpharep def test_inheritance(self, alpharep): cls = type('PathChild', (zipfile.Path,), {}) - for alpharep in self.zipfile_alpharep(): - file = cls(alpharep).joinpath('some dir').parent - assert isinstance(file, cls) + file = cls(alpharep).joinpath('some dir').parent + assert isinstance(file, cls) + + @parameterize( + ['alpharep', 'path_type', 'subpath'], + itertools.product( + alpharep_generators, + [str, pathlib.Path], + ['', 'b/'], + ), + ) + def test_pickle(self, alpharep, path_type, subpath): + zipfile_ondisk = path_type(self.zipfile_ondisk(alpharep)) + + saved_1 = pickle.dumps(zipfile.Path(zipfile_ondisk, at=subpath)) + restored_1 = pickle.loads(saved_1) + first, *rest = restored_1.iterdir() + assert first.read_text().startswith('content of ') diff --git a/Lib/zipfile/_path.py b/Lib/zipfile/_path.py index 67ef07a130d1ad..aea17b65b6aa2d 100644 --- a/Lib/zipfile/_path.py +++ b/Lib/zipfile/_path.py @@ -62,7 +62,25 @@ def _difference(minuend, subtrahend): return itertools.filterfalse(set(subtrahend).__contains__, minuend) -class CompleteDirs(zipfile.ZipFile): +class InitializedState: + """ + Mix-in to save the initialization state for pickling. + """ + + def __init__(self, *args, **kwargs): + self.__args = args + self.__kwargs = kwargs + super().__init__(*args, **kwargs) + + def __getstate__(self): + return self.__args, self.__kwargs + + def __setstate__(self, state): + args, kwargs = state + super().__init__(*args, **kwargs) + + +class CompleteDirs(InitializedState, zipfile.ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. diff --git a/Misc/NEWS.d/next/Library/2022-10-08-19-20-33.gh-issue-98108.WUObqM.rst b/Misc/NEWS.d/next/Library/2022-10-08-19-20-33.gh-issue-98108.WUObqM.rst new file mode 100644 index 00000000000000..7e962580dda228 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-08-19-20-33.gh-issue-98108.WUObqM.rst @@ -0,0 +1,2 @@ +``zipfile.Path`` is now pickleable if its initialization parameters were +pickleable (e.g. for file system paths). From 7f005749b27c7b9108ea24e5c0ff25068910b75c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 26 Nov 2022 16:57:20 -0500 Subject: [PATCH 063/112] gh-88330: Add more detail about what is a resource. 
(#99801) --- Doc/library/importlib.resources.rst | 14 +++++++++++--- .../2022-11-26-15-51-23.gh-issue-88330.B_wFq8.rst | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2022-11-26-15-51-23.gh-issue-88330.B_wFq8.rst diff --git a/Doc/library/importlib.resources.rst b/Doc/library/importlib.resources.rst index 827e7d8d5aced4..399191301a3614 100644 --- a/Doc/library/importlib.resources.rst +++ b/Doc/library/importlib.resources.rst @@ -11,9 +11,17 @@ .. versionadded:: 3.7 This module leverages Python's import system to provide access to *resources* -within *packages*. If you can import a package, you can access resources -within that package. Resources can be opened or read, in either binary or -text mode. +within *packages*. + +"Resources" are file-like resources associated with a module or package in +Python. The resources may be contained directly in a package or within a +subdirectory contained in that package. Resources may be text or binary. As a +result, Python module sources (.py) of a package and compilation artifacts +(pycache) are technically de-facto resources of that package. In practice, +however, resources are primarily those non-Python artifacts exposed +specifically by the package author. + +Resources can be opened or read in either binary or text mode. Resources are roughly akin to files inside directories, though it's important to keep in mind that this is just a metaphor. Resources and packages **do diff --git a/Misc/NEWS.d/next/Documentation/2022-11-26-15-51-23.gh-issue-88330.B_wFq8.rst b/Misc/NEWS.d/next/Documentation/2022-11-26-15-51-23.gh-issue-88330.B_wFq8.rst new file mode 100644 index 00000000000000..0f242eecc31258 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2022-11-26-15-51-23.gh-issue-88330.B_wFq8.rst @@ -0,0 +1 @@ +Improved the description of what a resource is in importlib.resources docs. From 25bc115df9d0e82309852609a83b5ab7f804cdc1 Mon Sep 17 00:00:00 2001 From: Ivan Savov Date: Sat, 26 Nov 2022 17:24:04 -0500 Subject: [PATCH 064/112] gh-89682: [doc] reword docstring of __contains__ to clarify that it returns a bool (GH-29043) --- .../Documentation/2022-11-26-21-43-05.gh-issue-89682.DhKoTM.rst | 1 + Objects/typeobject.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Documentation/2022-11-26-21-43-05.gh-issue-89682.DhKoTM.rst diff --git a/Misc/NEWS.d/next/Documentation/2022-11-26-21-43-05.gh-issue-89682.DhKoTM.rst b/Misc/NEWS.d/next/Documentation/2022-11-26-21-43-05.gh-issue-89682.DhKoTM.rst new file mode 100644 index 00000000000000..46be065b653952 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2022-11-26-21-43-05.gh-issue-89682.DhKoTM.rst @@ -0,0 +1 @@ +Reworded docstring of the default ``__contains__`` to clarify that it returns a :class:`bool`. 
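For context only (this snippet is not part of the change), the coercion the reworded docstring describes is easy to observe from Python:

    class Bag:
        def __contains__(self, key):
            return 2    # deliberately not a bool

    print("x" in Bag())                       # True -- the result of ``in`` is coerced to bool
    print(list.__contains__([1, 2, 3], 2))    # True -- the slot wrapper itself returns a bool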
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b993aa405f6b6a..a4974a1b4f7113 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8741,7 +8741,7 @@ static pytype_slotdef slotdefs[] = { SQSLOT(__delitem__, sq_ass_item, slot_sq_ass_item, wrap_sq_delitem, "__delitem__($self, key, /)\n--\n\nDelete self[key]."), SQSLOT(__contains__, sq_contains, slot_sq_contains, wrap_objobjproc, - "__contains__($self, key, /)\n--\n\nReturn key in self."), + "__contains__($self, key, /)\n--\n\nReturn bool(key in self)."), SQSLOT(__iadd__, sq_inplace_concat, NULL, wrap_binaryfunc, "__iadd__($self, value, /)\n--\n\nImplement self+=value."), From 024ac542d738f56b36bdeb3517a10e93da5acab9 Mon Sep 17 00:00:00 2001 From: Nick Drozd Date: Sat, 26 Nov 2022 16:33:25 -0600 Subject: [PATCH 065/112] bpo-45975: Simplify some while-loops with walrus operator (GH-29347) --- Lib/_pyio.py | 5 +---- Lib/base64.py | 15 +++------------ Lib/ctypes/_aix.py | 8 ++------ Lib/email/parser.py | 5 +---- Lib/ftplib.py | 10 ++-------- Lib/http/client.py | 16 +++------------- Lib/http/cookiejar.py | 9 ++------- Lib/mailbox.py | 5 +---- Lib/mailcap.py | 4 +--- Lib/mimetypes.py | 5 +---- Lib/pstats.py | 2 -- Lib/pydoc.py | 4 +--- Lib/quopri.py | 9 ++------- Lib/shlex.py | 5 +---- Lib/shutil.py | 5 +---- Lib/smtplib.py | 5 +---- Lib/socketserver.py | 3 +-- Lib/tarfile.py | 12 +++--------- Lib/test/test_lzma.py | 20 ++++---------------- Lib/urllib/request.py | 10 ++-------- Lib/wsgiref/handlers.py | 5 +---- Lib/wsgiref/validate.py | 5 +---- Lib/xdrlib.py | 4 +--- Lib/xml/dom/expatbuilder.py | 5 +---- Lib/xml/etree/ElementTree.py | 5 +---- Lib/xml/sax/xmlreader.py | 4 +--- Lib/xmlrpc/client.py | 5 +---- Lib/xmlrpc/server.py | 4 +--- 28 files changed, 41 insertions(+), 153 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 163cf9de279ff0..7f247ff47c9e61 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -638,10 +638,7 @@ def read(self, size=-1): def readall(self): """Read until EOF, using multiple read() call.""" res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break + while data := self.read(DEFAULT_BUFFER_SIZE): res += data if res: return bytes(res) diff --git a/Lib/base64.py b/Lib/base64.py index 30796a6fd6d05a..95dc7b0086051b 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -508,14 +508,8 @@ def b85decode(b): def encode(input, output): """Encode a file; input and output are binary files.""" - while True: - s = input.read(MAXBINSIZE) - if not s: - break - while len(s) < MAXBINSIZE: - ns = input.read(MAXBINSIZE-len(s)) - if not ns: - break + while s := input.read(MAXBINSIZE): + while len(s) < MAXBINSIZE and (ns := input.read(MAXBINSIZE-len(s))): s += ns line = binascii.b2a_base64(s) output.write(line) @@ -523,10 +517,7 @@ def encode(input, output): def decode(input, output): """Decode a file; input and output are binary files.""" - while True: - line = input.readline() - if not line: - break + while line := input.readline(): s = binascii.a2b_base64(line) output.write(s) diff --git a/Lib/ctypes/_aix.py b/Lib/ctypes/_aix.py index fc3e95cbcc88a5..ee790f713a9ee3 100644 --- a/Lib/ctypes/_aix.py +++ b/Lib/ctypes/_aix.py @@ -108,12 +108,8 @@ def get_ld_headers(file): p = Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file], universal_newlines=True, stdout=PIPE, stderr=DEVNULL) # be sure to read to the end-of-file - getting all entries - while True: - ld_header = get_ld_header(p) - if ld_header: - ldr_headers.append((ld_header, get_ld_header_info(p))) - else: - break + while 
ld_header := get_ld_header(p): + ldr_headers.append((ld_header, get_ld_header_info(p))) p.stdout.close() p.wait() return ldr_headers diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 7db4da1ff081c1..e94d455baa5262 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -49,10 +49,7 @@ def parse(self, fp, headersonly=False): feedparser = FeedParser(self._class, policy=self.policy) if headersonly: feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break + while data := fp.read(8192): feedparser.feed(data) return feedparser.close() diff --git a/Lib/ftplib.py b/Lib/ftplib.py index c7ca8f632e1bd4..a56e0c3085701b 100644 --- a/Lib/ftplib.py +++ b/Lib/ftplib.py @@ -434,10 +434,7 @@ def retrbinary(self, cmd, callback, blocksize=8192, rest=None): """ self.voidcmd('TYPE I') with self.transfercmd(cmd, rest) as conn: - while 1: - data = conn.recv(blocksize) - if not data: - break + while data := conn.recv(blocksize): callback(data) # shutdown ssl layer if _SSLSocket is not None and isinstance(conn, _SSLSocket): @@ -496,10 +493,7 @@ def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None): """ self.voidcmd('TYPE I') with self.transfercmd(cmd, rest) as conn: - while 1: - buf = fp.read(blocksize) - if not buf: - break + while buf := fp.read(blocksize): conn.sendall(buf) if callback: callback(buf) diff --git a/Lib/http/client.py b/Lib/http/client.py index 0a3e950c669622..15c5cf634cf508 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -578,11 +578,7 @@ def _read_chunked(self, amt=None): assert self.chunked != _UNKNOWN value = [] try: - while True: - chunk_left = self._get_chunk_left() - if chunk_left is None: - break - + while (chunk_left := self._get_chunk_left()) is not None: if amt is not None and amt <= chunk_left: value.append(self._safe_read(amt)) self.chunk_left = chunk_left - amt @@ -998,10 +994,7 @@ def send(self, data): encode = self._is_textIO(data) if encode and self.debuglevel > 0: print("encoding file using iso-8859-1") - while 1: - datablock = data.read(self.blocksize) - if not datablock: - break + while datablock := data.read(self.blocksize): if encode: datablock = datablock.encode("iso-8859-1") sys.audit("http.client.send", self, datablock) @@ -1031,10 +1024,7 @@ def _read_readable(self, readable): encode = self._is_textIO(readable) if encode and self.debuglevel > 0: print("encoding file using iso-8859-1") - while True: - datablock = readable.read(self.blocksize) - if not datablock: - break + while datablock := readable.read(self.blocksize): if encode: datablock = datablock.encode("iso-8859-1") yield datablock diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py index 65c45e2b17dfc0..b0161a86fdbb51 100644 --- a/Lib/http/cookiejar.py +++ b/Lib/http/cookiejar.py @@ -1915,9 +1915,7 @@ def _really_load(self, f, filename, ignore_discard, ignore_expires): "comment", "commenturl") try: - while 1: - line = f.readline() - if line == "": break + while (line := f.readline()) != "": if not line.startswith(header): continue line = line[len(header):].strip() @@ -2017,12 +2015,9 @@ def _really_load(self, f, filename, ignore_discard, ignore_expires): filename) try: - while 1: - line = f.readline() + while (line := f.readline()) != "": rest = {} - if line == "": break - # httponly is a cookie flag as defined in rfc6265 # when encoded in a netscape cookie file, # the line is prepended with "#HttpOnly_" diff --git a/Lib/mailbox.py b/Lib/mailbox.py index 70da07ed2e9e8b..59834a2b3b5243 100644 --- a/Lib/mailbox.py +++ 
b/Lib/mailbox.py @@ -1956,10 +1956,7 @@ def readlines(self, sizehint=None): def __iter__(self): """Iterate over lines.""" - while True: - line = self.readline() - if not line: - return + while line := self.readline(): yield line def tell(self): diff --git a/Lib/mailcap.py b/Lib/mailcap.py index 7278ea7051fccf..2f4656e854b3bb 100644 --- a/Lib/mailcap.py +++ b/Lib/mailcap.py @@ -90,9 +90,7 @@ def _readmailcapfile(fp, lineno): the viewing command is stored with the key "view". """ caps = {} - while 1: - line = fp.readline() - if not line: break + while line := fp.readline(): # Ignore comments and blank lines if line[0] == '#' or line.strip() == '': continue diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 3224363a3f2bfb..37228de4828de5 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -217,10 +217,7 @@ def readfp(self, fp, strict=True): list of standard types, else to the list of non-standard types. """ - while 1: - line = fp.readline() - if not line: - break + while line := fp.readline(): words = line.split() for i in range(len(words)): if words[i][0] == '#': diff --git a/Lib/pstats.py b/Lib/pstats.py index 80408313e8b27f..51bcca84188740 100644 --- a/Lib/pstats.py +++ b/Lib/pstats.py @@ -223,8 +223,6 @@ def get_sort_arg_defs(self): for word, tup in self.sort_arg_dict_default.items(): fragment = word while fragment: - if not fragment: - break if fragment in dict: bad_list[fragment] = 0 break diff --git a/Lib/pydoc.py b/Lib/pydoc.py index c79ec77a8c09e4..0a693f45230c93 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -686,9 +686,7 @@ def markup(self, text, escape=None, funcs={}, classes={}, methods={}): r'RFC[- ]?(\d+)|' r'PEP[- ]?(\d+)|' r'(self\.)?(\w+))') - while True: - match = pattern.search(text, here) - if not match: break + while match := pattern.search(text, here): start, end = match.span() results.append(escape(text[here:start])) diff --git a/Lib/quopri.py b/Lib/quopri.py index 08899c5cb73a30..f36cf7b3951cda 100755 --- a/Lib/quopri.py +++ b/Lib/quopri.py @@ -67,10 +67,7 @@ def write(s, output=output, lineEnd=b'\n'): output.write(s + lineEnd) prevline = None - while 1: - line = input.readline() - if not line: - break + while line := input.readline(): outline = [] # Strip off any readline induced trailing newline stripped = b'' @@ -126,9 +123,7 @@ def decode(input, output, header=False): return new = b'' - while 1: - line = input.readline() - if not line: break + while line := input.readline(): i, n = 0, len(line) if n > 0 and line[n-1:n] == b'\n': partial = 0; n = n-1 diff --git a/Lib/shlex.py b/Lib/shlex.py index a91c9b022627b1..f4821616b62a0f 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -333,10 +333,7 @@ def quote(s): def _print_tokens(lexer): - while 1: - tt = lexer.get_token() - if not tt: - break + while tt := lexer.get_token(): print("Token: " + repr(tt)) if __name__ == '__main__': diff --git a/Lib/shutil.py b/Lib/shutil.py index f372406a6c51a8..867925aa10cc04 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -194,10 +194,7 @@ def copyfileobj(fsrc, fdst, length=0): # Localize variable access to minimize overhead. 
fsrc_read = fsrc.read fdst_write = fdst.write - while True: - buf = fsrc_read(length) - if not buf: - break + while buf := fsrc_read(length): fdst_write(buf) def _samefile(src, dst): diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 05d2f8ccd73c98..18c91746fd7bf2 100755 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -1099,10 +1099,7 @@ def prompt(prompt): toaddrs = prompt("To").split(',') print("Enter message, end with ^D:") msg = '' - while 1: - line = sys.stdin.readline() - if not line: - break + while line := sys.stdin.readline(): msg = msg + line print("Message length is %d" % len(msg)) diff --git a/Lib/socketserver.py b/Lib/socketserver.py index 30a5cfa59fe05b..842d526b011911 100644 --- a/Lib/socketserver.py +++ b/Lib/socketserver.py @@ -292,8 +292,7 @@ def handle_request(self): selector.register(self, selectors.EVENT_READ) while True: - ready = selector.select(timeout) - if ready: + if selector.select(timeout): return self._handle_request_noblock() else: if timeout is not None: diff --git a/Lib/tarfile.py b/Lib/tarfile.py index b47015f5cb6be5..d686435d90ad1b 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1262,11 +1262,7 @@ def _proc_pax(self, tarfile): # the newline. keyword and value are both UTF-8 encoded strings. regex = re.compile(br"(\d+) ([^=]+)=") pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - + while match := regex.match(buf, pos): length, keyword = match.groups() length = int(length) if length == 0: @@ -2418,10 +2414,8 @@ def _load(self): """Read through the entire archive file and look for readable members. """ - while True: - tarinfo = self.next() - if tarinfo is None: - break + while self.next() is not None: + pass self._loaded = True def _check(self, mode=None): diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index 145c8cfced4080..18f474ba2a8bdc 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -825,10 +825,7 @@ def test_read_0(self): def test_read_10(self): with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: chunks = [] - while True: - result = f.read(10) - if not result: - break + while result := f.read(10): self.assertLessEqual(len(result), 10) chunks.append(result) self.assertEqual(b"".join(chunks), INPUT) @@ -911,10 +908,7 @@ def test_read_bad_data(self): def test_read1(self): with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: blocks = [] - while True: - result = f.read1() - if not result: - break + while result := f.read1(): blocks.append(result) self.assertEqual(b"".join(blocks), INPUT) self.assertEqual(f.read1(), b"") @@ -926,10 +920,7 @@ def test_read1_0(self): def test_read1_10(self): with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: blocks = [] - while True: - result = f.read1(10) - if not result: - break + while result := f.read1(10): blocks.append(result) self.assertEqual(b"".join(blocks), INPUT) self.assertEqual(f.read1(), b"") @@ -937,10 +928,7 @@ def test_read1_10(self): def test_read1_multistream(self): with LZMAFile(BytesIO(COMPRESSED_XZ * 5)) as f: blocks = [] - while True: - result = f.read1() - if not result: - break + while result := f.read1(): blocks.append(result) self.assertEqual(b"".join(blocks), INPUT * 5) self.assertEqual(f.read1(), b"") diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 278aa3a14bfeea..151034e6a81bf9 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -265,10 +265,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): if reporthook: reporthook(blocknum, bs, size) - while True: - block = fp.read(bs) - if not block: - break + while block := 
fp.read(bs): read += len(block) tfp.write(block) blocknum += 1 @@ -1847,10 +1844,7 @@ def retrieve(self, url, filename=None, reporthook=None, data=None): size = int(headers["Content-Length"]) if reporthook: reporthook(blocknum, bs, size) - while 1: - block = fp.read(bs) - if not block: - break + while block := fp.read(bs): read += len(block) tfp.write(block) blocknum += 1 diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index cd0916dc5553fb..cafe872c7aae9b 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -475,10 +475,7 @@ def _write(self,data): from warnings import warn warn("SimpleHandler.stdout.write() should not do partial writes", DeprecationWarning) - while True: - data = data[result:] - if not data: - break + while data := data[result:]: result = self.stdout.write(data) def _flush(self): diff --git a/Lib/wsgiref/validate.py b/Lib/wsgiref/validate.py index 6044e320a474c6..1a1853cd63a0d2 100644 --- a/Lib/wsgiref/validate.py +++ b/Lib/wsgiref/validate.py @@ -214,10 +214,7 @@ def readlines(self, *args): return lines def __iter__(self): - while 1: - line = self.readline() - if not line: - return + while line := self.readline(): yield line def close(self): diff --git a/Lib/xdrlib.py b/Lib/xdrlib.py index b56ffa59b73dcb..f8c2c18228da4d 100644 --- a/Lib/xdrlib.py +++ b/Lib/xdrlib.py @@ -224,9 +224,7 @@ def unpack_string(self): def unpack_list(self, unpack_item): list = [] - while 1: - x = self.unpack_uint() - if x == 0: break + while (x := self.unpack_uint()) != 0: if x != 1: raise ConversionError('0 or 1 expected, got %r' % (x,)) item = unpack_item() diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py index 199c22d0af347e..7dd667bf3fbe04 100644 --- a/Lib/xml/dom/expatbuilder.py +++ b/Lib/xml/dom/expatbuilder.py @@ -200,10 +200,7 @@ def parseFile(self, file): parser = self.getParser() first_buffer = True try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break + while buffer := file.read(16*1024): parser.Parse(buffer, False) if first_buffer and self.document.documentElement: self._setup_subset(buffer) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ebbe2b703bfd8f..df5d5191126ae1 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -566,10 +566,7 @@ def parse(self, source, parser=None): # it with chunks. 
self._root = parser._parse_whole(source) return self._root - while True: - data = source.read(65536) - if not data: - break + while data := source.read(65536): parser.feed(data) self._root = parser.close() return self._root diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py index 716f22840414e6..e906121d23b9ef 100644 --- a/Lib/xml/sax/xmlreader.py +++ b/Lib/xml/sax/xmlreader.py @@ -120,10 +120,8 @@ def parse(self, source): file = source.getCharacterStream() if file is None: file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer: + while buffer := file.read(self._bufsize): self.feed(buffer) - buffer = file.read(self._bufsize) self.close() def feed(self, data): diff --git a/Lib/xmlrpc/client.py b/Lib/xmlrpc/client.py index bef23f4505e03c..ea8da766cb5a7e 100644 --- a/Lib/xmlrpc/client.py +++ b/Lib/xmlrpc/client.py @@ -1339,10 +1339,7 @@ def parse_response(self, response): p, u = self.getparser() - while 1: - data = stream.read(1024) - if not data: - break + while data := stream.read(1024): if self.verbose: print("body:", repr(data)) p.feed(data) diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py index 0c4b558045a9f4..4dddb1d10e08bd 100644 --- a/Lib/xmlrpc/server.py +++ b/Lib/xmlrpc/server.py @@ -720,9 +720,7 @@ def markup(self, text, escape=None, funcs={}, classes={}, methods={}): r'RFC[- ]?(\d+)|' r'PEP[- ]?(\d+)|' r'(self\.)?((?:\w|\.)+))\b') - while 1: - match = pattern.search(text, here) - if not match: break + while match := pattern.search(text, here): start, end = match.span() results.append(escape(text[here:start])) From 191708c56cf45e0e1c98a0e7292ffa67b7e3b09c Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sun, 27 Nov 2022 11:24:48 +0530 Subject: [PATCH 066/112] GH-66285: fix forking in asyncio (#99769) Closes #66285 --- Lib/asyncio/events.py | 11 +++ Lib/test/test_asyncio/test_unix_events.py | 98 +++++++++++++++++++ ...2-11-17-10-56-47.gh-issue-66285.KvjlaB.rst | 1 + 3 files changed, 110 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index a327ba54a323a8..2836bbcc463fe5 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -13,6 +13,7 @@ import contextvars import os +import signal import socket import subprocess import sys @@ -842,3 +843,13 @@ def set_child_watcher(watcher): _c_get_running_loop = get_running_loop _c_get_event_loop = get_event_loop _c__get_event_loop = _get_event_loop + + +if hasattr(os, 'fork'): + def on_fork(): + # Reset the loop and wakeupfd in the forked child process. 
+ if _event_loop_policy is not None: + _event_loop_policy._local = BaseDefaultEventLoopPolicy._Local() + signal.set_wakeup_fd(-1) + + os.register_at_fork(after_in_child=on_fork) diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py index 93e8611f184d25..092edb215854b7 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -3,6 +3,7 @@ import contextlib import errno import io +import multiprocessing import os import pathlib import signal @@ -15,6 +16,8 @@ import warnings from test.support import os_helper from test.support import socket_helper +from test.support import wait_process +from test.support import hashlib_helper if sys.platform == 'win32': raise unittest.SkipTest('UNIX only') @@ -1867,5 +1870,100 @@ async def runner(): wsock.close() +@unittest.skipUnless(hasattr(os, 'fork'), 'requires os.fork()') +class TestFork(unittest.IsolatedAsyncioTestCase): + + async def test_fork_not_share_event_loop(self): + # The forked process should not share the event loop with the parent + loop = asyncio.get_running_loop() + r, w = os.pipe() + self.addCleanup(os.close, r) + self.addCleanup(os.close, w) + pid = os.fork() + if pid == 0: + # child + try: + loop = asyncio.get_event_loop_policy().get_event_loop() + os.write(w, str(id(loop)).encode()) + finally: + os._exit(0) + else: + # parent + child_loop = int(os.read(r, 100).decode()) + self.assertNotEqual(child_loop, id(loop)) + wait_process(pid, exitcode=0) + + @hashlib_helper.requires_hashdigest('md5') + def test_fork_signal_handling(self): + # Sending signal to the forked process should not affect the parent + # process + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + child_started = manager.Event() + child_handled = manager.Event() + parent_handled = manager.Event() + + def child_main(): + signal.signal(signal.SIGTERM, lambda *args: child_handled.set()) + child_started.set() + time.sleep(1) + + async def main(): + loop = asyncio.get_running_loop() + loop.add_signal_handler(signal.SIGTERM, lambda *args: parent_handled.set()) + + process = ctx.Process(target=child_main) + process.start() + child_started.wait() + os.kill(process.pid, signal.SIGTERM) + process.join() + + async def func(): + await asyncio.sleep(0.1) + return 42 + + # Test parent's loop is still functional + self.assertEqual(await asyncio.create_task(func()), 42) + + asyncio.run(main()) + + self.assertFalse(parent_handled.is_set()) + self.assertTrue(child_handled.is_set()) + + @hashlib_helper.requires_hashdigest('md5') + def test_fork_asyncio_run(self): + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + result = manager.Value('i', 0) + + async def child_main(): + await asyncio.sleep(0.1) + result.value = 42 + + process = ctx.Process(target=lambda: asyncio.run(child_main())) + process.start() + process.join() + + self.assertEqual(result.value, 42) + + @hashlib_helper.requires_hashdigest('md5') + def test_fork_asyncio_subprocess(self): + ctx = multiprocessing.get_context('fork') + manager = ctx.Manager() + self.addCleanup(manager.shutdown) + result = manager.Value('i', 1) + + async def child_main(): + proc = await asyncio.create_subprocess_exec(sys.executable, '-c', 'pass') + result.value = await proc.wait() + + process = ctx.Process(target=lambda: asyncio.run(child_main())) + process.start() + process.join() + + self.assertEqual(result.value, 0) + if __name__ == '__main__': 
unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst b/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst new file mode 100644 index 00000000000000..ebd82173882726 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-17-10-56-47.gh-issue-66285.KvjlaB.rst @@ -0,0 +1 @@ +Fix :mod:`asyncio` to not share event loop and signal wakeupfd in forked processes. Patch by Kumar Aditya. From 62a5dc13e941d01beb215db4218a10977914ab55 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Sat, 26 Nov 2022 22:27:41 -0800 Subject: [PATCH 067/112] bpo-43327: Fix the docs for PyImport_ImportFrozenModuleObject() (#24659) The docs stated that PyImport_ImportFrozenModuleObject() returns a new reference, but it actually returns an int. Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> --- Doc/data/refcounts.dat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 51ccacf13f9e3b..349c4dd5be3d81 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1018,10 +1018,10 @@ PyImport_Import:PyObject*::+1: PyImport_Import:PyObject*:name:0: PyImport_ImportFrozenModule:int::: -PyImport_ImportFrozenModule:const char*::: +PyImport_ImportFrozenModule:const char*:name:: PyImport_ImportFrozenModuleObject:int::: -PyImport_ImportFrozenModuleObject:PyObject*::+1: +PyImport_ImportFrozenModuleObject:PyObject*:name:+1: PyImport_ImportModule:PyObject*::+1: PyImport_ImportModule:const char*:name:: From 22860dbbc8b53954055847d2bb036af68b4ea409 Mon Sep 17 00:00:00 2001 From: George Zhang Date: Sun, 27 Nov 2022 01:38:39 -0500 Subject: [PATCH 068/112] doc: Remove backslashes in doctest grammar docs (#29346) --- Doc/library/doctest.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 75c6ee289a91e9..c106d5a3383a5e 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -696,10 +696,10 @@ special Python comments following an example's source code: .. productionlist:: doctest directive: "#" "doctest:" `directive_options` - directive_options: `directive_option` ("," `directive_option`)\* + directive_options: `directive_option` ("," `directive_option`)* directive_option: `on_or_off` `directive_option_name` - on_or_off: "+" \| "-" - directive_option_name: "DONT_ACCEPT_BLANKLINE" \| "NORMALIZE_WHITESPACE" \| ... + on_or_off: "+" | "-" + directive_option_name: "DONT_ACCEPT_BLANKLINE" | "NORMALIZE_WHITESPACE" | ... Whitespace is not allowed between the ``+`` or ``-`` and the directive option name. The directive option name can be any of the option flag names explained From 65629399bcfe2a6606b8201d190877f7f54e6be5 Mon Sep 17 00:00:00 2001 From: Brad Wolfe Date: Sun, 27 Nov 2022 11:25:12 +0100 Subject: [PATCH 069/112] gh-85988: Change documentation for sys.float_info.rounds (GH-99675) * Change documentation for sys.float_info.rounds Change the documentation for sys.float_info.rounds to remove references to C99 section 5.2.4.2.2 and instead place the available values inline. * Correction to previous documentation change Newlines were not preserved in generated HTML on previous commit. I have changes the list to a comma-separated list of values and their meanings. * Clarify source for value of FLT_ROUNDS Clarify the source of the FLT_ROUNDS value and change 'floating-point addition' to 'floating-point arithmetic' to indicate that the rounding mode applies to all arithmetic operations. 
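A one-liner (illustrative, not part of the patch) for checking the value the reworded table describes; on typical IEEE-754 hosts it prints 1, i.e. round-to-nearest:

    import sys
    print(sys.float_info.rounds)    # usually 1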
--- Doc/library/sys.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index d54ecd75a2628f..428ce51165c9b5 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -604,12 +604,18 @@ always available. +---------------------+----------------+--------------------------------------------------+ | :const:`radix` | FLT_RADIX | radix of exponent representation | +---------------------+----------------+--------------------------------------------------+ - | :const:`rounds` | FLT_ROUNDS | integer constant representing the rounding mode | - | | | used for arithmetic operations. This reflects | - | | | the value of the system FLT_ROUNDS macro at | - | | | interpreter startup time. See section 5.2.4.2.2 | - | | | of the C99 standard for an explanation of the | - | | | possible values and their meanings. | + | :const:`rounds` | FLT_ROUNDS | integer representing the rounding mode for | + | | | floating-point arithmetic. This reflects the | + | | | value of the system FLT_ROUNDS macro at | + | | | interpreter startup time: | + | | | ``-1`` indeterminable, | + | | | ``0`` toward zero, | + | | | ``1`` to nearest, | + | | | ``2`` toward positive infinity, | + | | | ``3`` toward negative infinity | + | | | | + | | | All other values for FLT_ROUNDS characterize | + | | | implementation-defined rounding behavior. | +---------------------+----------------+--------------------------------------------------+ The attribute :attr:`sys.float_info.dig` needs further explanation. If From 9c9f085e9a1d1464376ea421e5c96472ca11c3b4 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sun, 27 Nov 2022 12:39:23 +0200 Subject: [PATCH 070/112] Remove unused local variables in inspect.py (#24218) --- Lib/inspect.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index d0015aa202044e..311a3f7e04b6a3 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -656,7 +656,7 @@ def classify_class_attrs(cls): if name == '__dict__': raise Exception("__dict__ is special, don't want the proxy") get_obj = getattr(cls, name) - except Exception as exc: + except Exception: pass else: homecls = getattr(get_obj, "__objclass__", homecls) @@ -1310,7 +1310,6 @@ def getargs(co): nkwargs = co.co_kwonlyargcount args = list(names[:nargs]) kwonlyargs = list(names[nargs:nargs+nkwargs]) - step = 0 nargs += nkwargs varargs = None From d08fb257698e3475d6f69bb808211d39e344e5b2 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Sun, 27 Nov 2022 11:56:14 +0100 Subject: [PATCH 071/112] GH-87235: Make sure "python /dev/fd/9 9 header_offset: - raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) - file_offset += arc_offset - - try: - name = fp.read(name_size) + fp.seek(-END_CENTRAL_DIR_SIZE, 2) + header_position = fp.tell() + buffer = fp.read(END_CENTRAL_DIR_SIZE) except OSError: raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) - if len(name) != name_size: + if len(buffer) != END_CENTRAL_DIR_SIZE: raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) - # On Windows, calling fseek to skip over the fields we don't use is - # slower than reading the data because fseek flushes stdio's - # internal buffers. See issue #8745. + if buffer[:4] != STRING_END_ARCHIVE: + # Bad: End of Central Dir signature + # Check if there's a comment. 
+ try: + fp.seek(0, 2) + file_size = fp.tell() + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", + path=archive) + max_comment_start = max(file_size - MAX_COMMENT_LEN - + END_CENTRAL_DIR_SIZE, 0) + try: + fp.seek(max_comment_start) + data = fp.read() + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", + path=archive) + pos = data.rfind(STRING_END_ARCHIVE) + if pos < 0: + raise ZipImportError(f'not a Zip file: {archive!r}', + path=archive) + buffer = data[pos:pos+END_CENTRAL_DIR_SIZE] + if len(buffer) != END_CENTRAL_DIR_SIZE: + raise ZipImportError(f"corrupt Zip file: {archive!r}", + path=archive) + header_position = file_size - len(data) + pos + + header_size = _unpack_uint32(buffer[12:16]) + header_offset = _unpack_uint32(buffer[16:20]) + if header_position < header_size: + raise ZipImportError(f'bad central directory size: {archive!r}', path=archive) + if header_position < header_offset: + raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive) + header_position -= header_size + arc_offset = header_position - header_offset + if arc_offset < 0: + raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive) + + files = {} + # Start of Central Directory + count = 0 try: - if len(fp.read(header_size - name_size)) != header_size - name_size: - raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + fp.seek(header_position) except OSError: raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + while True: + buffer = fp.read(46) + if len(buffer) < 4: + raise EOFError('EOF read where not expected') + # Start of file header + if buffer[:4] != b'PK\x01\x02': + break # Bad: Central Dir File Header + if len(buffer) != 46: + raise EOFError('EOF read where not expected') + flags = _unpack_uint16(buffer[8:10]) + compress = _unpack_uint16(buffer[10:12]) + time = _unpack_uint16(buffer[12:14]) + date = _unpack_uint16(buffer[14:16]) + crc = _unpack_uint32(buffer[16:20]) + data_size = _unpack_uint32(buffer[20:24]) + file_size = _unpack_uint32(buffer[24:28]) + name_size = _unpack_uint16(buffer[28:30]) + extra_size = _unpack_uint16(buffer[30:32]) + comment_size = _unpack_uint16(buffer[32:34]) + file_offset = _unpack_uint32(buffer[42:46]) + header_size = name_size + extra_size + comment_size + if file_offset > header_offset: + raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) + file_offset += arc_offset - if flags & 0x800: - # UTF-8 file names extension - name = name.decode() - else: - # Historical ZIP filename encoding try: - name = name.decode('ascii') - except UnicodeDecodeError: - name = name.decode('latin1').translate(cp437_table) - - name = name.replace('/', path_sep) - path = _bootstrap_external._path_join(archive, name) - t = (path, compress, data_size, file_size, file_offset, time, date, crc) - files[name] = t - count += 1 + name = fp.read(name_size) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + if len(name) != name_size: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + # On Windows, calling fseek to skip over the fields we don't use is + # slower than reading the data because fseek flushes stdio's + # internal buffers. See issue #8745. 
+ try: + if len(fp.read(header_size - name_size)) != header_size - name_size: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + + if flags & 0x800: + # UTF-8 file names extension + name = name.decode() + else: + # Historical ZIP filename encoding + try: + name = name.decode('ascii') + except UnicodeDecodeError: + name = name.decode('latin1').translate(cp437_table) + + name = name.replace('/', path_sep) + path = _bootstrap_external._path_join(archive, name) + t = (path, compress, data_size, file_size, file_offset, time, date, crc) + files[name] = t + count += 1 + finally: + fp.seek(start_offset) _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive) return files diff --git a/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst b/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst new file mode 100644 index 00000000000000..3111e4975e87b3 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst @@ -0,0 +1 @@ +On macOS ``python3 /dev/fd/9 9 Date: Sun, 27 Nov 2022 06:01:02 -0500 Subject: [PATCH 072/112] gh-99815: remove unused 'invalid' sentinel value and code that checks for it in inspect.signature parsing (GH-21104) --- Lib/inspect.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index 311a3f7e04b6a3..a896fcda31d1dd 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -2184,7 +2184,6 @@ def _signature_fromstr(cls, obj, s, skip_bound_arg=True): parameters = [] empty = Parameter.empty - invalid = object() module = None module_dict = {} @@ -2234,17 +2233,12 @@ def visit_Name(self, node): def p(name_node, default_node, default=empty): name = parse_name(name_node) - if name is invalid: - return None if default_node and default_node is not _empty: try: default_node = RewriteSymbolics().visit(default_node) - o = ast.literal_eval(default_node) + default = ast.literal_eval(default_node) except ValueError: - o = invalid - if o is invalid: return None - default = o if o is not invalid else default parameters.append(Parameter(name, kind, default=default, annotation=empty)) # non-keyword-only parameters From 2653b82c1a44371ad0da6b5a1101abbda4acd2d3 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 27 Nov 2022 14:15:26 +0300 Subject: [PATCH 073/112] gh-99677: Deduplicate self-type in `mro` in `inspect._getmembers` (#99678) Closes #99677 --- Lib/inspect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index a896fcda31d1dd..31ac888126b57c 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -537,7 +537,7 @@ def _getmembers(object, predicate, getter): processed = set() names = dir(object) if isclass(object): - mro = (object,) + getmro(object) + mro = getmro(object) # add any DynamicClassAttributes to the list of names if object is a class; # this may result in duplicate entries if, for example, a virtual # attribute with the same name as a DynamicClassAttribute exists From 969620d59ab12fc55d0e757a6fbee6aff29830ea Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 27 Nov 2022 19:48:29 +0200 Subject: [PATCH 074/112] Docs: Move .PHONY to each section to avoid copy/paste omissions (#99396) --- Doc/Makefile | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/Doc/Makefile b/Doc/Makefile index b09a9d754fb5aa..3d484ac3ae7937 100644 --- 
a/Doc/Makefile +++ b/Doc/Makefile @@ -21,10 +21,7 @@ PAPEROPT_letter = -D latex_elements.papersize=letterpaper ALLSPHINXOPTS = -b $(BUILDER) -d build/doctrees $(PAPEROPT_$(PAPER)) -j auto \ $(SPHINXOPTS) $(SPHINXERRORHANDLING) . build/$(BUILDER) $(SOURCES) -.PHONY: help build html htmlhelp latex text texinfo epub changes linkcheck \ - coverage doctest pydoc-topics htmlview clean clean-venv venv dist check serve \ - autobuild-dev autobuild-dev-html autobuild-stable autobuild-stable-html - +.PHONY: help help: @echo "Please use \`make ' where is one of" @echo " clean to remove build files" @@ -44,6 +41,7 @@ help: @echo " dist to create a \"dist\" directory with archived docs for download" @echo " check to run a check for frequent markup errors" +.PHONY: build build: -mkdir -p build # Look first for a Misc/NEWS file (building from a source release tarball @@ -70,38 +68,46 @@ build: $(SPHINXBUILD) $(ALLSPHINXOPTS) @echo +.PHONY: html html: BUILDER = html html: build @echo "Build finished. The HTML pages are in build/html." +.PHONY: htmlhelp htmlhelp: BUILDER = htmlhelp htmlhelp: build @echo "Build finished; now you can run HTML Help Workshop with the" \ "build/htmlhelp/pydoc.hhp project file." +.PHONY: latex latex: BUILDER = latex latex: build @echo "Build finished; the LaTeX files are in build/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." +.PHONY: text text: BUILDER = text text: build @echo "Build finished; the text files are in build/text." +.PHONY: texinfo texinfo: BUILDER = texinfo texinfo: build @echo "Build finished; the python.texi file is in build/texinfo." @echo "Run \`make info' in that directory to run it through makeinfo." +.PHONY: epub epub: BUILDER = epub epub: build @echo "Build finished; the epub files are in build/epub." +.PHONY: changes changes: BUILDER = changes changes: build @echo "The overview file is in build/changes." +.PHONY: linkcheck linkcheck: BUILDER = linkcheck linkcheck: @$(MAKE) build BUILDER=$(BUILDER) || { \ @@ -109,10 +115,12 @@ linkcheck: "or in build/$(BUILDER)/output.txt"; \ false; } +.PHONY: coverage coverage: BUILDER = coverage coverage: build @echo "Coverage finished; see c.txt and python.txt in build/coverage" +.PHONY: doctest doctest: BUILDER = doctest doctest: @$(MAKE) build BUILDER=$(BUILDER) || { \ @@ -120,20 +128,25 @@ doctest: "results in build/doctest/output.txt"; \ false; } +.PHONY: pydoc-topics pydoc-topics: BUILDER = pydoc-topics pydoc-topics: build @echo "Building finished; now run this:" \ "cp build/pydoc-topics/topics.py ../Lib/pydoc_data/topics.py" +.PHONY: htmlview htmlview: html $(PYTHON) -c "import os, webbrowser; webbrowser.open('file://' + os.path.realpath('build/html/index.html'))" +.PHONY: clean clean: clean-venv -rm -rf build/* +.PHONY: clean-venv clean-venv: rm -rf $(VENVDIR) +.PHONY: venv venv: @if [ -d $(VENVDIR) ] ; then \ echo "venv already exists."; \ @@ -145,6 +158,7 @@ venv: echo "The venv has been created in the $(VENVDIR) directory"; \ fi +.PHONY: dist dist: rm -rf dist mkdir -p dist @@ -199,12 +213,14 @@ dist: rm -r dist/python-$(DISTVERSION)-docs-texinfo rm dist/python-$(DISTVERSION)-docs-texinfo.tar +.PHONY: check check: # Check the docs and NEWS files with sphinx-lint. # Ignore the tools and venv dirs and check that the default role is not used. 
$(SPHINXLINT) -i tools -i $(VENVDIR) --enable default-role $(SPHINXLINT) --enable default-role ../Misc/NEWS.d/next/ +.PHONY: serve serve: @echo "The serve target was removed, use htmlview instead (see bpo-36329)" @@ -216,15 +232,18 @@ serve: # output files) # for development releases: always build +.PHONY: autobuild-dev autobuild-dev: make dist SPHINXOPTS='$(SPHINXOPTS) -Ea -A daily=1' # for quick rebuilds (HTML only) +.PHONY: autobuild-dev-html autobuild-dev-html: make html SPHINXOPTS='$(SPHINXOPTS) -Ea -A daily=1' # for stable releases: only build if not in pre-release stage (alpha, beta) # release candidate downloads are okay, since the stable tree can be in that stage +.PHONY: autobuild-stable autobuild-stable: @case $(DISTVERSION) in *[ab]*) \ echo "Not building; $(DISTVERSION) is not a release version."; \ @@ -232,6 +251,7 @@ autobuild-stable: esac @make autobuild-dev +.PHONY: autobuild-stable-html autobuild-stable-html: @case $(DISTVERSION) in *[ab]*) \ echo "Not building; $(DISTVERSION) is not a release version."; \ From dfc2732a57e3ea6603d62f769d4f9c80be726fa4 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Sun, 27 Nov 2022 17:58:39 +0000 Subject: [PATCH 075/112] gh-91340: Document multiprocessing.set_start_method force parameter (GH-32339) #91340 https://bugs.python.org/issue47184 Automerge-Triggered-By: GH:kumaraditya303 --- Doc/library/multiprocessing.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 5516084780673f..b5ceeb796f8f2f 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1089,10 +1089,14 @@ Miscellaneous .. versionchanged:: 3.11 Accepts a :term:`path-like object`. -.. function:: set_start_method(method) +.. function:: set_start_method(method, force=False) Set the method which should be used to start child processes. - *method* can be ``'fork'``, ``'spawn'`` or ``'forkserver'``. + The *method* argument can be ``'fork'``, ``'spawn'`` or ``'forkserver'``. + Raises :exc:`RuntimeError` if the start method has already been set and *force* + is not ``True``. If *method* is ``None`` and *force* is ``True`` then the start + method is set to ``None``. If *method* is ``None`` and *force* is ``False`` + then the context is set to the default context. Note that this should be called at most once, and it should be protected inside the ``if __name__ == '__main__'`` clause of the From 276643e207d44c53b87a8108d5b00982defcce1e Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Sun, 27 Nov 2022 22:08:30 +0100 Subject: [PATCH 076/112] Docs: both sqlite3 "point examples" now adapt to str (#99823) --- Doc/library/sqlite3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 0dac2312b2feb1..7e2235b285b814 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -2105,7 +2105,7 @@ The following example illustrates the implicit and explicit approaches: return f"Point({self.x}, {self.y})" def adapt_point(point): - return f"{point.x};{point.y}".encode("utf-8") + return f"{point.x};{point.y}" def convert_point(s): x, y = list(map(float, s.split(b";"))) From 594de165bf2f21d6b28eb17003ea78fc20c0ffed Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Mon, 28 Nov 2022 09:49:10 +0300 Subject: [PATCH 077/112] gh-51524: Fix bug when calling trace.CoverageResults with valid infile (#99629) Co-authored-by: Terry Jan Reedy --- Lib/test/test_trace.py | 10 ++++++++++ Lib/trace.py | 2 +- .../2022-11-21-17-56-18.gh-issue-51524.nTykx8.rst | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-21-17-56-18.gh-issue-51524.nTykx8.rst diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index 5f712111ca14e0..fad2b3b8379ffc 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -1,4 +1,5 @@ import os +from pickle import dump import sys from test.support import captured_stdout from test.support.os_helper import (TESTFN, rmtree, unlink) @@ -412,6 +413,15 @@ def test_issue9936(self): self.assertIn(modname, coverage) self.assertEqual(coverage[modname], (5, 100)) + def test_coverageresults_update(self): + # Update empty CoverageResults with a non-empty infile. + infile = TESTFN + '-infile' + with open(infile, 'wb') as f: + dump(({}, {}, {'caller': 1}), f, protocol=1) + self.addCleanup(unlink, infile) + results = trace.CoverageResults({}, {}, infile, {}) + self.assertEqual(results.callers, {'caller': 1}) + ### Tests that don't mess with sys.settrace and can be traced ### themselves TODO: Skip tests that do mess with sys.settrace when ### regrtest is invoked with -T option. diff --git a/Lib/trace.py b/Lib/trace.py index 2cf3643878d4b8..213e46517d683d 100755 --- a/Lib/trace.py +++ b/Lib/trace.py @@ -172,7 +172,7 @@ def __init__(self, counts=None, calledfuncs=None, infile=None, try: with open(self.infile, 'rb') as f: counts, calledfuncs, callers = pickle.load(f) - self.update(self.__class__(counts, calledfuncs, callers)) + self.update(self.__class__(counts, calledfuncs, callers=callers)) except (OSError, EOFError, ValueError) as err: print(("Skipping counts file %r: %s" % (self.infile, err)), file=sys.stderr) diff --git a/Misc/NEWS.d/next/Library/2022-11-21-17-56-18.gh-issue-51524.nTykx8.rst b/Misc/NEWS.d/next/Library/2022-11-21-17-56-18.gh-issue-51524.nTykx8.rst new file mode 100644 index 00000000000000..63fe7b8a3a3254 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-21-17-56-18.gh-issue-51524.nTykx8.rst @@ -0,0 +1 @@ +Fix bug when calling trace.CoverageResults with valid infile. From 219696abb240607d3f807853c4c180825e60716e Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 28 Nov 2022 09:22:08 +0100 Subject: [PATCH 078/112] gh-99249: Clarify "read-only" slots tp_bases & tp_mro (GH-99342) These slots are marked "should be treated as read-only" in the table at the start of the document. That doesn't say anything about setting them in the static struct. `tp_bases` docs did say that it should be ``NULL`` (TIL!). 
If you ignore that, seemingly nothing bad happens. However, some slots may not be inherited, depending on which sub-slot structs are present. (FWIW, NumPy sets tp_bases and is affected by the quirk -- though to be fair, its DUAL_INHERIT code probably predates tp_bases docs, and also the result happens to be benign.) This patch makes things explicit. It also makes the summary table legend easier to scan. Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> --- Doc/c-api/typeobj.rst | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 4c462f46056739..8f8869ec668a8d 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -149,10 +149,16 @@ Quick Reference +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ .. [#slots] - A slot name in parentheses indicates it is (effectively) deprecated. - Names in angle brackets should be treated as read-only. - Names in square brackets are for internal use only. - "" (as a prefix) means the field is required (must be non-``NULL``). + + **()**: A slot name in parentheses indicates it is (effectively) deprecated. + + **<>**: Names in angle brackets should be initially set to ``NULL`` and + treated as read-only. + + **[]**: Names in square brackets are for internal use only. + + **** (as a prefix) means the field is required (must be non-``NULL``). + .. [#cols] Columns: **"O"**: set on :c:type:`PyBaseObject_Type` @@ -1923,8 +1929,19 @@ and :c:type:`PyType_Type` effectively act as defaults.) Tuple of base types. - This is set for types created by a class statement. It should be ``NULL`` for - statically defined types. + This field should be set to ``NULL`` and treated as read-only. + Python will fill it in when the type is :c:func:`initialized `. + + For dynamically created classes, the ``Py_tp_bases`` + :c:type:`slot ` can be used instead of the *bases* argument + of :c:func:`PyType_FromSpecWithBases`. + The argument form is preferred. + + .. warning:: + + Multiple inheritance does not work well for statically defined types. + If you set ``tp_bases`` to a tuple, Python will not raise an error, + but some slots will only be inherited from the first base. **Inheritance:** @@ -1936,6 +1953,8 @@ and :c:type:`PyType_Type` effectively act as defaults.) Tuple containing the expanded set of base types, starting with the type itself and ending with :class:`object`, in Method Resolution Order. + This field should be set to ``NULL`` and treated as read-only. + Python will fill it in when the type is :c:func:`initialized `. **Inheritance:** From 492dc02b01828f346dd62412fefc654e781de923 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 28 Nov 2022 10:41:24 +0100 Subject: [PATCH 079/112] bpo-41825: restructure docs for the os.wait*() family (GH-22356) --- Doc/library/os.rst | 235 +++++++++++------- .../2020-09-22-12-32-16.bpo-41825.npcaCb.rst | 3 + 2 files changed, 145 insertions(+), 93 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2020-09-22-12-32-16.bpo-41825.npcaCb.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 775aa32df99a46..b06f9bbcd831c2 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -4491,6 +4491,9 @@ written in Python, such as a mail server's external command delivery program. number is zero); the high bit of the low byte is set if a core file was produced. 
+ If there are no children that could be waited for, :exc:`ChildProcessError` + is raised. + :func:`waitstatus_to_exitcode` can be used to convert the exit status into an exit code. @@ -4498,76 +4501,40 @@ written in Python, such as a mail server's external command delivery program. .. seealso:: - :func:`waitpid` can be used to wait for the completion of a specific - child process and has more options. + The other :func:`!wait*` functions documented below can be used to wait for the + completion of a specific child process and have more options. + :func:`waitpid` is the only one also available on Windows. -.. function:: waitid(idtype, id, options, /) - Wait for the completion of one or more child processes. - *idtype* can be :data:`P_PID`, :data:`P_PGID`, :data:`P_ALL`, or - :data:`P_PIDFD` on Linux. - *id* specifies the pid to wait on. - *options* is constructed from the ORing of one or more of :data:`WEXITED`, - :data:`WSTOPPED` or :data:`WCONTINUED` and additionally may be ORed with - :data:`WNOHANG` or :data:`WNOWAIT`. The return value is an object - representing the data contained in the :c:type:`siginfo_t` structure, namely: - :attr:`si_pid`, :attr:`si_uid`, :attr:`si_signo`, :attr:`si_status`, - :attr:`si_code` or ``None`` if :data:`WNOHANG` is specified and there are no - children in a waitable state. - - .. availability:: Unix, not Emscripten, not WASI. - - .. versionadded:: 3.3 - -.. data:: P_PID - P_PGID - P_ALL - - These are the possible values for *idtype* in :func:`waitid`. They affect - how *id* is interpreted. - - .. availability:: Unix, not Emscripten, not WASI. - - .. versionadded:: 3.3 - -.. data:: P_PIDFD - - This is a Linux-specific *idtype* that indicates that *id* is a file - descriptor that refers to a process. - - .. availability:: Linux >= 5.4 - - .. versionadded:: 3.9 - -.. data:: WEXITED - WSTOPPED - WNOWAIT +.. function:: waitid(idtype, id, options, /) - Flags that can be used in *options* in :func:`waitid` that specify what - child signal to wait for. + Wait for the completion of a child process. - .. availability:: Unix, not Emscripten, not WASI. + *idtype* can be :data:`P_PID`, :data:`P_PGID`, :data:`P_ALL`, or (on Linux) :data:`P_PIDFD`. + The interpretation of *id* depends on it; see their individual descriptions. - .. versionadded:: 3.3 + *options* is an OR combination of flags. At least one of :data:`WEXITED`, + :data:`WSTOPPED` or :data:`WCONTINUED` is required; + :data:`WNOHANG` and :data:`WNOWAIT` are additional optional flags. + The return value is an object representing the data contained in the + :c:type:`!siginfo_t` structure with the following attributes: -.. data:: CLD_EXITED - CLD_KILLED - CLD_DUMPED - CLD_TRAPPED - CLD_STOPPED - CLD_CONTINUED + * :attr:`!si_pid` (process ID) + * :attr:`!si_uid` (real user ID of the child) + * :attr:`!si_signo` (always :data:`~signal.SIGCHLD`) + * :attr:`!si_status` (the exit status or signal number, depending on :attr:`!si_code`) + * :attr:`!si_code` (see :data:`CLD_EXITED` for possible values) - These are the possible values for :attr:`si_code` in the result returned by - :func:`waitid`. + If :data:`WNOHANG` is specified and there are no matching children in the + requested state, ``None`` is returned. + Otherwise, if there are no matching children + that could be waited for, :exc:`ChildProcessError` is raised. .. availability:: Unix, not Emscripten, not WASI. .. versionadded:: 3.3 - .. versionchanged:: 3.9 - Added :data:`CLD_KILLED` and :data:`CLD_STOPPED` values. - .. 
function:: waitpid(pid, options, /) @@ -4585,8 +4552,11 @@ written in Python, such as a mail server's external command delivery program. ``-1``, status is requested for any process in the process group ``-pid`` (the absolute value of *pid*). - An :exc:`OSError` is raised with the value of errno when the syscall - returns -1. + *options* is an OR combination of flags. If it contains :data:`WNOHANG` and + there are no matching children in the requested state, ``(0, 0)`` is + returned. Otherwise, if there are no matching children that could be waited + for, :exc:`ChildProcessError` is raised. Other options that can be used are + :data:`WUNTRACED` and :data:`WCONTINUED`. On Windows: Wait for completion of a process given by process handle *pid*, and return a tuple containing *pid*, and its exit status shifted left by 8 bits @@ -4599,7 +4569,7 @@ written in Python, such as a mail server's external command delivery program. :func:`waitstatus_to_exitcode` can be used to convert the exit status into an exit code. - .. availability:: Unix, not Emscripten, not WASI. + .. availability:: Unix, Windows, not Emscripten, not WASI. .. versionchanged:: 3.5 If the system call is interrupted and the signal handler does not raise an @@ -4612,9 +4582,9 @@ written in Python, such as a mail server's external command delivery program. Similar to :func:`waitpid`, except no process id argument is given and a 3-element tuple containing the child's process id, exit status indication, and resource usage information is returned. Refer to - :mod:`resource`.\ :func:`~resource.getrusage` for details on resource usage - information. The option argument is the same as that provided to - :func:`waitpid` and :func:`wait4`. + :func:`resource.getrusage` for details on resource usage information. The + *options* argument is the same as that provided to :func:`waitpid` and + :func:`wait4`. :func:`waitstatus_to_exitcode` can be used to convert the exit status into an exitcode. @@ -4625,10 +4595,10 @@ written in Python, such as a mail server's external command delivery program. .. function:: wait4(pid, options) Similar to :func:`waitpid`, except a 3-element tuple, containing the child's - process id, exit status indication, and resource usage information is returned. - Refer to :mod:`resource`.\ :func:`~resource.getrusage` for details on - resource usage information. The arguments to :func:`wait4` are the same - as those provided to :func:`waitpid`. + process id, exit status indication, and resource usage information is + returned. Refer to :func:`resource.getrusage` for details on resource usage + information. The arguments to :func:`wait4` are the same as those provided + to :func:`waitpid`. :func:`waitstatus_to_exitcode` can be used to convert the exit status into an exitcode. @@ -4636,6 +4606,111 @@ written in Python, such as a mail server's external command delivery program. .. availability:: Unix, not Emscripten, not WASI. +.. data:: P_PID + P_PGID + P_ALL + P_PIDFD + + These are the possible values for *idtype* in :func:`waitid`. They affect + how *id* is interpreted: + + * :data:`!P_PID` - wait for the child whose PID is *id*. + * :data:`!P_PGID` - wait for any child whose progress group ID is *id*. + * :data:`!P_ALL` - wait for any child; *id* is ignored. + * :data:`!P_PIDFD` - wait for the child identified by the file descriptor + *id* (a process file descriptor created with :func:`pidfd_open`). + + .. availability:: Unix, not Emscripten, not WASI. + + .. note:: :data:`!P_PIDFD` is only available on Linux >= 5.4. + + .. 
versionadded:: 3.3 + .. versionadded:: 3.9 + The :data:`!P_PIDFD` constant. + + +.. data:: WCONTINUED + + This *options* flag for :func:`waitpid`, :func:`wait3`, :func:`wait4`, and + :func:`waitid` causes child processes to be reported if they have been + continued from a job control stop since they were last reported. + + .. availability:: Unix, not Emscripten, not WASI. + + +.. data:: WEXITED + + This *options* flag for :func:`waitid` causes child processes that have terminated to + be reported. + + The other ``wait*`` functions always report children that have terminated, + so this option is not available for them. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.3 + + +.. data:: WSTOPPED + + This *options* flag for :func:`waitid` causes child processes that have been stopped + by the delivery of a signal to be reported. + + This option is not available for the other ``wait*`` functions. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.3 + + +.. data:: WUNTRACED + + This *options* flag for :func:`waitpid`, :func:`wait3`, and :func:`wait4` causes + child processes to also be reported if they have been stopped but their + current state has not been reported since they were stopped. + + This option is not available for :func:`waitid`. + + .. availability:: Unix, not Emscripten, not WASI. + + +.. data:: WNOHANG + + This *options* flag causes :func:`waitpid`, :func:`wait3`, :func:`wait4`, and + :func:`waitid` to return right away if no child process status is available + immediately. + + .. availability:: Unix, not Emscripten, not WASI. + + +.. data:: WNOWAIT + + This *options* flag causes :func:`waitid` to leave the child in a waitable state, so that + a later :func:`!wait*` call can be used to retrieve the child status information again. + + This option is not available for the other ``wait*`` functions. + + .. availability:: Unix, not Emscripten, not WASI. + + +.. data:: CLD_EXITED + CLD_KILLED + CLD_DUMPED + CLD_TRAPPED + CLD_STOPPED + CLD_CONTINUED + + These are the possible values for :attr:`!si_code` in the result returned by + :func:`waitid`. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.3 + + .. versionchanged:: 3.9 + Added :data:`CLD_KILLED` and :data:`CLD_STOPPED` values. + + .. function:: waitstatus_to_exitcode(status) Convert a wait status to an exit code. @@ -4668,32 +4743,6 @@ written in Python, such as a mail server's external command delivery program. .. versionadded:: 3.9 -.. data:: WNOHANG - - The option for :func:`waitpid` to return immediately if no child process status - is available immediately. The function returns ``(0, 0)`` in this case. - - .. availability:: Unix, not Emscripten, not WASI. - - -.. data:: WCONTINUED - - This option causes child processes to be reported if they have been continued - from a job control stop since their status was last reported. - - .. availability:: Unix, not Emscripten, not WASI. - - Some Unix systems. - - -.. data:: WUNTRACED - - This option causes child processes to be reported if they have been stopped but - their current state has not been reported since they were stopped. - - .. availability:: Unix, not Emscripten, not WASI. - - The following functions take a process status code as returned by :func:`system`, :func:`wait`, or :func:`waitpid` as a parameter. They may be used to determine the disposition of a process. 
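
For readers following the restructured ``os.wait*`` documentation above, here is a minimal POSIX-only sketch (illustrative only, not part of this patch) of reaping a child with :func:`os.waitpid` and decoding the raw 16-bit status with :func:`os.waitstatus_to_exitcode`; it assumes a platform where :func:`os.fork` is available:

    import os

    pid = os.fork()
    if pid == 0:
        # Child: terminate immediately with a known exit code.
        os._exit(7)

    # Parent: block until the child exits; waitpid() returns the raw
    # wait status described in the documentation above.
    child_pid, status = os.waitpid(pid, 0)
    assert child_pid == pid
    print(os.waitstatus_to_exitcode(status))   # prints 7

    # Non-blocking variant: os.waitpid(pid, os.WNOHANG) returns (0, 0)
    # while a matching child is still running.
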
diff --git a/Misc/NEWS.d/next/Documentation/2020-09-22-12-32-16.bpo-41825.npcaCb.rst b/Misc/NEWS.d/next/Documentation/2020-09-22-12-32-16.bpo-41825.npcaCb.rst new file mode 100644 index 00000000000000..390b4a9824c793 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2020-09-22-12-32-16.bpo-41825.npcaCb.rst @@ -0,0 +1,3 @@ +Restructured the documentation for the :func:`os.wait* ` family of functions, +and improved the docs for :func:`os.waitid` with more explanation of the +possible argument constants. From 53eef27133c1da395b3b4d7ce0ab1d5b743ffb41 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Mon, 28 Nov 2022 02:46:40 -0800 Subject: [PATCH 080/112] bpo-31718: Fix io.IncrementalNewlineDecoder SystemErrors and segfaults (#18640) Co-authored-by: Oren Milman Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> --- Lib/test/test_io.py | 10 ++++++- .../2020-02-23-23-48-15.bpo-31718.sXko5e.rst | 3 ++ Modules/_io/textio.c | 28 +++++++++++++------ 3 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-02-23-23-48-15.bpo-31718.sXko5e.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index c927f15aafef72..c5f2e5060a546d 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3945,7 +3945,15 @@ def test_translate(self): self.assertEqual(decoder.decode(b"\r\r\n"), "\r\r\n") class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): - pass + @support.cpython_only + def test_uninitialized(self): + uninitialized = self.IncrementalNewlineDecoder.__new__( + self.IncrementalNewlineDecoder) + self.assertRaises(ValueError, uninitialized.decode, b'bar') + self.assertRaises(ValueError, uninitialized.getstate) + self.assertRaises(ValueError, uninitialized.setstate, (b'foo', 0)) + self.assertRaises(ValueError, uninitialized.reset) + class PyIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): pass diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-02-23-23-48-15.bpo-31718.sXko5e.rst b/Misc/NEWS.d/next/Core and Builtins/2020-02-23-23-48-15.bpo-31718.sXko5e.rst new file mode 100644 index 00000000000000..dd96c9e20d8759 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-02-23-23-48-15.bpo-31718.sXko5e.rst @@ -0,0 +1,3 @@ +Raise :exc:`ValueError` instead of :exc:`SystemError` when methods of +uninitialized :class:`io.IncrementalNewlineDecoder` objects are called. +Patch by Oren Milman. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index ff903e9341de27..3091f6efafccd4 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -231,15 +231,16 @@ _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, PyObject *errors) /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/ { - self->decoder = Py_NewRef(decoder); if (errors == NULL) { - self->errors = Py_NewRef(&_Py_ID(strict)); + errors = Py_NewRef(&_Py_ID(strict)); } else { - self->errors = Py_NewRef(errors); + errors = Py_NewRef(errors); } + Py_XSETREF(self->errors, errors); + Py_XSETREF(self->decoder, Py_NewRef(decoder)); self->translate = translate ? 
1 : 0; self->seennl = 0; self->pendingcr = 0; @@ -274,6 +275,13 @@ check_decoded(PyObject *decoded) return 0; } +#define CHECK_INITIALIZED_DECODER(self) \ + if (self->errors == NULL) { \ + PyErr_SetString(PyExc_ValueError, \ + "IncrementalNewlineDecoder.__init__() not called"); \ + return NULL; \ + } + #define SEEN_CR 1 #define SEEN_LF 2 #define SEEN_CRLF 4 @@ -287,11 +295,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, Py_ssize_t output_len; nldecoder_object *self = (nldecoder_object *) myself; - if (self->decoder == NULL) { - PyErr_SetString(PyExc_ValueError, - "IncrementalNewlineDecoder.__init__ not called"); - return NULL; - } + CHECK_INITIALIZED_DECODER(self); /* decode input (with the eventual \r from a previous pass) */ if (self->decoder != Py_None) { @@ -502,6 +506,8 @@ _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) PyObject *buffer; unsigned long long flag; + CHECK_INITIALIZED_DECODER(self); + if (self->decoder != Py_None) { PyObject *state = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(getstate)); @@ -546,6 +552,8 @@ _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self, PyObject *buffer; unsigned long long flag; + CHECK_INITIALIZED_DECODER(self); + if (!PyTuple_Check(state)) { PyErr_SetString(PyExc_TypeError, "state argument must be a tuple"); return NULL; @@ -576,6 +584,8 @@ static PyObject * _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ { + CHECK_INITIALIZED_DECODER(self); + self->seennl = 0; self->pendingcr = 0; if (self->decoder != Py_None) @@ -587,6 +597,8 @@ _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) static PyObject * incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) { + CHECK_INITIALIZED_DECODER(self); + switch (self->seennl) { case SEEN_CR: return PyUnicode_FromString("\r"); From 02f72b8b938e301bbaaf0142547014e074bd564c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 28 Nov 2022 16:40:08 +0100 Subject: [PATCH 081/112] gh-89653: PEP 670: Convert macros to functions (#99843) Convert macros to static inline functions to avoid macro pitfalls, like duplication of side effects: * DK_ENTRIES() * DK_UNICODE_ENTRIES() * PyCode_GetNumFree() * PyFloat_AS_DOUBLE() * PyInstanceMethod_GET_FUNCTION() * PyMemoryView_GET_BASE() * PyMemoryView_GET_BUFFER() * PyMethod_GET_FUNCTION() * PyMethod_GET_SELF() * PySet_GET_SIZE() * _PyHeapType_GET_MEMBERS() Changes: * PyCode_GetNumFree() casts PyCode_GetNumFree.co_nfreevars from int to Py_ssize_t to be future proof, and because Py_ssize_t is commonly used in the C API. * PyCode_GetNumFree() doesn't cast its argument: the replaced macro already required the exact type PyCodeObject*. * Add assertions in some functions using "CAST" macros to check the arguments type when Python is built with assertions (debug build). * Remove an outdated comment in unicodeobject.h. 
--- Include/cpython/classobject.h | 34 ++++++++++++++++++++++---------- Include/cpython/code.h | 7 ++++++- Include/cpython/floatobject.h | 10 ++++++++-- Include/cpython/memoryobject.h | 13 ++++++++++-- Include/cpython/setobject.h | 9 +++++++-- Include/cpython/unicodeobject.h | 2 -- Include/internal/pycore_dict.h | 21 ++++++++++++++------ Include/internal/pycore_object.h | 5 +++-- 8 files changed, 74 insertions(+), 27 deletions(-) diff --git a/Include/cpython/classobject.h b/Include/cpython/classobject.h index 051041965002a3..d7c9ddd1336c46 100644 --- a/Include/cpython/classobject.h +++ b/Include/cpython/classobject.h @@ -26,12 +26,20 @@ PyAPI_FUNC(PyObject *) PyMethod_New(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyMethod_Function(PyObject *); PyAPI_FUNC(PyObject *) PyMethod_Self(PyObject *); -/* Macros for direct access to these values. Type checks are *not* - done, so use with care. */ -#define PyMethod_GET_FUNCTION(meth) \ - (((PyMethodObject *)(meth)) -> im_func) -#define PyMethod_GET_SELF(meth) \ - (((PyMethodObject *)(meth)) -> im_self) +#define _PyMethod_CAST(meth) \ + (assert(PyMethod_Check(meth)), _Py_CAST(PyMethodObject*, meth)) + +/* Static inline functions for direct access to these values. + Type checks are *not* done, so use with care. */ +static inline PyObject* PyMethod_GET_FUNCTION(PyObject *meth) { + return _PyMethod_CAST(meth)->im_func; +} +#define PyMethod_GET_FUNCTION(meth) PyMethod_GET_FUNCTION(_PyObject_CAST(meth)) + +static inline PyObject* PyMethod_GET_SELF(PyObject *meth) { + return _PyMethod_CAST(meth)->im_self; +} +#define PyMethod_GET_SELF(meth) PyMethod_GET_SELF(_PyObject_CAST(meth)) typedef struct { PyObject_HEAD @@ -45,10 +53,16 @@ PyAPI_DATA(PyTypeObject) PyInstanceMethod_Type; PyAPI_FUNC(PyObject *) PyInstanceMethod_New(PyObject *); PyAPI_FUNC(PyObject *) PyInstanceMethod_Function(PyObject *); -/* Macros for direct access to these values. Type checks are *not* - done, so use with care. */ -#define PyInstanceMethod_GET_FUNCTION(meth) \ - (((PyInstanceMethodObject *)(meth)) -> func) +#define _PyInstanceMethod_CAST(meth) \ + (assert(PyInstanceMethod_Check(meth)), \ + _Py_CAST(PyInstanceMethodObject*, meth)) + +/* Static inline function for direct access to these values. + Type checks are *not* done, so use with care. */ +static inline PyObject* PyInstanceMethod_GET_FUNCTION(PyObject *meth) { + return _PyInstanceMethod_CAST(meth)->func; +} +#define PyInstanceMethod_GET_FUNCTION(meth) PyInstanceMethod_GET_FUNCTION(_PyObject_CAST(meth)) #ifdef __cplusplus } diff --git a/Include/cpython/code.h b/Include/cpython/code.h index ebac0b12a461bc..15b464fe2ee95c 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -147,7 +147,12 @@ struct PyCodeObject _PyCode_DEF(1); PyAPI_DATA(PyTypeObject) PyCode_Type; #define PyCode_Check(op) Py_IS_TYPE((op), &PyCode_Type) -#define PyCode_GetNumFree(op) ((op)->co_nfreevars) + +static inline Py_ssize_t PyCode_GetNumFree(PyCodeObject *op) { + assert(PyCode_Check(op)); + return op->co_nfreevars; +} + #define _PyCode_CODE(CO) ((_Py_CODEUNIT *)(CO)->co_code_adaptive) #define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT)) diff --git a/Include/cpython/floatobject.h b/Include/cpython/floatobject.h index 7795d9f83f05cb..127093098bfe64 100644 --- a/Include/cpython/floatobject.h +++ b/Include/cpython/floatobject.h @@ -7,9 +7,15 @@ typedef struct { double ob_fval; } PyFloatObject; -// Macro version of PyFloat_AsDouble() trading safety for speed. 
+#define _PyFloat_CAST(op) \ + (assert(PyFloat_Check(op)), _Py_CAST(PyFloatObject*, op)) + +// Static inline version of PyFloat_AsDouble() trading safety for speed. // It doesn't check if op is a double object. -#define PyFloat_AS_DOUBLE(op) (((PyFloatObject *)(op))->ob_fval) +static inline double PyFloat_AS_DOUBLE(PyObject *op) { + return _PyFloat_CAST(op)->ob_fval; +} +#define PyFloat_AS_DOUBLE(op) PyFloat_AS_DOUBLE(_PyObject_CAST(op)) PyAPI_FUNC(int) PyFloat_Pack2(double x, char *p, int le); diff --git a/Include/cpython/memoryobject.h b/Include/cpython/memoryobject.h index e2a1e168e463b8..deab3cc89f726e 100644 --- a/Include/cpython/memoryobject.h +++ b/Include/cpython/memoryobject.h @@ -36,7 +36,16 @@ typedef struct { Py_ssize_t ob_array[1]; /* shape, strides, suboffsets */ } PyMemoryViewObject; +#define _PyMemoryView_CAST(op) _Py_CAST(PyMemoryViewObject*, op) + /* Get a pointer to the memoryview's private copy of the exporter's buffer. */ -#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view) +static inline Py_buffer* PyMemoryView_GET_BUFFER(PyObject *op) { + return (&_PyMemoryView_CAST(op)->view); +} +#define PyMemoryView_GET_BUFFER(op) PyMemoryView_GET_BUFFER(_PyObject_CAST(op)) + /* Get a pointer to the exporting object (this may be NULL!). */ -#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj) +static inline PyObject* PyMemoryView_GET_BASE(PyObject *op) { + return _PyMemoryView_CAST(op)->view.obj; +} +#define PyMemoryView_GET_BASE(op) PyMemoryView_GET_BASE(_PyObject_CAST(op)) diff --git a/Include/cpython/setobject.h b/Include/cpython/setobject.h index b4443a678b7e77..20fd63eaae56e2 100644 --- a/Include/cpython/setobject.h +++ b/Include/cpython/setobject.h @@ -58,8 +58,13 @@ typedef struct { PyObject *weakreflist; /* List of weak references */ } PySetObject; -#define PySet_GET_SIZE(so) \ - (assert(PyAnySet_Check(so)), (((PySetObject *)(so))->used)) +#define _PySet_CAST(so) \ + (assert(PyAnySet_Check(so)), _Py_CAST(PySetObject*, so)) + +static inline Py_ssize_t PySet_GET_SIZE(PyObject *so) { + return _PySet_CAST(so)->used; +} +#define PySet_GET_SIZE(so) PySet_GET_SIZE(_PyObject_CAST(so)) PyAPI_DATA(PyObject *) _PySet_Dummy; diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 86eeab67275ec8..a75336f590e81b 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -231,8 +231,6 @@ enum PyUnicode_Kind { // new compiler warnings on "kind < PyUnicode_KIND(str)" (compare signed and // unsigned numbers) where kind type is an int or on // "unsigned int kind = PyUnicode_KIND(str)" (cast signed to unsigned). -// Only declare the function as static inline function in the limited C API -// version 3.12 which is stricter. #define PyUnicode_KIND(op) (_PyASCIIObject_CAST(op)->state.kind) /* Return a void pointer to the raw unicode buffer. 
*/ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 25bd3bffb2e7aa..04b7084901303d 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -128,12 +128,21 @@ struct _dictvalues { #else #define DK_SIZE(dk) (1<dk_kind == DICT_KEYS_GENERAL), \ - (PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[(size_t)1 << (dk)->dk_log2_index_bytes])) -#define DK_UNICODE_ENTRIES(dk) \ - (assert((dk)->dk_kind != DICT_KEYS_GENERAL), \ - (PyDictUnicodeEntry*)(&((int8_t*)((dk)->dk_indices))[(size_t)1 << (dk)->dk_log2_index_bytes])) + +static inline void* _DK_ENTRIES(PyDictKeysObject *dk) { + int8_t *indices = (int8_t*)(dk->dk_indices); + size_t index = (size_t)1 << dk->dk_log2_index_bytes; + return (&indices[index]); +} +static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) { + assert(dk->dk_kind == DICT_KEYS_GENERAL); + return (PyDictKeyEntry*)_DK_ENTRIES(dk); +} +static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { + assert(dk->dk_kind != DICT_KEYS_GENERAL); + return (PyDictUnicodeEntry*)_DK_ENTRIES(dk); +} + #define DK_IS_UNICODE(dk) ((dk)->dk_kind != DICT_KEYS_GENERAL) #define DICT_VERSION_INCREMENT (1 << DICT_MAX_WATCHERS) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8b78f79e950e92..33c8c0b75ea742 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -355,8 +355,9 @@ extern int _PyType_HasSubclasses(PyTypeObject *); extern PyObject* _PyType_GetSubclasses(PyTypeObject *); // Access macro to the members which are floating "behind" the object -#define _PyHeapType_GET_MEMBERS(etype) \ - ((PyMemberDef *)(((char *)(etype)) + Py_TYPE(etype)->tp_basicsize)) +static inline PyMemberDef* _PyHeapType_GET_MEMBERS(PyHeapTypeObject *etype) { + return (PyMemberDef*)((char*)etype + Py_TYPE(etype)->tp_basicsize); +} PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, PyObject *); From 65417988a589e6edfeada83227a8b0884a64af4f Mon Sep 17 00:00:00 2001 From: David Miguel Susano Pinto Date: Mon, 28 Nov 2022 16:05:21 +0000 Subject: [PATCH 082/112] Grammatical improvements for ctypes 'winmode' documentation (GH-19167) --- Doc/library/ctypes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 71e5545ffe47c6..fd5df875ed74d5 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1440,8 +1440,8 @@ copy of the windows error code. The *winmode* parameter is used on Windows to specify how the library is loaded (since *mode* is ignored). It takes any value that is valid for the Win32 API -``LoadLibraryEx`` flags parameter. When omitted, the default is to use the flags -that result in the most secure DLL load to avoiding issues such as DLL +``LoadLibraryEx`` flags parameter. When omitted, the default is to use the +flags that result in the most secure DLL load, which avoids issues such as DLL hijacking. Passing the full path to the DLL is the safest way to ensure the correct library and dependencies are loaded. From 7bae15cf37239d4d345e09cc318bd82d03ec30cd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 28 Nov 2022 17:42:22 +0100 Subject: [PATCH 083/112] Use _Py_RVALUE() in macros (#99844) The following macros are modified to use _Py_RVALUE(), so they can no longer be used as l-value: * DK_LOG_SIZE() * _PyCode_CODE() * _PyList_ITEMS() * _PyTuple_ITEMS() * _Py_SLIST_HEAD() * _Py_SLIST_ITEM_NEXT() _PyCode_CODE() is private and other macros are part of the internal C API. 
--- Include/cpython/code.h | 2 +- Include/internal/pycore_dict.h | 2 +- Include/internal/pycore_hashtable.h | 4 ++-- Include/internal/pycore_list.h | 2 +- Include/internal/pycore_tuple.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 15b464fe2ee95c..fd57e0035bc09a 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -153,7 +153,7 @@ static inline Py_ssize_t PyCode_GetNumFree(PyCodeObject *op) { return op->co_nfreevars; } -#define _PyCode_CODE(CO) ((_Py_CODEUNIT *)(CO)->co_code_adaptive) +#define _PyCode_CODE(CO) _Py_RVALUE((_Py_CODEUNIT *)(CO)->co_code_adaptive) #define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT)) /* Public interface */ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 04b7084901303d..2b3b56b343ad99 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -122,7 +122,7 @@ struct _dictvalues { PyObject *values[1]; }; -#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size) +#define DK_LOG_SIZE(dk) _Py_RVALUE((dk)->dk_log2_size) #if SIZEOF_VOID_P > 4 #define DK_SIZE(dk) (((int64_t)1)<next) +#define _Py_SLIST_ITEM_NEXT(ITEM) _Py_RVALUE(((_Py_slist_item_t *)(ITEM))->next) -#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)(SLIST))->head) +#define _Py_SLIST_HEAD(SLIST) _Py_RVALUE(((_Py_slist_t *)(SLIST))->head) /* _Py_hashtable: table entry */ diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 691d13bc8d9ffa..628267cc8a9618 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -35,7 +35,7 @@ struct _Py_list_state { #endif }; -#define _PyList_ITEMS(op) (_PyList_CAST(op)->ob_item) +#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) extern int _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem); diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 1efe4fa2bdef94..504c36338d9e96 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -62,7 +62,7 @@ struct _Py_tuple_state { #endif }; -#define _PyTuple_ITEMS(op) (_PyTuple_CAST(op)->ob_item) +#define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item) extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t); extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); From ca3e611b1f620eabb657ef08a95d5f5f554ea773 Mon Sep 17 00:00:00 2001 From: Matthew Hughes <34972397+matthewhughes934@users.noreply.github.com> Date: Mon, 28 Nov 2022 18:08:08 +0000 Subject: [PATCH 084/112] `fnmatch` docs: link to `fnmatch.filter()`, not `builtins.filter()` (#99819) --- Doc/library/fnmatch.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst index 9163da57c7b999..46bf0fc2848058 100644 --- a/Doc/library/fnmatch.rst +++ b/Doc/library/fnmatch.rst @@ -48,7 +48,7 @@ patterns. Also note that :func:`functools.lru_cache` with the *maxsize* of 32768 is used to cache the compiled regex patterns in the following functions: :func:`fnmatch`, -:func:`fnmatchcase`, :func:`filter`. +:func:`fnmatchcase`, :func:`.filter`. .. 
function:: fnmatch(filename, pattern) From 1d1bb95abdcafe92c771fb3dc4722351b032cc24 Mon Sep 17 00:00:00 2001 From: cemysce <13400533+cemysce@users.noreply.github.com> Date: Mon, 28 Nov 2022 13:25:03 -0500 Subject: [PATCH 085/112] gh-99811: Use correct variable to search for time in format string (GH-99812) Use correct variable to search for asctime --- Lib/logging/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 86e1efe6e653ac..9241d73d0fd03c 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -511,7 +511,7 @@ def __init__(self, *args, **kwargs): def usesTime(self): fmt = self._fmt - return fmt.find('$asctime') >= 0 or fmt.find(self.asctime_format) >= 0 + return fmt.find('$asctime') >= 0 or fmt.find(self.asctime_search) >= 0 def validate(self): pattern = Template.pattern From 8f2fb7dfe72c882e97e524ef7ce40ceb663cc27e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 29 Nov 2022 09:46:52 +0200 Subject: [PATCH 086/112] gh-99344, gh-99379, gh-99382: Fix issues in substitution of ParamSpec and TypeVarTuple (GH-99412) * Fix substitution of TypeVarTuple and ParamSpec together in user generics. * Fix substitution of ParamSpec followed by TypeVarTuple in generic aliases. * Check the number of arguments in substitution in user generics containing a TypeVarTuple and one or more TypeVar. --- Lib/test/test_typing.py | 81 +++++++++++++++++++ Lib/typing.py | 71 +++++++--------- ...2-11-12-12-08-34.gh-issue-99344.7M_u8G.rst | 2 + ...2-11-12-12-10-23.gh-issue-99379.bcGhxF.rst | 2 + ...2-11-12-12-15-30.gh-issue-99382.dKg_rW.rst | 2 + 5 files changed, 117 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-12-12-08-34.gh-issue-99344.7M_u8G.rst create mode 100644 Misc/NEWS.d/next/Library/2022-11-12-12-10-23.gh-issue-99379.bcGhxF.rst create mode 100644 Misc/NEWS.d/next/Library/2022-11-12-12-15-30.gh-issue-99382.dKg_rW.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 6ff7f612867e62..da602b0199d52c 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -772,20 +772,42 @@ class C(Generic[*Ts]): pass ('generic[*Ts]', '[*Ts]', 'generic[*Ts]'), ('generic[*Ts]', '[T, *Ts]', 'generic[T, *Ts]'), ('generic[*Ts]', '[*Ts, T]', 'generic[*Ts, T]'), + ('generic[T, *Ts]', '[()]', 'TypeError'), ('generic[T, *Ts]', '[int]', 'generic[int]'), ('generic[T, *Ts]', '[int, str]', 'generic[int, str]'), ('generic[T, *Ts]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[list[T], *Ts]', '[()]', 'TypeError'), ('generic[list[T], *Ts]', '[int]', 'generic[list[int]]'), ('generic[list[T], *Ts]', '[int, str]', 'generic[list[int], str]'), ('generic[list[T], *Ts]', '[int, str, bool]', 'generic[list[int], str, bool]'), + ('generic[*Ts, T]', '[()]', 'TypeError'), ('generic[*Ts, T]', '[int]', 'generic[int]'), ('generic[*Ts, T]', '[int, str]', 'generic[int, str]'), ('generic[*Ts, T]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[*Ts, list[T]]', '[()]', 'TypeError'), ('generic[*Ts, list[T]]', '[int]', 'generic[list[int]]'), ('generic[*Ts, list[T]]', '[int, str]', 'generic[int, list[str]]'), ('generic[*Ts, list[T]]', '[int, str, bool]', 'generic[int, str, list[bool]]'), + ('generic[T1, T2, *Ts]', '[()]', 'TypeError'), + ('generic[T1, T2, *Ts]', '[int]', 'TypeError'), + ('generic[T1, T2, *Ts]', '[int, str]', 'generic[int, str]'), + ('generic[T1, T2, *Ts]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[T1, T2, *Ts]', '[int, str, 
bool, bytes]', 'generic[int, str, bool, bytes]'), + + ('generic[*Ts, T1, T2]', '[()]', 'TypeError'), + ('generic[*Ts, T1, T2]', '[int]', 'TypeError'), + ('generic[*Ts, T1, T2]', '[int, str]', 'generic[int, str]'), + ('generic[*Ts, T1, T2]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[*Ts, T1, T2]', '[int, str, bool, bytes]', 'generic[int, str, bool, bytes]'), + + ('generic[T1, *Ts, T2]', '[()]', 'TypeError'), + ('generic[T1, *Ts, T2]', '[int]', 'TypeError'), + ('generic[T1, *Ts, T2]', '[int, str]', 'generic[int, str]'), + ('generic[T1, *Ts, T2]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[T1, *Ts, T2]', '[int, str, bool, bytes]', 'generic[int, str, bool, bytes]'), + ('generic[T, *Ts]', '[*tuple_type[int, ...]]', 'generic[int, *tuple_type[int, ...]]'), ('generic[T, *Ts]', '[str, *tuple_type[int, ...]]', 'generic[str, *tuple_type[int, ...]]'), ('generic[T, *Ts]', '[*tuple_type[int, ...], str]', 'generic[int, *tuple_type[int, ...], str]'), @@ -7241,6 +7263,65 @@ class X(Generic[P, P2]): self.assertEqual(G1.__args__, ((int, str), (bytes,))) self.assertEqual(G2.__args__, ((int,), (str, bytes))) + def test_typevartuple_and_paramspecs_in_user_generics(self): + Ts = TypeVarTuple("Ts") + P = ParamSpec("P") + + class X(Generic[*Ts, P]): + f: Callable[P, int] + g: Tuple[*Ts] + + G1 = X[int, [bytes]] + self.assertEqual(G1.__args__, (int, (bytes,))) + G2 = X[int, str, [bytes]] + self.assertEqual(G2.__args__, (int, str, (bytes,))) + G3 = X[[bytes]] + self.assertEqual(G3.__args__, ((bytes,),)) + G4 = X[[]] + self.assertEqual(G4.__args__, ((),)) + with self.assertRaises(TypeError): + X[()] + + class Y(Generic[P, *Ts]): + f: Callable[P, int] + g: Tuple[*Ts] + + G1 = Y[[bytes], int] + self.assertEqual(G1.__args__, ((bytes,), int)) + G2 = Y[[bytes], int, str] + self.assertEqual(G2.__args__, ((bytes,), int, str)) + G3 = Y[[bytes]] + self.assertEqual(G3.__args__, ((bytes,),)) + G4 = Y[[]] + self.assertEqual(G4.__args__, ((),)) + with self.assertRaises(TypeError): + Y[()] + + def test_typevartuple_and_paramspecs_in_generic_aliases(self): + P = ParamSpec('P') + T = TypeVar('T') + Ts = TypeVarTuple('Ts') + + for C in Callable, collections.abc.Callable: + with self.subTest(generic=C): + A = C[P, Tuple[*Ts]] + B = A[[int, str], bytes, float] + self.assertEqual(B.__args__, (int, str, Tuple[bytes, float])) + + class X(Generic[T, P]): + pass + + A = X[Tuple[*Ts], P] + B = A[bytes, float, [int, str]] + self.assertEqual(B.__args__, (Tuple[bytes, float], (int, str,))) + + class Y(Generic[P, T]): + pass + + A = Y[P, Tuple[*Ts]] + B = A[[int, str], bytes, float] + self.assertEqual(B.__args__, ((int, str,), Tuple[bytes, float])) + def test_var_substitution(self): T = TypeVar("T") P = ParamSpec("P") diff --git a/Lib/typing.py b/Lib/typing.py index 233941598f76a3..127e3c942d04d3 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -284,25 +284,6 @@ def _unpack_args(args): newargs.append(arg) return newargs -def _prepare_paramspec_params(cls, params): - """Prepares the parameters for a Generic containing ParamSpec - variables (internal helper). - """ - # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612. - if (len(cls.__parameters__) == 1 - and params and not _is_param_expr(params[0])): - assert isinstance(cls.__parameters__[0], ParamSpec) - return (params,) - else: - _check_generic(cls, params, len(cls.__parameters__)) - _params = [] - # Convert lists to tuples to help other libraries cache the results. 
- for p, tvar in zip(params, cls.__parameters__): - if isinstance(tvar, ParamSpec) and isinstance(p, list): - p = tuple(p) - _params.append(p) - return tuple(_params) - def _deduplicate(params): # Weed out strict duplicates, preserving the first of each occurrence. all_params = set(params) @@ -1238,7 +1219,18 @@ def __typing_subst__(self, arg): return arg def __typing_prepare_subst__(self, alias, args): - return _prepare_paramspec_params(alias, args) + params = alias.__parameters__ + i = params.index(self) + if i >= len(args): + raise TypeError(f"Too few arguments for {alias}") + # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612. + if len(params) == 1 and not _is_param_expr(args[0]): + assert i == 0 + args = (args,) + # Convert lists to tuples to help other libraries cache the results. + elif isinstance(args[i], list): + args = (*args[:i], tuple(args[i]), *args[i+1:]) + return args def _is_dunder(attr): return attr.startswith('__') and attr.endswith('__') @@ -1801,23 +1793,13 @@ def __class_getitem__(cls, params): if not isinstance(params, tuple): params = (params,) - if not params: - # We're only ok with `params` being empty if the class's only type - # parameter is a `TypeVarTuple` (which can contain zero types). - class_params = getattr(cls, "__parameters__", None) - only_class_parameter_is_typevartuple = ( - class_params is not None - and len(class_params) == 1 - and isinstance(class_params[0], TypeVarTuple) - ) - if not only_class_parameter_is_typevartuple: - raise TypeError( - f"Parameter list to {cls.__qualname__}[...] cannot be empty" - ) - params = tuple(_type_convert(p) for p in params) if cls in (Generic, Protocol): # Generic and Protocol can only be subscripted with unique type variables. + if not params: + raise TypeError( + f"Parameter list to {cls.__qualname__}[...] cannot be empty" + ) if not all(_is_typevar_like(p) for p in params): raise TypeError( f"Parameters to {cls.__name__}[...] must all be type variables " @@ -1827,13 +1809,20 @@ def __class_getitem__(cls, params): f"Parameters to {cls.__name__}[...] must all be unique") else: # Subscripting a regular Generic subclass. - if any(isinstance(t, ParamSpec) for t in cls.__parameters__): - params = _prepare_paramspec_params(cls, params) - elif not any(isinstance(p, TypeVarTuple) for p in cls.__parameters__): - # We only run this if there are no TypeVarTuples, because we - # don't check variadic generic arity at runtime (to reduce - # complexity of typing.py). - _check_generic(cls, params, len(cls.__parameters__)) + for param in cls.__parameters__: + prepare = getattr(param, '__typing_prepare_subst__', None) + if prepare is not None: + params = prepare(cls, params) + _check_generic(cls, params, len(cls.__parameters__)) + + new_args = [] + for param, new_arg in zip(cls.__parameters__, params): + if isinstance(param, TypeVarTuple): + new_args.extend(new_arg) + else: + new_args.append(new_arg) + params = tuple(new_args) + return _GenericAlias(cls, params, _paramspec_tvars=True) diff --git a/Misc/NEWS.d/next/Library/2022-11-12-12-08-34.gh-issue-99344.7M_u8G.rst b/Misc/NEWS.d/next/Library/2022-11-12-12-08-34.gh-issue-99344.7M_u8G.rst new file mode 100644 index 00000000000000..412c8c793435af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-12-12-08-34.gh-issue-99344.7M_u8G.rst @@ -0,0 +1,2 @@ +Fix substitution of :class:`~typing.TypeVarTuple` and +:class:`~typing.ParamSpec` together in user generics. 
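
As a rough illustration of the behaviour these changes enable (the class below is invented for the example and is not taken from the patch), a user generic that mixes a :class:`~typing.TypeVarTuple` with a :class:`~typing.ParamSpec` can now be parameterized in a single subscription, mirroring the new tests:

    from typing import Callable, Generic, ParamSpec, Tuple, TypeVarTuple

    P = ParamSpec("P")
    Ts = TypeVarTuple("Ts")

    class Task(Generic[*Ts, P]):
        callback: Callable[P, int]   # consumes the ParamSpec
        results: Tuple[*Ts]          # consumes the TypeVarTuple

    alias = Task[int, str, [bytes, float]]
    assert alias.__args__ == (int, str, (bytes, float))
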
diff --git a/Misc/NEWS.d/next/Library/2022-11-12-12-10-23.gh-issue-99379.bcGhxF.rst b/Misc/NEWS.d/next/Library/2022-11-12-12-10-23.gh-issue-99379.bcGhxF.rst new file mode 100644 index 00000000000000..1950680b1df86c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-12-12-10-23.gh-issue-99379.bcGhxF.rst @@ -0,0 +1,2 @@ +Fix substitution of :class:`~typing.ParamSpec` followed by +:class:`~typing.TypeVarTuple` in generic aliases. diff --git a/Misc/NEWS.d/next/Library/2022-11-12-12-15-30.gh-issue-99382.dKg_rW.rst b/Misc/NEWS.d/next/Library/2022-11-12-12-15-30.gh-issue-99382.dKg_rW.rst new file mode 100644 index 00000000000000..f153f2fceac844 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-12-12-15-30.gh-issue-99382.dKg_rW.rst @@ -0,0 +1,2 @@ +Check the number of arguments in substitution in user generics containing a +:class:`~typing.TypeVarTuple` and one or more :class:`~typing.TypeVar`. From deaa8dee48beeae9928a418736da0608f2f18361 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 29 Nov 2022 09:59:56 +0200 Subject: [PATCH 087/112] gh-99593: Add tests for Unicode C API (part 1) (GH-99651) Add tests for functions corresponding to the str class methods. --- Lib/test/test_capi/test_unicode.py | 491 ++++++++++++++++-- ...2-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst | 1 + Modules/_testcapi/unicode.c | 338 +++++++++++- 3 files changed, 780 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 07b77d3e04bbe0..857579f758386f 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -9,10 +9,35 @@ _testcapi = None +NULL = None + +class Str(str): + pass + + class CAPITest(unittest.TestCase): - # Test PyUnicode_FromFormat() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromobject(self): + """Test PyUnicode_FromObject()""" + from _testcapi import unicode_fromobject as fromobject + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(fromobject(s), s) + o = Str(s) + s2 = fromobject(o) + self.assertEqual(s2, s) + self.assertIs(type(s2), str) + self.assertIsNot(s2, s) + + self.assertRaises(TypeError, fromobject, b'abc') + self.assertRaises(TypeError, fromobject, []) + # CRASHES fromobject(NULL) + def test_from_format(self): + """Test PyUnicode_FromFormat()""" import_helper.import_module('ctypes') from ctypes import ( c_char_p, @@ -268,10 +293,10 @@ def check_format(expected, format, *args): self.assertRaisesRegex(SystemError, 'invalid format string', PyUnicode_FromFormat, b'%+i', c_int(10)) - # Test PyUnicode_AsWideChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidechar(self): + """Test PyUnicode_AsWideChar()""" from _testcapi import unicode_aswidechar import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -307,10 +332,10 @@ def test_aswidechar(self): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsWideCharString() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidecharstring(self): + """Test PyUnicode_AsWideCharString()""" from _testcapi import unicode_aswidecharstring import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -332,10 +357,10 @@ def test_aswidecharstring(self): self.assertEqual(size, nchar) 
self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsUCS4() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asucs4(self): + """Test PyUnicode_AsUCS4()""" from _testcapi import unicode_asucs4 for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 'a\ud800b\udfffc', '\ud834\udd1e']: @@ -350,10 +375,10 @@ def test_asucs4(self): self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') - # Test PyUnicode_AsUTF8() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8(self): + """Test PyUnicode_AsUTF8()""" from _testcapi import unicode_asutf8 bmp = '\u0100' @@ -365,10 +390,10 @@ def test_asutf8(self): self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') - # Test PyUnicode_AsUTF8AndSize() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8andsize(self): + """Test PyUnicode_AsUTF8AndSize()""" from _testcapi import unicode_asutf8andsize bmp = '\u0100' @@ -380,54 +405,275 @@ def test_asutf8andsize(self): self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') - # Test PyUnicode_Count() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_concat(self): + """Test PyUnicode_Concat()""" + from _testcapi import unicode_concat as concat + + self.assertEqual(concat('abc', 'def'), 'abcdef') + self.assertEqual(concat('abc', 'где'), 'abcгде') + self.assertEqual(concat('абв', 'def'), 'абвdef') + self.assertEqual(concat('абв', 'где'), 'абвгде') + self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d') + + self.assertRaises(TypeError, concat, b'abc', 'def') + self.assertRaises(TypeError, concat, 'abc', b'def') + self.assertRaises(TypeError, concat, b'abc', b'def') + self.assertRaises(TypeError, concat, [], 'def') + self.assertRaises(TypeError, concat, 'abc', []) + self.assertRaises(TypeError, concat, [], []) + # CRASHES concat(NULL, 'def') + # CRASHES concat('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_split(self): + """Test PyUnicode_Split()""" + from _testcapi import unicode_split as split + + self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d']) + self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(split('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, split, 'a|b|c|d', '') + self.assertRaises(TypeError, split, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, split, [], '|') + # CRASHES split(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rsplit(self): + """Test PyUnicode_RSplit()""" + from _testcapi import unicode_rsplit as rsplit + + self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '\u20ac'), 
['a|b|c|d']) + self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(rsplit('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, rsplit, 'a|b|c|d', '') + self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, rsplit, [], '|') + # CRASHES rsplit(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_partition(self): + """Test PyUnicode_Partition()""" + from _testcapi import unicode_partition as partition + + self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c')) + self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c')) + self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в')) + self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан')) + self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc')) + + self.assertRaises(ValueError, partition, 'a|b|c', '') + self.assertRaises(TypeError, partition, b'a|b|c', '|') + self.assertRaises(TypeError, partition, 'a|b|c', b'|') + self.assertRaises(TypeError, partition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, partition, [], '|') + # CRASHES partition(NULL, '|') + # CRASHES partition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rpartition(self): + """Test PyUnicode_RPartition()""" + from _testcapi import unicode_rpartition as rpartition + + self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c')) + self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c')) + self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в')) + self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н')) + self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c')) + + self.assertRaises(ValueError, rpartition, 'a|b|c', '') + self.assertRaises(TypeError, rpartition, b'a|b|c', '|') + self.assertRaises(TypeError, rpartition, 'a|b|c', b'|') + self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, rpartition, [], '|') + # CRASHES rpartition(NULL, '|') + # CRASHES rpartition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_splitlines(self): + """Test PyUnicode_SplitLines()""" + from _testcapi import unicode_splitlines as splitlines + + self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\nb\rc\r\nd', True), + ['a\n', 'b\r', 'c\r\n', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d'), + ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True), + ['a\x85', 'b\u2028', 'c\u2029', 'd']) + self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г']) + + self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd') + # CRASHES splitlines(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_translate(self): + """Test PyUnicode_Translate()""" + from _testcapi import unicode_translate as translate + + self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d') + self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): 
'<>'}), 'АБ<>г') + self.assertEqual(translate('abc', {}), 'abc') + self.assertEqual(translate('abc', []), 'abc') + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}) + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict') + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac') + self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc') + self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c') + # XXX Other error handlers do not support UnicodeTranslateError + self.assertRaises(TypeError, translate, b'abc', []) + self.assertRaises(TypeError, translate, 123, []) + self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'}) + self.assertRaises(TypeError, translate, 'abc', 123) + self.assertRaises(TypeError, translate, 'abc', NULL) + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + # CRASHES translate(NULL, []) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_join(self): + """Test PyUnicode_Join()""" + from _testcapi import unicode_join as join + self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c') + self.assertEqual(join('|', ['a', '', 'c']), 'a||c') + self.assertEqual(join('', ['a', 'b', 'c']), 'abc') + self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c') + self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в') + self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв') + self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c']) + self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, '|', b'123') + self.assertRaises(TypeError, join, '|', 123) + self.assertRaises(SystemError, join, '|', NULL) + @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_count(self): + """Test PyUnicode_Count()""" from _testcapi import unicode_count - st = 'abcabd' - self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1) - self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0) # cyrillic "a" + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_count(str, ch, 0, len(str)), 1) + + str = "!>_= end + self.assertEqual(unicode_count(str, '!', 0, 0), 0) + self.assertEqual(unicode_count(str, '!', len(str), 0), 0) + # negative + self.assertEqual(unicode_count(str, '!', -len(str), -1), 1) + # bad arguments + self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str)) + self.assertRaises(TypeError, unicode_count, b"!>__= end - self.assertEqual(unicode_count(st, 'abc', 0, 0), 0) - self.assertEqual(unicode_count(st, 'a', 3, 2), 0) - self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0) + self.assertEqual(find(str, '!', 0, 0, 1), -1) + self.assertEqual(find(str, '!', len(str), 0, 1), -1) # negative - self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2) - self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1) - # wrong args - self.assertRaises(TypeError, unicode_count, 'a', 'a') - self.assertRaises(TypeError, unicode_count, 'a', 'a', 1) - self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1) - self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1) - # empty string - 
self.assertEqual(unicode_count('abc', '', 0, 3), 4) - self.assertEqual(unicode_count('abc', '', 1, 3), 3) - self.assertEqual(unicode_count('', '', 0, 1), 1) - self.assertEqual(unicode_count('', 'a', 0, 1), 0) - # different unicode kinds - for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": - for ch in uni: - self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) - self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) - - # subclasses should still work - class MyStr(str): - pass - - self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) - - # Test PyUnicode_FindChar() + self.assertEqual(find(str, '!', -len(str), -1, 1), 0) + self.assertEqual(find(str, '!', -len(str), -1, -1), 0) + # bad arguments + self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1) + self.assertRaises(TypeError, find, b"!>__'), '<>br<>c<>d<>br<>') + self.assertEqual(replace(str, 'abra', '='), '=cad=') + self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra') + self.assertEqual(replace(str, 'a', '=', 0), str) + self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=') + self.assertEqual(replace(str, 'z', '='), str) + self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=') + self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж') + self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=') + self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden') + # bad arguments + self.assertRaises(TypeError, replace, 'a', 'a', b'=') + self.assertRaises(TypeError, replace, 'a', b'a', '=') + self.assertRaises(TypeError, replace, b'a', 'a', '=') + self.assertRaises(TypeError, replace, 'a', 'a', ord('=')) + self.assertRaises(TypeError, replace, 'a', ord('a'), '=') + self.assertRaises(TypeError, replace, [], 'a', '=') + # CRASHES replace('a', 'a', NULL) + # CRASHES replace('a', NULL, '=') + # CRASHES replace(NULL, 'a', '=') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_compare(self): + """Test PyUnicode_Compare()""" + from _testcapi import unicode_compare as compare + + self.assertEqual(compare('abc', 'abc'), 0) + self.assertEqual(compare('abc', 'def'), -1) + self.assertEqual(compare('def', 'abc'), 1) + self.assertEqual(compare('abc', 'abc\0def'), -1) + self.assertEqual(compare('abc\0def', 'abc\0def'), 0) + self.assertEqual(compare('абв', 'abc'), 1) + + self.assertRaises(TypeError, compare, b'abc', 'abc') + self.assertRaises(TypeError, compare, 'abc', b'abc') + self.assertRaises(TypeError, compare, b'abc', b'abc') + self.assertRaises(TypeError, compare, [], 'abc') + self.assertRaises(TypeError, compare, 'abc', []) + self.assertRaises(TypeError, compare, [], []) + # CRASHES compare(NULL, 'abc') + # CRASHES compare('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_comparewithasciistring(self): + """Test PyUnicode_CompareWithASCIIString()""" + from _testcapi import unicode_comparewithasciistring as comparewithasciistring + + self.assertEqual(comparewithasciistring('abc', b'abc'), 0) + self.assertEqual(comparewithasciistring('abc', b'def'), -1) + self.assertEqual(comparewithasciistring('def', b'abc'), 1) + self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0) + self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1) + self.assertEqual(comparewithasciistring('абв', b'abc'), 1) + + # CRASHES comparewithasciistring(b'abc', b'abc') + # CRASHES comparewithasciistring([], b'abc') + # CRASHES comparewithasciistring(NULL, 
b'abc') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_richcompare(self): + """Test PyUnicode_RichCompare()""" + from _testcapi import unicode_richcompare as richcompare + + LT, LE, EQ, NE, GT, GE = range(6) + strings = ('abc', 'абв', '\U0001f600', 'abc\0') + for s1 in strings: + for s2 in strings: + self.assertIs(richcompare(s1, s2, LT), s1 < s2) + self.assertIs(richcompare(s1, s2, LE), s1 <= s2) + self.assertIs(richcompare(s1, s2, EQ), s1 == s2) + self.assertIs(richcompare(s1, s2, NE), s1 != s2) + self.assertIs(richcompare(s1, s2, GT), s1 > s2) + self.assertIs(richcompare(s1, s2, GE), s1 >= s2) + + for op in LT, LE, EQ, NE, GT, GE: + self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', b'abc', op), NotImplemented) + self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented) + self.assertIs(richcompare([], 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', [], op), NotImplemented) + self.assertIs(richcompare([], [], op), NotImplemented) + + # CRASHES richcompare(NULL, 'abc', op) + # CRASHES richcompare('abc', NULL, op) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_format(self): + """Test PyUnicode_Format()""" + from _testcapi import unicode_format as format + + self.assertEqual(format('x=%d!', 42), 'x=42!') + self.assertEqual(format('x=%d!', (42,)), 'x=42!') + self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!') + + self.assertRaises(SystemError, format, 'x=%d!', NULL) + self.assertRaises(SystemError, format, NULL, 42) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_contains(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_contains as contains + + self.assertEqual(contains('abcd', ''), 1) + self.assertEqual(contains('abcd', 'b'), 1) + self.assertEqual(contains('abcd', 'x'), 0) + self.assertEqual(contains('abcd', 'ж'), 0) + self.assertEqual(contains('abcd', '\0'), 0) + self.assertEqual(contains('abc\0def', '\0'), 1) + self.assertEqual(contains('abcd', 'bc'), 1) + + self.assertRaises(TypeError, contains, b'abcd', 'b') + self.assertRaises(TypeError, contains, 'abcd', b'b') + self.assertRaises(TypeError, contains, b'abcd', b'b') + self.assertRaises(TypeError, contains, [], 'b') + self.assertRaises(TypeError, contains, 'abcd', ord('b')) + # CRASHES contains(NULL, 'b') + # CRASHES contains('abcd', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_isidentifier(self): + """Test PyUnicode_IsIdentifier()""" + from _testcapi import unicode_isidentifier as isidentifier + + self.assertEqual(isidentifier("a"), 1) + self.assertEqual(isidentifier("b0"), 1) + self.assertEqual(isidentifier("µ"), 1) + self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1) + + self.assertEqual(isidentifier(""), 0) + self.assertEqual(isidentifier(" "), 0) + self.assertEqual(isidentifier("["), 0) + self.assertEqual(isidentifier("©"), 0) + self.assertEqual(isidentifier("0"), 0) + self.assertEqual(isidentifier("32M"), 0) + + # CRASHES isidentifier(b"a") + # CRASHES isidentifier([]) + # CRASHES isidentifier(NULL) - # Test PyUnicode_CopyCharacters() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_copycharacters(self): + """Test PyUnicode_CopyCharacters()""" from _testcapi import unicode_copycharacters strings = [ diff --git a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst 
b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst new file mode 100644 index 00000000000000..ec4cda2080323f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst @@ -0,0 +1 @@ +Cover the Unicode C API with tests. diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index d5c4a9e5b95ec6..4c5049dd406a7c 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include "parts.h" static struct PyModuleDef *_testcapimodule = NULL; // set at initialization @@ -99,6 +100,17 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; } +#define NULLABLE(x) do { if (x == Py_None) x = NULL; } while (0); + +/* Test PyUnicode_FromObject() */ +static PyObject * +unicode_fromobject(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_FromObject(arg); +} + +/* Test PyUnicode_AsWideChar() */ static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) { @@ -130,6 +142,7 @@ unicode_aswidechar(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsWideCharString() */ static PyObject * unicode_aswidecharstring(PyObject *self, PyObject *args) { @@ -151,6 +164,7 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsUCS4() */ static PyObject * unicode_asucs4(PyObject *self, PyObject *args) { @@ -181,6 +195,7 @@ unicode_asucs4(PyObject *self, PyObject *args) return result; } +/* Test PyUnicode_AsUTF8() */ static PyObject * unicode_asutf8(PyObject *self, PyObject *args) { @@ -199,6 +214,7 @@ unicode_asutf8(PyObject *self, PyObject *args) return PyBytes_FromString(buffer); } +/* Test PyUnicode_AsUTF8AndSize() */ static PyObject * unicode_asutf8andsize(PyObject *self, PyObject *args) { @@ -223,26 +239,194 @@ unicode_asutf8andsize(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, utf8_len); } +/* Test PyUnicode_Concat() */ +static PyObject * +unicode_concat(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_Concat(left, right); +} + +/* Test PyUnicode_Split() */ +static PyObject * +unicode_split(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Split(s, sep, maxsplit); +} + +/* Test PyUnicode_RSplit() */ +static PyObject * +unicode_rsplit(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RSplit(s, sep, maxsplit); +} + +/* Test PyUnicode_Splitlines() */ +static PyObject * +unicode_splitlines(PyObject *self, PyObject *args) +{ + PyObject *s; + int keepends = 0; + + if (!PyArg_ParseTuple(args, "O|i", &s, &keepends)) + return NULL; + + NULLABLE(s); + return PyUnicode_Splitlines(s, keepends); +} + +/* Test PyUnicode_Partition() */ +static PyObject * +unicode_partition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Partition(s, sep); +} + +/* Test PyUnicode_RPartition() */ +static PyObject * 
+unicode_rpartition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RPartition(s, sep); +} + +/* Test PyUnicode_Translate() */ +static PyObject * +unicode_translate(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *table; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors)) + return NULL; + + NULLABLE(obj); + NULLABLE(table); + return PyUnicode_Translate(obj, table, errors); +} + +/* Test PyUnicode_Join() */ +static PyObject * +unicode_join(PyObject *self, PyObject *args) +{ + PyObject *sep; + PyObject *seq; + + if (!PyArg_ParseTuple(args, "OO", &sep, &seq)) + return NULL; + + NULLABLE(sep); + NULLABLE(seq); + return PyUnicode_Join(sep, seq); +} + +/* Test PyUnicode_Count() */ static PyObject * unicode_count(PyObject *self, PyObject *args) { PyObject *str; PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; Py_ssize_t result; - Py_ssize_t start, end; - if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr, - &start, &end)) { + if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end)) return NULL; - } + NULLABLE(str); + NULLABLE(substr); result = PyUnicode_Count(str, substr, start, end); if (result == -1) return NULL; - else - return PyLong_FromSsize_t(result); + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Find() */ +static PyObject * +unicode_find(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Find(str, substr, start, end, direction); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Tailmatch() */ +static PyObject * +unicode_tailmatch(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Tailmatch(str, substr, start, end, direction); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_FindChar() */ static PyObject * unicode_findchar(PyObject *self, PyObject *args) { @@ -264,6 +448,130 @@ unicode_findchar(PyObject *self, PyObject *args) return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Replace() */ +static PyObject * +unicode_replace(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + PyObject *replstr; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "OOO|n", &str, &substr, &replstr, &maxcount)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + NULLABLE(replstr); + return PyUnicode_Replace(str, substr, replstr, maxcount); +} + +/* Test PyUnicode_Compare() */ +static PyObject * +unicode_compare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int result; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CompareWithASCIIString() */ +static PyObject * +unicode_comparewithasciistring(PyObject *self, 
PyObject *args) +{ + PyObject *left; + const char *right = NULL; + Py_ssize_t right_len; + int result; + + if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) + return NULL; + + NULLABLE(left); + result = PyUnicode_CompareWithASCIIString(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_RichCompare() */ +static PyObject * +unicode_richcompare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int op; + + if (!PyArg_ParseTuple(args, "OOi", &left, &right, &op)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_RichCompare(left, right, op); +} + +/* Test PyUnicode_Format() */ +static PyObject * +unicode_format(PyObject *self, PyObject *args) +{ + PyObject *format; + PyObject *fargs; + + if (!PyArg_ParseTuple(args, "OO", &format, &fargs)) + return NULL; + + NULLABLE(format); + NULLABLE(fargs); + return PyUnicode_Format(format, fargs); +} + +/* Test PyUnicode_Contains() */ +static PyObject * +unicode_contains(PyObject *self, PyObject *args) +{ + PyObject *container; + PyObject *element; + int result; + + if (!PyArg_ParseTuple(args, "OO", &container, &element)) + return NULL; + + NULLABLE(container); + NULLABLE(element); + result = PyUnicode_Contains(container, element); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_IsIdentifier() */ +static PyObject * +unicode_isidentifier(PyObject *self, PyObject *arg) +{ + int result; + + NULLABLE(arg); + result = PyUnicode_IsIdentifier(arg); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CopyCharacters() */ static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { @@ -711,13 +1019,31 @@ static PyMethodDef TestMethods[] = { test_unicode_compare_with_ascii, METH_NOARGS}, {"test_string_from_format", test_string_from_format, METH_NOARGS}, {"test_widechar", test_widechar, METH_NOARGS}, + {"unicode_fromobject", unicode_fromobject, METH_O}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_concat", unicode_concat, METH_VARARGS}, + {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, + {"unicode_split", unicode_split, METH_VARARGS}, + {"unicode_rsplit", unicode_rsplit, METH_VARARGS}, + {"unicode_partition", unicode_partition, METH_VARARGS}, + {"unicode_rpartition", unicode_rpartition, METH_VARARGS}, + {"unicode_translate", unicode_translate, METH_VARARGS}, + {"unicode_join", unicode_join, METH_VARARGS}, {"unicode_count", unicode_count, METH_VARARGS}, + {"unicode_tailmatch", unicode_tailmatch, METH_VARARGS}, + {"unicode_find", unicode_find, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, + {"unicode_replace", unicode_replace, METH_VARARGS}, + {"unicode_compare", unicode_compare, METH_VARARGS}, + {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS}, + {"unicode_richcompare", unicode_richcompare, METH_VARARGS}, + {"unicode_format", unicode_format, METH_VARARGS}, + {"unicode_contains", unicode_contains, METH_VARARGS}, + {"unicode_isidentifier", unicode_isidentifier, METH_O}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {NULL}, }; From 
4cfc1b8568bd8bf0d44fadc42ec86696d0561d33 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:37:37 +0530 Subject: [PATCH 088/112] GH-91375: Port `_asyncio` static types to heap types and module state (#99122) Co-authored-by: Erlend E. Aasland --- Modules/_asynciomodule.c | 1043 ++++++++++++++++++----------- Modules/clinic/_asynciomodule.c.h | 175 ++++- 2 files changed, 783 insertions(+), 435 deletions(-) diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 3edd8a9b2dbf91..cabcaec94bec7e 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -3,11 +3,12 @@ #endif #include "Python.h" -#include "pycore_pyerrors.h" // _PyErr_ClearExcState() -#include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_runtime.h" // _PyRuntime -#include "pycore_global_objects.h" // _Py_ID() -#include // offsetof() +#include "pycore_pyerrors.h" // _PyErr_ClearExcState() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_runtime_init.h" // _Py_ID() +#include "pycore_moduleobject.h" // _PyModule_GetState() +#include "structmember.h" // PyMemberDef +#include // offsetof() /*[clinic input] @@ -17,35 +18,80 @@ module _asyncio /* State of the _asyncio module */ -static PyObject *asyncio_mod; -static PyObject *traceback_extract_stack; -static PyObject *asyncio_get_event_loop_policy; -static PyObject *asyncio_future_repr_func; -static PyObject *asyncio_iscoroutine_func; -static PyObject *asyncio_task_get_stack_func; -static PyObject *asyncio_task_print_stack_func; -static PyObject *asyncio_task_repr_func; -static PyObject *asyncio_InvalidStateError; -static PyObject *asyncio_CancelledError; -static PyObject *context_kwname; -static int module_initialized; +typedef struct { + PyTypeObject *FutureIterType; + PyTypeObject *TaskStepMethWrapper_Type; + PyTypeObject *FutureType; + PyTypeObject *TaskType; + PyTypeObject *PyRunningLoopHolder_Type; + + PyObject *asyncio_mod; + PyObject *context_kwname; + + /* Dictionary containing tasks that are currently active in + all running event loops. {EventLoop: Task} */ + PyObject *current_tasks; + + /* WeakSet containing all alive tasks. */ + PyObject *all_tasks; + + /* An isinstance type cache for the 'is_coroutine()' function. */ + PyObject *iscoroutine_typecache; -static PyObject *cached_running_holder; -static volatile uint64_t cached_running_holder_tsid; + /* Imports from asyncio.events. */ + PyObject *asyncio_get_event_loop_policy; -/* Counter for autogenerated Task names */ -static uint64_t task_name_counter = 0; + /* Imports from asyncio.base_futures. */ + PyObject *asyncio_future_repr_func; -/* WeakSet containing all alive tasks. */ -static PyObject *all_tasks; + /* Imports from asyncio.exceptions. */ + PyObject *asyncio_CancelledError; + PyObject *asyncio_InvalidStateError; -/* Dictionary containing tasks that are currently active in - all running event loops. {EventLoop: Task} */ -static PyObject *current_tasks; + /* Imports from asyncio.base_tasks. */ + PyObject *asyncio_task_get_stack_func; + PyObject *asyncio_task_print_stack_func; + PyObject *asyncio_task_repr_func; -/* An isinstance type cache for the 'is_coroutine()' function. */ -static PyObject *iscoroutine_typecache; + /* Imports from asyncio.coroutines. */ + PyObject *asyncio_iscoroutine_func; + /* Imports from traceback. */ + PyObject *traceback_extract_stack; + + PyObject *cached_running_holder; // Borrowed ref. 
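The port applies one pattern throughout: former C-level globals move into the asyncio_state struct, static types become heap types built from a PyType_Spec and bound to the module with PyType_FromModuleAndSpec(), and the state is recovered from an instance via PyType_GetModuleByDef() on its heap type, which is what the get_asyncio_state_by_def() helper does. A minimal sketch of that pattern under assumed names; toy, toy_state, Example and PyInit_toy are illustrative, not taken from the patch:

#include <Python.h>
#include <assert.h>

typedef struct {
    PyTypeObject *ExampleType;       /* was: a static PyTypeObject global */
} toy_state;                         /* stands in for asyncio_state */

static struct PyModuleDef toymodule; /* tentative definition, filled in below */

/* Recover the per-module state from any instance of a type that was created
   against this module definition (compare get_asyncio_state_by_def()). */
static inline toy_state *
get_toy_state_by_def(PyObject *self)
{
    PyObject *mod = PyType_GetModuleByDef(Py_TYPE(self), &toymodule);
    assert(mod != NULL);
    return (toy_state *)PyModule_GetState(mod);
}

static PyType_Slot Example_slots[] = {
    {Py_tp_new, PyType_GenericNew},
    {0, NULL},
};

static PyType_Spec Example_spec = {
    .name = "toy.Example",
    .basicsize = sizeof(PyObject),
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE,
    .slots = Example_slots,
};

static int
toy_exec(PyObject *mod)
{
    toy_state *st = (toy_state *)PyModule_GetState(mod);
    /* Heap type tied to this particular module object. */
    st->ExampleType = (PyTypeObject *)PyType_FromModuleAndSpec(
            mod, &Example_spec, NULL);
    if (st->ExampleType == NULL) {
        return -1;
    }
    return PyModule_AddType(mod, st->ExampleType);
}

static PyModuleDef_Slot toy_slots[] = {
    {Py_mod_exec, toy_exec},
    {0, NULL},
};

static struct PyModuleDef toymodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "toy",
    .m_size = sizeof(toy_state),     /* per-module state lives here */
    .m_slots = toy_slots,
    /* m_traverse/m_clear/m_free omitted for brevity; real code needs them so
       the references held in the state participate in GC. */
};

PyMODINIT_FUNC
PyInit_toy(void)
{
    return PyModuleDef_Init(&toymodule);
}
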
+ volatile uint64_t cached_running_holder_tsid; + + /* Counter for autogenerated Task names */ + uint64_t task_name_counter; +} asyncio_state; + +static inline asyncio_state * +get_asyncio_state(PyObject *mod) +{ + asyncio_state *state = _PyModule_GetState(mod); + assert(state != NULL); + return state; +} + +static inline asyncio_state * +get_asyncio_state_by_cls(PyTypeObject *cls) +{ + asyncio_state *state = (asyncio_state *)PyType_GetModuleState(cls); + assert(state != NULL); + return state; +} + +static struct PyModuleDef _asynciomodule; + +static inline asyncio_state * +get_asyncio_state_by_def(PyObject *self) +{ + PyTypeObject *tp = Py_TYPE(self); + PyObject *mod = PyType_GetModuleByDef(tp, &_asynciomodule); + assert(mod != NULL); + return get_asyncio_state(mod); +} typedef enum { STATE_PENDING, @@ -101,16 +147,11 @@ typedef struct { } PyRunningLoopHolder; -static PyTypeObject FutureType; -static PyTypeObject TaskType; -static PyTypeObject PyRunningLoopHolder_Type; +#define Future_CheckExact(state, obj) Py_IS_TYPE(obj, state->FutureType) +#define Task_CheckExact(state, obj) Py_IS_TYPE(obj, state->TaskType) - -#define Future_CheckExact(obj) Py_IS_TYPE(obj, &FutureType) -#define Task_CheckExact(obj) Py_IS_TYPE(obj, &TaskType) - -#define Future_Check(obj) PyObject_TypeCheck(obj, &FutureType) -#define Task_Check(obj) PyObject_TypeCheck(obj, &TaskType) +#define Future_Check(state, obj) PyObject_TypeCheck(obj, state->FutureType) +#define Task_Check(state, obj) PyObject_TypeCheck(obj, state->TaskType) #include "clinic/_asynciomodule.c.h" @@ -124,11 +165,11 @@ class _asyncio.Future "FutureObj *" "&Future_Type" /* Get FutureIter from Future */ static PyObject * future_new_iter(PyObject *); -static PyRunningLoopHolder * new_running_loop_holder(PyObject *); +static PyRunningLoopHolder * new_running_loop_holder(asyncio_state *, PyObject *); static int -_is_coroutine(PyObject *coro) +_is_coroutine(asyncio_state *state, PyObject *coro) { /* 'coro' is not a native coroutine, call asyncio.iscoroutine() to check if it's another coroutine flavour. @@ -136,7 +177,7 @@ _is_coroutine(PyObject *coro) Do this check after 'future_init()'; in case we need to raise an error, __del__ needs a properly initialized object. */ - PyObject *res = PyObject_CallOneArg(asyncio_iscoroutine_func, coro); + PyObject *res = PyObject_CallOneArg(state->asyncio_iscoroutine_func, coro); if (res == NULL) { return -1; } @@ -147,12 +188,12 @@ _is_coroutine(PyObject *coro) return is_res_true; } - if (PySet_GET_SIZE(iscoroutine_typecache) < 100) { + if (PySet_GET_SIZE(state->iscoroutine_typecache) < 100) { /* Just in case we don't want to cache more than 100 positive types. That shouldn't ever happen, unless someone stressing the system on purpose. */ - if (PySet_Add(iscoroutine_typecache, (PyObject*) Py_TYPE(coro))) { + if (PySet_Add(state->iscoroutine_typecache, (PyObject*) Py_TYPE(coro))) { return -1; } } @@ -162,7 +203,7 @@ _is_coroutine(PyObject *coro) static inline int -is_coroutine(PyObject *coro) +is_coroutine(asyncio_state *state, PyObject *coro) { if (PyCoro_CheckExact(coro)) { return 1; @@ -178,10 +219,10 @@ is_coroutine(PyObject *coro) a pure-Python function in 99.9% cases. 
*/ int has_it = PySet_Contains( - iscoroutine_typecache, (PyObject*) Py_TYPE(coro)); + state->iscoroutine_typecache, (PyObject*) Py_TYPE(coro)); if (has_it == 0) { /* type(coro) is not in iscoroutine_typecache */ - return _is_coroutine(coro); + return _is_coroutine(state, coro); } /* either an error has occurred or @@ -192,13 +233,13 @@ is_coroutine(PyObject *coro) static PyObject * -get_future_loop(PyObject *fut) +get_future_loop(asyncio_state *state, PyObject *fut) { /* Implementation of `asyncio.futures._get_loop` */ PyObject *getloop; - if (Future_CheckExact(fut) || Task_CheckExact(fut)) { + if (Future_CheckExact(state, fut) || Task_CheckExact(state, fut)) { PyObject *loop = ((FutureObj *)fut)->fut_loop; return Py_NewRef(loop); } @@ -217,15 +258,17 @@ get_future_loop(PyObject *fut) static int -get_running_loop(PyObject **loop) +get_running_loop(asyncio_state *state, PyObject **loop) { PyObject *rl; PyThreadState *ts = _PyThreadState_GET(); uint64_t ts_id = PyThreadState_GetID(ts); - if (ts_id == cached_running_holder_tsid && cached_running_holder != NULL) { + if (state->cached_running_holder_tsid == ts_id && + state->cached_running_holder != NULL) + { // Fast path, check the cache. - rl = cached_running_holder; // borrowed + rl = state->cached_running_holder; // borrowed } else { PyObject *ts_dict = _PyThreadState_GetDict(ts); // borrowed @@ -244,11 +287,11 @@ get_running_loop(PyObject **loop) } } - cached_running_holder = rl; // borrowed - cached_running_holder_tsid = ts_id; + state->cached_running_holder = rl; // borrowed + state->cached_running_holder_tsid = ts_id; } - assert(Py_IS_TYPE(rl, &PyRunningLoopHolder_Type)); + assert(Py_IS_TYPE(rl, state->PyRunningLoopHolder_Type)); PyObject *running_loop = ((PyRunningLoopHolder *)rl)->rl_loop; if (running_loop == Py_None) { @@ -278,7 +321,7 @@ get_running_loop(PyObject **loop) static int -set_running_loop(PyObject *loop) +set_running_loop(asyncio_state *state, PyObject *loop) { PyObject *ts_dict = NULL; @@ -293,7 +336,7 @@ set_running_loop(PyObject *loop) return -1; } - PyRunningLoopHolder *rl = new_running_loop_holder(loop); + PyRunningLoopHolder *rl = new_running_loop_holder(state, loop); if (rl == NULL) { return -1; } @@ -306,20 +349,20 @@ set_running_loop(PyObject *loop) } Py_DECREF(rl); - cached_running_holder = (PyObject *)rl; - cached_running_holder_tsid = PyThreadState_GetID(tstate); + state->cached_running_holder = (PyObject *)rl; + state->cached_running_holder_tsid = PyThreadState_GetID(tstate); return 0; } static PyObject * -get_event_loop(int stacklevel) +get_event_loop(asyncio_state *state, int stacklevel) { PyObject *loop; PyObject *policy; - if (get_running_loop(&loop)) { + if (get_running_loop(state, &loop)) { return NULL; } if (loop != NULL) { @@ -333,7 +376,7 @@ get_event_loop(int stacklevel) return NULL; } - policy = PyObject_CallNoArgs(asyncio_get_event_loop_policy); + policy = PyObject_CallNoArgs(state->asyncio_get_event_loop_policy); if (policy == NULL) { return NULL; } @@ -345,7 +388,8 @@ get_event_loop(int stacklevel) static int -call_soon(PyObject *loop, PyObject *func, PyObject *arg, PyObject *ctx) +call_soon(asyncio_state *state, PyObject *loop, PyObject *func, PyObject *arg, + PyObject *ctx) { PyObject *handle; PyObject *stack[3]; @@ -372,7 +416,8 @@ call_soon(PyObject *loop, PyObject *func, PyObject *arg, PyObject *ctx) } stack[nargs] = (PyObject *)ctx; EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, callable); - handle = PyObject_Vectorcall(callable, stack, nargs, context_kwname); + handle = 
PyObject_Vectorcall(callable, stack, nargs, + state->context_kwname); Py_DECREF(callable); } @@ -403,17 +448,18 @@ future_ensure_alive(FutureObj *fut) } -#define ENSURE_FUTURE_ALIVE(fut) \ - do { \ - assert(Future_Check(fut) || Task_Check(fut)); \ - if (future_ensure_alive((FutureObj*)fut)) { \ - return NULL; \ - } \ +#define ENSURE_FUTURE_ALIVE(state, fut) \ + do { \ + assert(Future_Check(state, fut) || Task_Check(state, fut)); \ + (void)state; \ + if (future_ensure_alive((FutureObj*)fut)) { \ + return NULL; \ + } \ } while(0); static int -future_schedule_callbacks(FutureObj *fut) +future_schedule_callbacks(asyncio_state *state, FutureObj *fut) { Py_ssize_t len; Py_ssize_t i; @@ -421,7 +467,7 @@ future_schedule_callbacks(FutureObj *fut) if (fut->fut_callback0 != NULL) { /* There's a 1st callback */ - int ret = call_soon( + int ret = call_soon(state, fut->fut_loop, fut->fut_callback0, (PyObject *)fut, fut->fut_context0); @@ -455,7 +501,7 @@ future_schedule_callbacks(FutureObj *fut) PyObject *cb = PyTuple_GET_ITEM(cb_tup, 0); PyObject *ctx = PyTuple_GET_ITEM(cb_tup, 1); - if (call_soon(fut->fut_loop, cb, (PyObject *)fut, ctx)) { + if (call_soon(state, fut->fut_loop, cb, (PyObject *)fut, ctx)) { /* If an error occurs in pure-Python implementation, all callbacks are cleared. */ Py_CLEAR(fut->fut_callbacks); @@ -491,7 +537,8 @@ future_init(FutureObj *fut, PyObject *loop) fut->fut_blocking = 0; if (loop == Py_None) { - loop = get_event_loop(1); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + loop = get_event_loop(state, 1); if (loop == NULL) { return -1; } @@ -517,7 +564,8 @@ future_init(FutureObj *fut, PyObject *loop) method, which is called during the interpreter shutdown and the traceback module is already unloaded. */ - fut->fut_source_tb = PyObject_CallNoArgs(traceback_extract_stack); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + fut->fut_source_tb = PyObject_CallNoArgs(state->traceback_extract_stack); if (fut->fut_source_tb == NULL) { return -1; } @@ -527,14 +575,14 @@ future_init(FutureObj *fut, PyObject *loop) } static PyObject * -future_set_result(FutureObj *fut, PyObject *res) +future_set_result(asyncio_state *state, FutureObj *fut, PyObject *res) { if (future_ensure_alive(fut)) { return NULL; } if (fut->fut_state != STATE_PENDING) { - PyErr_SetString(asyncio_InvalidStateError, "invalid state"); + PyErr_SetString(state->asyncio_InvalidStateError, "invalid state"); return NULL; } @@ -542,19 +590,19 @@ future_set_result(FutureObj *fut, PyObject *res) fut->fut_result = Py_NewRef(res); fut->fut_state = STATE_FINISHED; - if (future_schedule_callbacks(fut) == -1) { + if (future_schedule_callbacks(state, fut) == -1) { return NULL; } Py_RETURN_NONE; } static PyObject * -future_set_exception(FutureObj *fut, PyObject *exc) +future_set_exception(asyncio_state *state, FutureObj *fut, PyObject *exc) { PyObject *exc_val = NULL; if (fut->fut_state != STATE_PENDING) { - PyErr_SetString(asyncio_InvalidStateError, "invalid state"); + PyErr_SetString(state->asyncio_InvalidStateError, "invalid state"); return NULL; } @@ -565,7 +613,7 @@ future_set_exception(FutureObj *fut, PyObject *exc) } if (fut->fut_state != STATE_PENDING) { Py_DECREF(exc_val); - PyErr_SetString(asyncio_InvalidStateError, "invalid state"); + PyErr_SetString(state->asyncio_InvalidStateError, "invalid state"); return NULL; } } @@ -591,7 +639,7 @@ future_set_exception(FutureObj *fut, PyObject *exc) fut->fut_exception_tb = PyException_GetTraceback(exc_val); fut->fut_state = STATE_FINISHED; - 
if (future_schedule_callbacks(fut) == -1) { + if (future_schedule_callbacks(state, fut) == -1) { return NULL; } @@ -600,7 +648,7 @@ future_set_exception(FutureObj *fut, PyObject *exc) } static PyObject * -create_cancelled_error(FutureObj *fut) +create_cancelled_error(asyncio_state *state, FutureObj *fut) { PyObject *exc; if (fut->fut_cancelled_exc != NULL) { @@ -611,34 +659,35 @@ create_cancelled_error(FutureObj *fut) } PyObject *msg = fut->fut_cancel_msg; if (msg == NULL || msg == Py_None) { - exc = PyObject_CallNoArgs(asyncio_CancelledError); + exc = PyObject_CallNoArgs(state->asyncio_CancelledError); } else { - exc = PyObject_CallOneArg(asyncio_CancelledError, msg); + exc = PyObject_CallOneArg(state->asyncio_CancelledError, msg); } return exc; } static void -future_set_cancelled_error(FutureObj *fut) +future_set_cancelled_error(asyncio_state *state, FutureObj *fut) { - PyObject *exc = create_cancelled_error(fut); + PyObject *exc = create_cancelled_error(state, fut); if (exc == NULL) { return; } - PyErr_SetObject(asyncio_CancelledError, exc); + PyErr_SetObject(state->asyncio_CancelledError, exc); Py_DECREF(exc); } static int -future_get_result(FutureObj *fut, PyObject **result) +future_get_result(asyncio_state *state, FutureObj *fut, PyObject **result) { if (fut->fut_state == STATE_CANCELLED) { - future_set_cancelled_error(fut); + future_set_cancelled_error(state, fut); return -1; } if (fut->fut_state != STATE_FINISHED) { - PyErr_SetString(asyncio_InvalidStateError, "Result is not set."); + PyErr_SetString(state->asyncio_InvalidStateError, + "Result is not set."); return -1; } @@ -661,7 +710,8 @@ future_get_result(FutureObj *fut, PyObject **result) } static PyObject * -future_add_done_callback(FutureObj *fut, PyObject *arg, PyObject *ctx) +future_add_done_callback(asyncio_state *state, FutureObj *fut, PyObject *arg, + PyObject *ctx) { if (!future_is_alive(fut)) { PyErr_SetString(PyExc_RuntimeError, "uninitialized Future object"); @@ -671,7 +721,7 @@ future_add_done_callback(FutureObj *fut, PyObject *arg, PyObject *ctx) if (fut->fut_state != STATE_PENDING) { /* The future is done/cancelled, so schedule the callback right away. 
*/ - if (call_soon(fut->fut_loop, arg, (PyObject*) fut, ctx)) { + if (call_soon(state, fut->fut_loop, arg, (PyObject*) fut, ctx)) { return NULL; } } @@ -735,7 +785,7 @@ future_add_done_callback(FutureObj *fut, PyObject *arg, PyObject *ctx) } static PyObject * -future_cancel(FutureObj *fut, PyObject *msg) +future_cancel(asyncio_state *state, FutureObj *fut, PyObject *msg) { fut->fut_log_tb = 0; @@ -747,7 +797,7 @@ future_cancel(FutureObj *fut, PyObject *msg) Py_XINCREF(msg); Py_XSETREF(fut->fut_cancel_msg, msg); - if (future_schedule_callbacks(fut) == -1) { + if (future_schedule_callbacks(state, fut) == -1) { return NULL; } @@ -802,6 +852,7 @@ FutureObj_clear(FutureObj *fut) static int FutureObj_traverse(FutureObj *fut, visitproc visit, void *arg) { + Py_VISIT(Py_TYPE(fut)); Py_VISIT(fut->fut_loop); Py_VISIT(fut->fut_callback0); Py_VISIT(fut->fut_context0); @@ -830,15 +881,16 @@ static PyObject * _asyncio_Future_result_impl(FutureObj *self) /*[clinic end generated code: output=f35f940936a4b1e5 input=49ecf9cf5ec50dc5]*/ { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)self); PyObject *result; if (!future_is_alive(self)) { - PyErr_SetString(asyncio_InvalidStateError, + PyErr_SetString(state->asyncio_InvalidStateError, "Future object is not initialized."); return NULL; } - int res = future_get_result(self, &result); + int res = future_get_result(state, self, &result); if (res == -1) { return NULL; @@ -858,6 +910,9 @@ _asyncio_Future_result_impl(FutureObj *self) /*[clinic input] _asyncio.Future.exception + cls: defining_class + / + Return the exception that was set on this future. The exception (or None if no exception was set) is returned only if @@ -867,22 +922,26 @@ InvalidStateError. [clinic start generated code]*/ static PyObject * -_asyncio_Future_exception_impl(FutureObj *self) -/*[clinic end generated code: output=88b20d4f855e0710 input=733547a70c841c68]*/ +_asyncio_Future_exception_impl(FutureObj *self, PyTypeObject *cls) +/*[clinic end generated code: output=ce75576b187c905b input=3faf15c22acdb60d]*/ { if (!future_is_alive(self)) { - PyErr_SetString(asyncio_InvalidStateError, + asyncio_state *state = get_asyncio_state_by_cls(cls); + PyErr_SetString(state->asyncio_InvalidStateError, "Future object is not initialized."); return NULL; } if (self->fut_state == STATE_CANCELLED) { - future_set_cancelled_error(self); + asyncio_state *state = get_asyncio_state_by_cls(cls); + future_set_cancelled_error(state, self); return NULL; } if (self->fut_state != STATE_FINISHED) { - PyErr_SetString(asyncio_InvalidStateError, "Exception is not set."); + asyncio_state *state = get_asyncio_state_by_cls(cls); + PyErr_SetString(state->asyncio_InvalidStateError, + "Exception is not set."); return NULL; } @@ -897,6 +956,7 @@ _asyncio_Future_exception_impl(FutureObj *self) /*[clinic input] _asyncio.Future.set_result + cls: defining_class result: object / @@ -907,16 +967,19 @@ InvalidStateError. 
[clinic start generated code]*/ static PyObject * -_asyncio_Future_set_result(FutureObj *self, PyObject *result) -/*[clinic end generated code: output=1ec2e6bcccd6f2ce input=8b75172c2a7b05f1]*/ +_asyncio_Future_set_result_impl(FutureObj *self, PyTypeObject *cls, + PyObject *result) +/*[clinic end generated code: output=99afbbe78f99c32d input=d5a41c1e353acc2e]*/ { - ENSURE_FUTURE_ALIVE(self) - return future_set_result(self, result); + asyncio_state *state = get_asyncio_state_by_cls(cls); + ENSURE_FUTURE_ALIVE(state, self) + return future_set_result(state, self, result); } /*[clinic input] _asyncio.Future.set_exception + cls: defining_class exception: object / @@ -927,16 +990,19 @@ InvalidStateError. [clinic start generated code]*/ static PyObject * -_asyncio_Future_set_exception(FutureObj *self, PyObject *exception) -/*[clinic end generated code: output=f1c1b0cd321be360 input=e45b7d7aa71cc66d]*/ +_asyncio_Future_set_exception_impl(FutureObj *self, PyTypeObject *cls, + PyObject *exception) +/*[clinic end generated code: output=0a5e8b5a52f058d6 input=a245cd49d3df939b]*/ { - ENSURE_FUTURE_ALIVE(self) - return future_set_exception(self, exception); + asyncio_state *state = get_asyncio_state_by_cls(cls); + ENSURE_FUTURE_ALIVE(state, self) + return future_set_exception(state, self, exception); } /*[clinic input] _asyncio.Future.add_done_callback + cls: defining_class fn: object / * @@ -950,25 +1016,27 @@ scheduled with call_soon. [clinic start generated code]*/ static PyObject * -_asyncio_Future_add_done_callback_impl(FutureObj *self, PyObject *fn, - PyObject *context) -/*[clinic end generated code: output=7ce635bbc9554c1e input=15ab0693a96e9533]*/ +_asyncio_Future_add_done_callback_impl(FutureObj *self, PyTypeObject *cls, + PyObject *fn, PyObject *context) +/*[clinic end generated code: output=922e9a4cbd601167 input=599261c521458cc2]*/ { + asyncio_state *state = get_asyncio_state_by_cls(cls); if (context == NULL) { context = PyContext_CopyCurrent(); if (context == NULL) { return NULL; } - PyObject *res = future_add_done_callback(self, fn, context); + PyObject *res = future_add_done_callback(state, self, fn, context); Py_DECREF(context); return res; } - return future_add_done_callback(self, fn, context); + return future_add_done_callback(state, self, fn, context); } /*[clinic input] _asyncio.Future.remove_done_callback + cls: defining_class fn: object / @@ -978,14 +1046,16 @@ Returns the number of callbacks removed. [clinic start generated code]*/ static PyObject * -_asyncio_Future_remove_done_callback(FutureObj *self, PyObject *fn) -/*[clinic end generated code: output=5ab1fb52b24ef31f input=0a43280a149d505b]*/ +_asyncio_Future_remove_done_callback_impl(FutureObj *self, PyTypeObject *cls, + PyObject *fn) +/*[clinic end generated code: output=2da35ccabfe41b98 input=c7518709b86fc747]*/ { PyObject *newlist; Py_ssize_t len, i, j=0; Py_ssize_t cleared_callback0 = 0; - ENSURE_FUTURE_ALIVE(self) + asyncio_state *state = get_asyncio_state_by_cls(cls); + ENSURE_FUTURE_ALIVE(state, self) if (self->fut_callback0 != NULL) { int cmp = PyObject_RichCompareBool(self->fut_callback0, fn, Py_EQ); @@ -1082,6 +1152,8 @@ _asyncio_Future_remove_done_callback(FutureObj *self, PyObject *fn) /*[clinic input] _asyncio.Future.cancel + cls: defining_class + / msg: object = None Cancel the future and schedule callbacks. @@ -1092,11 +1164,13 @@ return True. 
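The cls: defining_class parameter that these clinic blocks gain is the other half of the same pattern: Argument Clinic then generates a wrapper using the METH_METHOD calling convention, which passes the class that defines the method, and PyType_GetModuleState() on that class yields the module state directly; this is all get_asyncio_state_by_cls() does. Stripped of clinic boilerplate, such a wrapper reduces to roughly the following; Example_method is an illustrative name, not from the patch:

#include <Python.h>
#include <assert.h>

/* METH_METHOD | METH_FASTCALL | METH_KEYWORDS calling convention: the class
   that defines the method arrives between self and the argument array. */
static PyObject *
Example_method(PyObject *self, PyTypeObject *defining_class,
               PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    void *state = PyType_GetModuleState(defining_class);
    assert(state != NULL);   /* the module was created with m_size > 0 */
    /* ... the real impl functions use the asyncio_state found this way ... */
    Py_RETURN_NONE;
}
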
[clinic start generated code]*/ static PyObject * -_asyncio_Future_cancel_impl(FutureObj *self, PyObject *msg) -/*[clinic end generated code: output=3edebbc668e5aba3 input=925eb545251f2c5a]*/ +_asyncio_Future_cancel_impl(FutureObj *self, PyTypeObject *cls, + PyObject *msg) +/*[clinic end generated code: output=074956f35904b034 input=bba8f8b786941a94]*/ { - ENSURE_FUTURE_ALIVE(self) - return future_cancel(self, msg); + asyncio_state *state = get_asyncio_state_by_cls(cls); + ENSURE_FUTURE_ALIVE(state, self) + return future_cancel(state, self, msg); } /*[clinic input] @@ -1141,14 +1215,18 @@ _asyncio_Future_done_impl(FutureObj *self) /*[clinic input] _asyncio.Future.get_loop + cls: defining_class + / + Return the event loop the Future is bound to. [clinic start generated code]*/ static PyObject * -_asyncio_Future_get_loop_impl(FutureObj *self) -/*[clinic end generated code: output=119b6ea0c9816c3f input=cba48c2136c79d1f]*/ +_asyncio_Future_get_loop_impl(FutureObj *self, PyTypeObject *cls) +/*[clinic end generated code: output=f50ea6c374d9ee97 input=163c2c498b45a1f0]*/ { - ENSURE_FUTURE_ALIVE(self) + asyncio_state *state = get_asyncio_state_by_cls(cls); + ENSURE_FUTURE_ALIVE(state, self) return Py_NewRef(self->fut_loop); } @@ -1185,7 +1263,8 @@ FutureObj_set_blocking(FutureObj *fut, PyObject *val, void *Py_UNUSED(ignored)) static PyObject * FutureObj_get_log_traceback(FutureObj *fut, void *Py_UNUSED(ignored)) { - ENSURE_FUTURE_ALIVE(fut) + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + ENSURE_FUTURE_ALIVE(state, fut) if (fut->fut_log_tb) { Py_RETURN_TRUE; } @@ -1226,9 +1305,10 @@ FutureObj_get_loop(FutureObj *fut, void *Py_UNUSED(ignored)) static PyObject * FutureObj_get_callbacks(FutureObj *fut, void *Py_UNUSED(ignored)) { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); Py_ssize_t i; - ENSURE_FUTURE_ALIVE(fut) + ENSURE_FUTURE_ALIVE(state, fut) if (fut->fut_callback0 == NULL) { if (fut->fut_callbacks == NULL) { @@ -1277,7 +1357,8 @@ FutureObj_get_callbacks(FutureObj *fut, void *Py_UNUSED(ignored)) static PyObject * FutureObj_get_result(FutureObj *fut, void *Py_UNUSED(ignored)) { - ENSURE_FUTURE_ALIVE(fut) + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + ENSURE_FUTURE_ALIVE(state, fut) if (fut->fut_result == NULL) { Py_RETURN_NONE; } @@ -1287,7 +1368,8 @@ FutureObj_get_result(FutureObj *fut, void *Py_UNUSED(ignored)) static PyObject * FutureObj_get_exception(FutureObj *fut, void *Py_UNUSED(ignored)) { - ENSURE_FUTURE_ALIVE(fut) + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + ENSURE_FUTURE_ALIVE(state, fut) if (fut->fut_exception == NULL) { Py_RETURN_NONE; } @@ -1328,9 +1410,10 @@ FutureObj_set_cancel_message(FutureObj *fut, PyObject *msg, static PyObject * FutureObj_get_state(FutureObj *fut, void *Py_UNUSED(ignored)) { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); PyObject *ret = NULL; - ENSURE_FUTURE_ALIVE(fut) + ENSURE_FUTURE_ALIVE(state, fut) switch (fut->fut_state) { case STATE_PENDING: @@ -1351,8 +1434,9 @@ FutureObj_get_state(FutureObj *fut, void *Py_UNUSED(ignored)) static PyObject * FutureObj_repr(FutureObj *fut) { - ENSURE_FUTURE_ALIVE(fut) - return PyObject_CallOneArg(asyncio_future_repr_func, (PyObject *)fut); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + ENSURE_FUTURE_ALIVE(state, fut) + return PyObject_CallOneArg(state->asyncio_future_repr_func, (PyObject *)fut); } /*[clinic input] @@ -1368,7 +1452,8 @@ static PyObject * 
_asyncio_Future__make_cancelled_error_impl(FutureObj *self) /*[clinic end generated code: output=a5df276f6c1213de input=ac6effe4ba795ecc]*/ { - return create_cancelled_error(self); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)self); + return create_cancelled_error(state, self); } static void @@ -1431,13 +1516,6 @@ FutureObj_finalize(FutureObj *fut) PyErr_Restore(error_type, error_value, error_traceback); } -static PyAsyncMethods FutureType_as_async = { - (unaryfunc)future_new_iter, /* am_await */ - 0, /* am_aiter */ - 0, /* am_anext */ - 0, /* am_send */ -}; - static PyMethodDef FutureType_methods[] = { _ASYNCIO_FUTURE_RESULT_METHODDEF _ASYNCIO_FUTURE_EXCEPTION_METHODDEF @@ -1454,6 +1532,12 @@ static PyMethodDef FutureType_methods[] = { {NULL, NULL} /* Sentinel */ }; +static PyMemberDef FutureType_members[] = { + {"__weaklistoffset__", T_PYSSIZET, offsetof(FutureObj, fut_weakreflist), READONLY}, + {"__dictoffset__", T_PYSSIZET, offsetof(FutureObj, dict), READONLY}, + {NULL}, +}; + #define FUTURE_COMMON_GETSETLIST \ {"_state", (getter)FutureObj_get_state, NULL, NULL}, \ {"_asyncio_future_blocking", (getter)FutureObj_get_blocking, \ @@ -1476,25 +1560,31 @@ static PyGetSetDef FutureType_getsetlist[] = { static void FutureObj_dealloc(PyObject *self); -static PyTypeObject FutureType = { - PyVarObject_HEAD_INIT(NULL, 0) - "_asyncio.Future", - sizeof(FutureObj), /* tp_basicsize */ - .tp_dealloc = FutureObj_dealloc, - .tp_as_async = &FutureType_as_async, - .tp_repr = (reprfunc)FutureObj_repr, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - .tp_doc = _asyncio_Future___init____doc__, - .tp_traverse = (traverseproc)FutureObj_traverse, - .tp_clear = (inquiry)FutureObj_clear, - .tp_weaklistoffset = offsetof(FutureObj, fut_weakreflist), - .tp_iter = (getiterfunc)future_new_iter, - .tp_methods = FutureType_methods, - .tp_getset = FutureType_getsetlist, - .tp_dictoffset = offsetof(FutureObj, dict), - .tp_init = (initproc)_asyncio_Future___init__, - .tp_new = PyType_GenericNew, - .tp_finalize = (destructor)FutureObj_finalize, +static PyType_Slot Future_slots[] = { + {Py_tp_dealloc, FutureObj_dealloc}, + {Py_tp_repr, (reprfunc)FutureObj_repr}, + {Py_tp_doc, (void *)_asyncio_Future___init____doc__}, + {Py_tp_traverse, (traverseproc)FutureObj_traverse}, + {Py_tp_clear, (inquiry)FutureObj_clear}, + {Py_tp_iter, (getiterfunc)future_new_iter}, + {Py_tp_methods, FutureType_methods}, + {Py_tp_members, FutureType_members}, + {Py_tp_getset, FutureType_getsetlist}, + {Py_tp_init, (initproc)_asyncio_Future___init__}, + {Py_tp_new, PyType_GenericNew}, + {Py_tp_finalize, (destructor)FutureObj_finalize}, + + // async slots + {Py_am_await, (unaryfunc)future_new_iter}, + {0, NULL}, +}; + +static PyType_Spec Future_spec = { + .name = "_asyncio.Future", + .basicsize = sizeof(FutureObj), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = Future_slots, }; static void @@ -1502,16 +1592,12 @@ FutureObj_dealloc(PyObject *self) { FutureObj *fut = (FutureObj *)self; - if (Future_CheckExact(fut)) { - /* When fut is subclass of Future, finalizer is called from - * subtype_dealloc. - */ - if (PyObject_CallFinalizerFromDealloc(self) < 0) { - // resurrected. - return; - } + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // resurrected. 
+ return; } + PyTypeObject *tp = Py_TYPE(fut); PyObject_GC_UnTrack(self); if (fut->fut_weakreflist != NULL) { @@ -1519,7 +1605,8 @@ FutureObj_dealloc(PyObject *self) } (void)FutureObj_clear(fut); - Py_TYPE(fut)->tp_free(fut); + tp->tp_free(fut); + Py_DECREF(tp); } @@ -1539,8 +1626,9 @@ static Py_ssize_t fi_freelist_len = 0; static void FutureIter_dealloc(futureiterobject *it) { + PyTypeObject *tp = Py_TYPE(it); PyObject_GC_UnTrack(it); - Py_CLEAR(it->future); + tp->tp_clear((PyObject *)it); if (fi_freelist_len < FI_FREELIST_MAXLEN) { fi_freelist_len++; @@ -1549,6 +1637,7 @@ FutureIter_dealloc(futureiterobject *it) } else { PyObject_GC_Del(it); + Py_DECREF(tp); } } @@ -1686,16 +1775,24 @@ FutureIter_throw(futureiterobject *self, PyObject *const *args, Py_ssize_t nargs return NULL; } +static int +FutureIter_clear(futureiterobject *it) +{ + Py_CLEAR(it->future); + return 0; +} + static PyObject * FutureIter_close(futureiterobject *self, PyObject *arg) { - Py_CLEAR(self->future); + (void)FutureIter_clear(self); Py_RETURN_NONE; } static int FutureIter_traverse(futureiterobject *it, visitproc visit, void *arg) { + Py_VISIT(Py_TYPE(it)); Py_VISIT(it->future); return 0; } @@ -1707,27 +1804,26 @@ static PyMethodDef FutureIter_methods[] = { {NULL, NULL} /* Sentinel */ }; -static PyAsyncMethods FutureIterType_as_async = { - 0, /* am_await */ - 0, /* am_aiter */ - 0, /* am_anext */ - (sendfunc)FutureIter_am_send, /* am_send */ +static PyType_Slot FutureIter_slots[] = { + {Py_tp_dealloc, (destructor)FutureIter_dealloc}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_traverse, (traverseproc)FutureIter_traverse}, + {Py_tp_clear, FutureIter_clear}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, (iternextfunc)FutureIter_iternext}, + {Py_tp_methods, FutureIter_methods}, + + // async methods + {Py_am_send, (sendfunc)FutureIter_am_send}, + {0, NULL}, }; - -static PyTypeObject FutureIterType = { - PyVarObject_HEAD_INIT(NULL, 0) - "_asyncio.FutureIter", - .tp_basicsize = sizeof(futureiterobject), - .tp_itemsize = 0, - .tp_dealloc = (destructor)FutureIter_dealloc, - .tp_as_async = &FutureIterType_as_async, - .tp_getattro = PyObject_GenericGetAttr, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_traverse = (traverseproc)FutureIter_traverse, - .tp_iter = PyObject_SelfIter, - .tp_iternext = (iternextfunc)FutureIter_iternext, - .tp_methods = FutureIter_methods, +static PyType_Spec FutureIter_spec = { + .name = "_asyncio.FutureIter", + .basicsize = sizeof(futureiterobject), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = FutureIter_slots, }; static PyObject * @@ -1735,12 +1831,13 @@ future_new_iter(PyObject *fut) { futureiterobject *it; - if (!PyObject_TypeCheck(fut, &FutureType)) { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)fut); + if (!Future_Check(state, fut)) { PyErr_BadInternalCall(); return NULL; } - ENSURE_FUTURE_ALIVE(fut) + ENSURE_FUTURE_ALIVE(state, fut) if (fi_freelist_len) { fi_freelist_len--; @@ -1750,7 +1847,7 @@ future_new_iter(PyObject *fut) _Py_NewReference((PyObject*) it); } else { - it = PyObject_GC_New(futureiterobject, &FutureIterType); + it = PyObject_GC_New(futureiterobject, state->FutureIterType); if (it == NULL) { return NULL; } @@ -1770,9 +1867,9 @@ class _asyncio.Task "TaskObj *" "&Task_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=719dcef0fcc03b37]*/ -static int task_call_step_soon(TaskObj *, PyObject *); +static int 
task_call_step_soon(asyncio_state *state, TaskObj *, PyObject *); static PyObject * task_wakeup(TaskObj *, PyObject *); -static PyObject * task_step(TaskObj *, PyObject *); +static PyObject * task_step(asyncio_state *, TaskObj *, PyObject *); /* ----- Task._step wrapper */ @@ -1787,9 +1884,11 @@ TaskStepMethWrapper_clear(TaskStepMethWrapper *o) static void TaskStepMethWrapper_dealloc(TaskStepMethWrapper *o) { + PyTypeObject *tp = Py_TYPE(o); PyObject_GC_UnTrack(o); (void)TaskStepMethWrapper_clear(o); Py_TYPE(o)->tp_free(o); + Py_DECREF(tp); } static PyObject * @@ -1804,13 +1903,15 @@ TaskStepMethWrapper_call(TaskStepMethWrapper *o, PyErr_SetString(PyExc_TypeError, "function takes no positional arguments"); return NULL; } - return task_step(o->sw_task, o->sw_arg); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)o); + return task_step(state, o->sw_task, o->sw_arg); } static int TaskStepMethWrapper_traverse(TaskStepMethWrapper *o, visitproc visit, void *arg) { + Py_VISIT(Py_TYPE(o)); Py_VISIT(o->sw_task); Py_VISIT(o->sw_arg); return 0; @@ -1830,25 +1931,30 @@ static PyGetSetDef TaskStepMethWrapper_getsetlist[] = { {NULL} /* Sentinel */ }; -static PyTypeObject TaskStepMethWrapper_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "TaskStepMethWrapper", - .tp_basicsize = sizeof(TaskStepMethWrapper), - .tp_itemsize = 0, - .tp_getset = TaskStepMethWrapper_getsetlist, - .tp_dealloc = (destructor)TaskStepMethWrapper_dealloc, - .tp_call = (ternaryfunc)TaskStepMethWrapper_call, - .tp_getattro = PyObject_GenericGetAttr, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, - .tp_traverse = (traverseproc)TaskStepMethWrapper_traverse, - .tp_clear = (inquiry)TaskStepMethWrapper_clear, +static PyType_Slot TaskStepMethWrapper_slots[] = { + {Py_tp_getset, TaskStepMethWrapper_getsetlist}, + {Py_tp_dealloc, (destructor)TaskStepMethWrapper_dealloc}, + {Py_tp_call, (ternaryfunc)TaskStepMethWrapper_call}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_traverse, (traverseproc)TaskStepMethWrapper_traverse}, + {Py_tp_clear, (inquiry)TaskStepMethWrapper_clear}, + {0, NULL}, +}; + +static PyType_Spec TaskStepMethWrapper_spec = { + .name = "_asyncio.TaskStepMethWrapper", + .basicsize = sizeof(TaskStepMethWrapper), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = TaskStepMethWrapper_slots, }; static PyObject * TaskStepMethWrapper_new(TaskObj *task, PyObject *arg) { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)task); TaskStepMethWrapper *o; - o = PyObject_GC_New(TaskStepMethWrapper, &TaskStepMethWrapper_Type); + o = PyObject_GC_New(TaskStepMethWrapper, state->TaskStepMethWrapper_Type); if (o == NULL) { return NULL; } @@ -1872,10 +1978,10 @@ static PyMethodDef TaskWakeupDef = { /* ----- Task introspection helpers */ static int -register_task(PyObject *task) +register_task(asyncio_state *state, PyObject *task) { - PyObject *res = PyObject_CallMethodOneArg(all_tasks, - &_Py_ID(add), task); + PyObject *res = PyObject_CallMethodOneArg(state->all_tasks, + &_Py_ID(add), task); if (res == NULL) { return -1; } @@ -1885,9 +1991,9 @@ register_task(PyObject *task) static int -unregister_task(PyObject *task) +unregister_task(asyncio_state *state, PyObject *task) { - PyObject *res = PyObject_CallMethodOneArg(all_tasks, + PyObject *res = PyObject_CallMethodOneArg(state->all_tasks, &_Py_ID(discard), task); if (res == NULL) { return -1; @@ -1898,7 +2004,7 @@ unregister_task(PyObject *task) static int -enter_task(PyObject *loop, PyObject *task) 
+enter_task(asyncio_state *state, PyObject *loop, PyObject *task) { PyObject *item; Py_hash_t hash; @@ -1906,7 +2012,7 @@ enter_task(PyObject *loop, PyObject *task) if (hash == -1) { return -1; } - item = _PyDict_GetItem_KnownHash(current_tasks, loop, hash); + item = _PyDict_GetItem_KnownHash(state->current_tasks, loop, hash); if (item != NULL) { Py_INCREF(item); PyErr_Format( @@ -1920,12 +2026,12 @@ enter_task(PyObject *loop, PyObject *task) if (PyErr_Occurred()) { return -1; } - return _PyDict_SetItem_KnownHash(current_tasks, loop, task, hash); + return _PyDict_SetItem_KnownHash(state->current_tasks, loop, task, hash); } static int -leave_task(PyObject *loop, PyObject *task) +leave_task(asyncio_state *state, PyObject *loop, PyObject *task) /*[clinic end generated code: output=0ebf6db4b858fb41 input=51296a46313d1ad8]*/ { PyObject *item; @@ -1934,7 +2040,7 @@ leave_task(PyObject *loop, PyObject *task) if (hash == -1) { return -1; } - item = _PyDict_GetItem_KnownHash(current_tasks, loop, hash); + item = _PyDict_GetItem_KnownHash(state->current_tasks, loop, hash); if (item != task) { if (item == NULL) { /* Not entered, replace with None */ @@ -1946,7 +2052,7 @@ leave_task(PyObject *loop, PyObject *task) task, item, NULL); return -1; } - return _PyDict_DelItem_KnownHash(current_tasks, loop, hash); + return _PyDict_DelItem_KnownHash(state->current_tasks, loop, hash); } /* ----- Task */ @@ -1973,7 +2079,8 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, return -1; } - int is_coro = is_coroutine(coro); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)self); + int is_coro = is_coroutine(state, coro); if (is_coro == -1) { return -1; } @@ -2002,7 +2109,8 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, Py_XSETREF(self->task_coro, coro); if (name == Py_None) { - name = PyUnicode_FromFormat("Task-%" PRIu64, ++task_name_counter); + name = PyUnicode_FromFormat("Task-%" PRIu64, + ++state->task_name_counter); } else if (!PyUnicode_CheckExact(name)) { name = PyObject_Str(name); } else { @@ -2013,10 +2121,10 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, return -1; } - if (task_call_step_soon(self, NULL)) { + if (task_call_step_soon(state, self, NULL)) { return -1; } - return register_task((PyObject*)self); + return register_task(state, (PyObject*)self); } static int @@ -2033,11 +2141,23 @@ TaskObj_clear(TaskObj *task) static int TaskObj_traverse(TaskObj *task, visitproc visit, void *arg) { + Py_VISIT(Py_TYPE(task)); Py_VISIT(task->task_context); Py_VISIT(task->task_coro); Py_VISIT(task->task_name); Py_VISIT(task->task_fut_waiter); - (void)FutureObj_traverse((FutureObj*) task, visit, arg); + FutureObj *fut = (FutureObj *)task; + Py_VISIT(fut->fut_loop); + Py_VISIT(fut->fut_callback0); + Py_VISIT(fut->fut_context0); + Py_VISIT(fut->fut_callbacks); + Py_VISIT(fut->fut_result); + Py_VISIT(fut->fut_exception); + Py_VISIT(fut->fut_exception_tb); + Py_VISIT(fut->fut_source_tb); + Py_VISIT(fut->fut_cancel_msg); + Py_VISIT(fut->fut_cancelled_exc); + Py_VISIT(fut->dict); return 0; } @@ -2101,7 +2221,9 @@ TaskObj_get_fut_waiter(TaskObj *task, void *Py_UNUSED(ignored)) static PyObject * TaskObj_repr(TaskObj *task) { - return PyObject_CallOneArg(asyncio_task_repr_func, (PyObject *)task); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)task); + return PyObject_CallOneArg(state->asyncio_task_repr_func, + (PyObject *)task); } @@ -2238,6 +2360,8 @@ _asyncio_Task_uncancel_impl(TaskObj *self) /*[clinic input] 
_asyncio.Task.get_stack + cls: defining_class + / * limit: object = None @@ -2263,16 +2387,20 @@ returned for a suspended coroutine. [clinic start generated code]*/ static PyObject * -_asyncio_Task_get_stack_impl(TaskObj *self, PyObject *limit) -/*[clinic end generated code: output=c9aeeeebd1e18118 input=05b323d42b809b90]*/ +_asyncio_Task_get_stack_impl(TaskObj *self, PyTypeObject *cls, + PyObject *limit) +/*[clinic end generated code: output=6774dfc10d3857fa input=8e01c9b2618ae953]*/ { + asyncio_state *state = get_asyncio_state_by_cls(cls); return PyObject_CallFunctionObjArgs( - asyncio_task_get_stack_func, self, limit, NULL); + state->asyncio_task_get_stack_func, self, limit, NULL); } /*[clinic input] _asyncio.Task.print_stack + cls: defining_class + / * limit: object = None file: object = None @@ -2287,12 +2415,13 @@ to sys.stderr. [clinic start generated code]*/ static PyObject * -_asyncio_Task_print_stack_impl(TaskObj *self, PyObject *limit, - PyObject *file) -/*[clinic end generated code: output=7339e10314cd3f4d input=1a0352913b7fcd92]*/ +_asyncio_Task_print_stack_impl(TaskObj *self, PyTypeObject *cls, + PyObject *limit, PyObject *file) +/*[clinic end generated code: output=b38affe9289ec826 input=150b35ba2d3a7dee]*/ { + asyncio_state *state = get_asyncio_state_by_cls(cls); return PyObject_CallFunctionObjArgs( - asyncio_task_print_stack_func, self, limit, file, NULL); + state->asyncio_task_print_stack_func, self, limit, file, NULL); } /*[clinic input] @@ -2475,6 +2604,12 @@ static PyMethodDef TaskType_methods[] = { {NULL, NULL} /* Sentinel */ }; +static PyMemberDef TaskType_members[] = { + {"__weaklistoffset__", T_PYSSIZET, offsetof(TaskObj, task_weakreflist), READONLY}, + {"__dictoffset__", T_PYSSIZET, offsetof(TaskObj, dict), READONLY}, + {NULL}, +}; + static PyGetSetDef TaskType_getsetlist[] = { FUTURE_COMMON_GETSETLIST {"_log_destroy_pending", (getter)TaskObj_get_log_destroy_pending, @@ -2485,26 +2620,31 @@ static PyGetSetDef TaskType_getsetlist[] = { {NULL} /* Sentinel */ }; -static PyTypeObject TaskType = { - PyVarObject_HEAD_INIT(NULL, 0) - "_asyncio.Task", - sizeof(TaskObj), /* tp_basicsize */ - .tp_base = &FutureType, - .tp_dealloc = TaskObj_dealloc, - .tp_as_async = &FutureType_as_async, - .tp_repr = (reprfunc)TaskObj_repr, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - .tp_doc = _asyncio_Task___init____doc__, - .tp_traverse = (traverseproc)TaskObj_traverse, - .tp_clear = (inquiry)TaskObj_clear, - .tp_weaklistoffset = offsetof(TaskObj, task_weakreflist), - .tp_iter = (getiterfunc)future_new_iter, - .tp_methods = TaskType_methods, - .tp_getset = TaskType_getsetlist, - .tp_dictoffset = offsetof(TaskObj, dict), - .tp_init = (initproc)_asyncio_Task___init__, - .tp_new = PyType_GenericNew, - .tp_finalize = (destructor)TaskObj_finalize, +static PyType_Slot Task_slots[] = { + {Py_tp_dealloc, TaskObj_dealloc}, + {Py_tp_repr, (reprfunc)TaskObj_repr}, + {Py_tp_doc, (void *)_asyncio_Task___init____doc__}, + {Py_tp_traverse, (traverseproc)TaskObj_traverse}, + {Py_tp_clear, (inquiry)TaskObj_clear}, + {Py_tp_iter, (getiterfunc)future_new_iter}, + {Py_tp_methods, TaskType_methods}, + {Py_tp_members, TaskType_members}, + {Py_tp_getset, TaskType_getsetlist}, + {Py_tp_init, (initproc)_asyncio_Task___init__}, + {Py_tp_new, PyType_GenericNew}, + {Py_tp_finalize, (destructor)TaskObj_finalize}, + + // async slots + {Py_am_await, (unaryfunc)future_new_iter}, + {0, NULL}, +}; + +static PyType_Spec Task_spec = { + .name = "_asyncio.Task", + .basicsize = 
sizeof(TaskObj), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = Task_slots, }; static void @@ -2512,16 +2652,12 @@ TaskObj_dealloc(PyObject *self) { TaskObj *task = (TaskObj *)self; - if (Task_CheckExact(self)) { - /* When fut is subclass of Task, finalizer is called from - * subtype_dealloc. - */ - if (PyObject_CallFinalizerFromDealloc(self) < 0) { - // resurrected. - return; - } + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // resurrected. + return; } + PyTypeObject *tp = Py_TYPE(task); PyObject_GC_UnTrack(self); if (task->task_weakreflist != NULL) { @@ -2529,24 +2665,26 @@ TaskObj_dealloc(PyObject *self) } (void)TaskObj_clear(task); - Py_TYPE(task)->tp_free(task); + tp->tp_free(task); + Py_DECREF(tp); } static int -task_call_step_soon(TaskObj *task, PyObject *arg) +task_call_step_soon(asyncio_state *state, TaskObj *task, PyObject *arg) { PyObject *cb = TaskStepMethWrapper_new(task, arg); if (cb == NULL) { return -1; } - int ret = call_soon(task->task_loop, cb, NULL, task->task_context); + int ret = call_soon(state, task->task_loop, cb, NULL, task->task_context); Py_DECREF(cb); return ret; } static PyObject * -task_set_error_soon(TaskObj *task, PyObject *et, const char *format, ...) +task_set_error_soon(asyncio_state *state, TaskObj *task, PyObject *et, + const char *format, ...) { PyObject* msg; @@ -2565,7 +2703,7 @@ task_set_error_soon(TaskObj *task, PyObject *et, const char *format, ...) return NULL; } - if (task_call_step_soon(task, e) == -1) { + if (task_call_step_soon(state, task, e) == -1) { Py_DECREF(e); return NULL; } @@ -2589,7 +2727,7 @@ gen_status_from_result(PyObject **result) } static PyObject * -task_step_impl(TaskObj *task, PyObject *exc) +task_step_impl(asyncio_state *state, TaskObj *task, PyObject *exc) { int res; int clear_exc = 0; @@ -2598,7 +2736,7 @@ task_step_impl(TaskObj *task, PyObject *exc) PyObject *o; if (task->task_state != STATE_PENDING) { - PyErr_Format(asyncio_InvalidStateError, + PyErr_Format(state->asyncio_InvalidStateError, "_step(): already done: %R %R", task, exc ? exc : Py_None); @@ -2610,7 +2748,7 @@ task_step_impl(TaskObj *task, PyObject *exc) if (exc) { /* Check if exc is a CancelledError */ - res = PyObject_IsInstance(exc, asyncio_CancelledError); + res = PyObject_IsInstance(exc, state->asyncio_CancelledError); if (res == -1) { /* An error occurred, abort */ goto fail; @@ -2623,7 +2761,7 @@ task_step_impl(TaskObj *task, PyObject *exc) if (!exc) { /* exc was not a CancelledError */ - exc = create_cancelled_error((FutureObj*)task); + exc = create_cancelled_error(state, (FutureObj*)task); if (!exc) { goto fail; @@ -2670,10 +2808,11 @@ task_step_impl(TaskObj *task, PyObject *exc) if (task->task_must_cancel) { // Task is cancelled right before coro stops. 
task->task_must_cancel = 0; - tmp = future_cancel((FutureObj*)task, task->task_cancel_msg); + tmp = future_cancel(state, (FutureObj*)task, + task->task_cancel_msg); } else { - tmp = future_set_result((FutureObj*)task, result); + tmp = future_set_result(state, (FutureObj*)task, result); } Py_DECREF(result); @@ -2685,7 +2824,7 @@ task_step_impl(TaskObj *task, PyObject *exc) Py_RETURN_NONE; } - if (PyErr_ExceptionMatches(asyncio_CancelledError)) { + if (PyErr_ExceptionMatches(state->asyncio_CancelledError)) { /* CancelledError */ PyErr_Fetch(&et, &ev, &tb); assert(et); @@ -2700,7 +2839,7 @@ task_step_impl(TaskObj *task, PyObject *exc) /* transfer ownership */ fut->fut_cancelled_exc = ev; - return future_cancel(fut, NULL); + return future_cancel(state, fut, NULL); } /* Some other exception; pop it and call Task.set_exception() */ @@ -2711,7 +2850,7 @@ task_step_impl(TaskObj *task, PyObject *exc) PyException_SetTraceback(ev, tb); } - o = future_set_exception((FutureObj*)task, ev); + o = future_set_exception(state, (FutureObj*)task, ev); if (!o) { /* An exception in Task.set_exception() */ Py_DECREF(et); @@ -2743,7 +2882,7 @@ task_step_impl(TaskObj *task, PyObject *exc) } /* Check if `result` is FutureObj or TaskObj (and not a subclass) */ - if (Future_CheckExact(result) || Task_CheckExact(result)) { + if (Future_CheckExact(state, result) || Task_CheckExact(state, result)) { PyObject *wrapper; PyObject *tmp; FutureObj *fut = (FutureObj*)result; @@ -2764,7 +2903,7 @@ task_step_impl(TaskObj *task, PyObject *exc) if (wrapper == NULL) { goto fail; } - tmp = future_add_done_callback( + tmp = future_add_done_callback(state, (FutureObj*)result, wrapper, task->task_context); Py_DECREF(wrapper); if (tmp == NULL) { @@ -2799,7 +2938,7 @@ task_step_impl(TaskObj *task, PyObject *exc) /* Check if `result` is None */ if (result == Py_None) { /* Bare yield relinquishes control for one event loop iteration. 
*/ - if (task_call_step_soon(task, NULL)) { + if (task_call_step_soon(state, task, NULL)) { goto fail; } return result; @@ -2821,7 +2960,7 @@ task_step_impl(TaskObj *task, PyObject *exc) } /* Check if `result` future is attached to a different loop */ - PyObject *oloop = get_future_loop(result); + PyObject *oloop = get_future_loop(state, result); if (oloop == NULL) { goto fail; } @@ -2857,7 +2996,7 @@ task_step_impl(TaskObj *task, PyObject *exc) stack[0] = wrapper; stack[1] = (PyObject *)task->task_context; EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, add_cb); - tmp = PyObject_Vectorcall(add_cb, stack, 1, context_kwname); + tmp = PyObject_Vectorcall(add_cb, stack, 1, state->context_kwname); Py_DECREF(add_cb); Py_DECREF(wrapper); if (tmp == NULL) { @@ -2898,7 +3037,7 @@ task_step_impl(TaskObj *task, PyObject *exc) if (res) { /* `result` is a generator */ o = task_set_error_soon( - task, PyExc_RuntimeError, + state, task, PyExc_RuntimeError, "yield was used instead of yield from for " "generator in task %R with %R", task, result); Py_DECREF(result); @@ -2907,20 +3046,20 @@ task_step_impl(TaskObj *task, PyObject *exc) /* The `result` is none of the above */ o = task_set_error_soon( - task, PyExc_RuntimeError, "Task got bad yield: %R", result); + state, task, PyExc_RuntimeError, "Task got bad yield: %R", result); Py_DECREF(result); return o; self_await: o = task_set_error_soon( - task, PyExc_RuntimeError, + state, task, PyExc_RuntimeError, "Task cannot await on itself: %R", task); Py_DECREF(result); return o; yield_insteadof_yf: o = task_set_error_soon( - task, PyExc_RuntimeError, + state, task, PyExc_RuntimeError, "yield was used instead of yield from " "in task %R with %R", task, result); @@ -2929,7 +3068,7 @@ task_step_impl(TaskObj *task, PyObject *exc) different_loop: o = task_set_error_soon( - task, PyExc_RuntimeError, + state, task, PyExc_RuntimeError, "Task %R got Future %R attached to a different loop", task, result); Py_DECREF(result); @@ -2941,25 +3080,25 @@ task_step_impl(TaskObj *task, PyObject *exc) } static PyObject * -task_step(TaskObj *task, PyObject *exc) +task_step(asyncio_state *state, TaskObj *task, PyObject *exc) { PyObject *res; - if (enter_task(task->task_loop, (PyObject*)task) < 0) { + if (enter_task(state, task->task_loop, (PyObject*)task) < 0) { return NULL; } - res = task_step_impl(task, exc); + res = task_step_impl(state, task, exc); if (res == NULL) { PyObject *et, *ev, *tb; PyErr_Fetch(&et, &ev, &tb); - leave_task(task->task_loop, (PyObject*)task); + leave_task(state, task->task_loop, (PyObject*)task); _PyErr_ChainExceptions(et, ev, tb); /* Normalizes (et, ev, tb) */ return NULL; } else { - if (leave_task(task->task_loop, (PyObject*)task) < 0) { + if (leave_task(state, task->task_loop, (PyObject*)task) < 0) { Py_DECREF(res); return NULL; } @@ -2976,9 +3115,10 @@ task_wakeup(TaskObj *task, PyObject *o) PyObject *result; assert(o); - if (Future_CheckExact(o) || Task_CheckExact(o)) { + asyncio_state *state = get_asyncio_state_by_def((PyObject *)task); + if (Future_CheckExact(state, o) || Task_CheckExact(state, o)) { PyObject *fut_result = NULL; - int res = future_get_result((FutureObj*)o, &fut_result); + int res = future_get_result(state, (FutureObj*)o, &fut_result); switch(res) { case -1: @@ -2986,10 +3126,10 @@ task_wakeup(TaskObj *task, PyObject *o) break; /* exception raised */ case 0: Py_DECREF(fut_result); - return task_step(task, NULL); + return task_step(state, task, NULL); default: assert(res == 1); - result = task_step(task, fut_result); + result = 
task_step(state, task, fut_result); Py_DECREF(fut_result); return result; } @@ -2998,7 +3138,7 @@ task_wakeup(TaskObj *task, PyObject *o) PyObject *fut_result = PyObject_CallMethod(o, "result", NULL); if (fut_result != NULL) { Py_DECREF(fut_result); - return task_step(task, NULL); + return task_step(state, task, NULL); } /* exception raised */ } @@ -3010,7 +3150,7 @@ task_wakeup(TaskObj *task, PyObject *o) PyException_SetTraceback(ev, tb); } - result = task_step(task, ev); + result = task_step(state, task, ev); Py_DECREF(et); Py_XDECREF(tb); @@ -3038,7 +3178,8 @@ _asyncio__get_running_loop_impl(PyObject *module) /*[clinic end generated code: output=b4390af721411a0a input=0a21627e25a4bd43]*/ { PyObject *loop; - if (get_running_loop(&loop)) { + asyncio_state *state = get_asyncio_state(module); + if (get_running_loop(state, &loop)) { return NULL; } if (loop == NULL) { @@ -3063,7 +3204,8 @@ static PyObject * _asyncio__set_running_loop(PyObject *module, PyObject *loop) /*[clinic end generated code: output=ae56bf7a28ca189a input=4c9720233d606604]*/ { - if (set_running_loop(loop)) { + asyncio_state *state = get_asyncio_state(module); + if (set_running_loop(state, loop)) { return NULL; } Py_RETURN_NONE; @@ -3086,7 +3228,8 @@ static PyObject * _asyncio_get_event_loop_impl(PyObject *module) /*[clinic end generated code: output=2a2d8b2f824c648b input=9364bf2916c8655d]*/ { - return get_event_loop(1); + asyncio_state *state = get_asyncio_state(module); + return get_event_loop(state, 1); } /*[clinic input] @@ -3098,7 +3241,8 @@ static PyObject * _asyncio__get_event_loop_impl(PyObject *module, int stacklevel) /*[clinic end generated code: output=9c1d6d3c802e67c9 input=d17aebbd686f711d]*/ { - return get_event_loop(stacklevel-1); + asyncio_state *state = get_asyncio_state(module); + return get_event_loop(state, stacklevel-1); } /*[clinic input] @@ -3114,7 +3258,8 @@ _asyncio_get_running_loop_impl(PyObject *module) /*[clinic end generated code: output=c247b5f9e529530e input=2a3bf02ba39f173d]*/ { PyObject *loop; - if (get_running_loop(&loop)) { + asyncio_state *state = get_asyncio_state(module); + if (get_running_loop(state, &loop)) { return NULL; } if (loop == NULL) { @@ -3139,7 +3284,8 @@ static PyObject * _asyncio__register_task_impl(PyObject *module, PyObject *task) /*[clinic end generated code: output=8672dadd69a7d4e2 input=21075aaea14dfbad]*/ { - if (register_task(task) < 0) { + asyncio_state *state = get_asyncio_state(module); + if (register_task(state, task) < 0) { return NULL; } Py_RETURN_NONE; @@ -3160,7 +3306,8 @@ static PyObject * _asyncio__unregister_task_impl(PyObject *module, PyObject *task) /*[clinic end generated code: output=6e5585706d568a46 input=28fb98c3975f7bdc]*/ { - if (unregister_task(task) < 0) { + asyncio_state *state = get_asyncio_state(module); + if (unregister_task(state, task) < 0) { return NULL; } Py_RETURN_NONE; @@ -3184,7 +3331,8 @@ static PyObject * _asyncio__enter_task_impl(PyObject *module, PyObject *loop, PyObject *task) /*[clinic end generated code: output=a22611c858035b73 input=de1b06dca70d8737]*/ { - if (enter_task(loop, task) < 0) { + asyncio_state *state = get_asyncio_state(module); + if (enter_task(state, loop, task) < 0) { return NULL; } Py_RETURN_NONE; @@ -3208,7 +3356,8 @@ static PyObject * _asyncio__leave_task_impl(PyObject *module, PyObject *loop, PyObject *task) /*[clinic end generated code: output=0ebf6db4b858fb41 input=51296a46313d1ad8]*/ { - if (leave_task(loop, task) < 0) { + asyncio_state *state = get_asyncio_state(module); + if (leave_task(state, loop, 
task) < 0) { return NULL; } Py_RETURN_NONE; @@ -3219,10 +3368,10 @@ _asyncio__leave_task_impl(PyObject *module, PyObject *loop, PyObject *task) static PyRunningLoopHolder * -new_running_loop_holder(PyObject *loop) +new_running_loop_holder(asyncio_state *state, PyObject *loop) { - PyRunningLoopHolder *rl = PyObject_New( - PyRunningLoopHolder, &PyRunningLoopHolder_Type); + PyRunningLoopHolder *rl = PyObject_GC_New( + PyRunningLoopHolder, state->PyRunningLoopHolder_Type); if (rl == NULL) { return NULL; } @@ -3232,28 +3381,59 @@ new_running_loop_holder(PyObject *loop) #endif rl->rl_loop = Py_NewRef(loop); + PyObject_GC_Track(rl); return rl; } +static int +PyRunningLoopHolder_clear(PyRunningLoopHolder *rl) +{ + Py_CLEAR(rl->rl_loop); + return 0; +} + + +static int +PyRunningLoopHolder_traverse(PyRunningLoopHolder *rl, visitproc visit, + void *arg) +{ + Py_VISIT(Py_TYPE(rl)); + Py_VISIT(rl->rl_loop); + return 0; +} + + static void PyRunningLoopHolder_tp_dealloc(PyRunningLoopHolder *rl) { - if (cached_running_holder == (PyObject *)rl) { - cached_running_holder = NULL; + asyncio_state *state = get_asyncio_state_by_def((PyObject *)rl); + if (state->cached_running_holder == (PyObject *)rl) { + state->cached_running_holder = NULL; } - Py_CLEAR(rl->rl_loop); - PyObject_Free(rl); + PyTypeObject *tp = Py_TYPE(rl); + PyObject_GC_UnTrack(rl); + PyRunningLoopHolder_clear(rl); + PyObject_GC_Del(rl); + Py_DECREF(tp); } -static PyTypeObject PyRunningLoopHolder_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_RunningLoopHolder", - sizeof(PyRunningLoopHolder), - .tp_getattro = PyObject_GenericGetAttr, - .tp_flags = Py_TPFLAGS_DEFAULT, - .tp_dealloc = (destructor)PyRunningLoopHolder_tp_dealloc, +static PyType_Slot PyRunningLoopHolder_slots[] = { + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_dealloc, (destructor)PyRunningLoopHolder_tp_dealloc}, + {Py_tp_traverse, (traverseproc)PyRunningLoopHolder_traverse}, + {Py_tp_clear, PyRunningLoopHolder_clear}, + {0, NULL}, +}; + + +static PyType_Spec PyRunningLoopHolder_spec = { + .name = "_asyncio._RunningLoopHolder", + .basicsize = sizeof(PyRunningLoopHolder), + .slots = PyRunningLoopHolder_slots, + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), }; @@ -3279,58 +3459,106 @@ module_free_freelists(void) fi_freelist = NULL; } +static int +module_traverse(PyObject *mod, visitproc visit, void *arg) +{ + asyncio_state *state = get_asyncio_state(mod); + + Py_VISIT(state->FutureIterType); + Py_VISIT(state->TaskStepMethWrapper_Type); + Py_VISIT(state->FutureType); + Py_VISIT(state->TaskType); + Py_VISIT(state->PyRunningLoopHolder_Type); + + Py_VISIT(state->asyncio_mod); + Py_VISIT(state->traceback_extract_stack); + Py_VISIT(state->asyncio_future_repr_func); + Py_VISIT(state->asyncio_get_event_loop_policy); + Py_VISIT(state->asyncio_iscoroutine_func); + Py_VISIT(state->asyncio_task_get_stack_func); + Py_VISIT(state->asyncio_task_print_stack_func); + Py_VISIT(state->asyncio_task_repr_func); + Py_VISIT(state->asyncio_InvalidStateError); + Py_VISIT(state->asyncio_CancelledError); + + Py_VISIT(state->all_tasks); + Py_VISIT(state->current_tasks); + Py_VISIT(state->iscoroutine_typecache); + + Py_VISIT(state->context_kwname); + + // Visit freelist. 
+ PyObject *next = (PyObject*) fi_freelist; + while (next != NULL) { + PyObject *current = next; + Py_VISIT(current); + next = (PyObject*) ((futureiterobject*) current)->future; + } + return 0; +} -static void -module_free(void *m) +static int +module_clear(PyObject *mod) { - Py_CLEAR(asyncio_mod); - Py_CLEAR(traceback_extract_stack); - Py_CLEAR(asyncio_future_repr_func); - Py_CLEAR(asyncio_get_event_loop_policy); - Py_CLEAR(asyncio_iscoroutine_func); - Py_CLEAR(asyncio_task_get_stack_func); - Py_CLEAR(asyncio_task_print_stack_func); - Py_CLEAR(asyncio_task_repr_func); - Py_CLEAR(asyncio_InvalidStateError); - Py_CLEAR(asyncio_CancelledError); + asyncio_state *state = get_asyncio_state(mod); + + Py_CLEAR(state->FutureIterType); + Py_CLEAR(state->TaskStepMethWrapper_Type); + Py_CLEAR(state->FutureType); + Py_CLEAR(state->TaskType); + Py_CLEAR(state->PyRunningLoopHolder_Type); - Py_CLEAR(all_tasks); - Py_CLEAR(current_tasks); - Py_CLEAR(iscoroutine_typecache); + Py_CLEAR(state->asyncio_mod); + Py_CLEAR(state->traceback_extract_stack); + Py_CLEAR(state->asyncio_future_repr_func); + Py_CLEAR(state->asyncio_get_event_loop_policy); + Py_CLEAR(state->asyncio_iscoroutine_func); + Py_CLEAR(state->asyncio_task_get_stack_func); + Py_CLEAR(state->asyncio_task_print_stack_func); + Py_CLEAR(state->asyncio_task_repr_func); + Py_CLEAR(state->asyncio_InvalidStateError); + Py_CLEAR(state->asyncio_CancelledError); - Py_CLEAR(context_kwname); + Py_CLEAR(state->all_tasks); + Py_CLEAR(state->current_tasks); + Py_CLEAR(state->iscoroutine_typecache); + + Py_CLEAR(state->context_kwname); module_free_freelists(); - module_initialized = 0; + return 0; +} + +static void +module_free(void *mod) +{ + (void)module_clear((PyObject *)mod); } static int -module_init(void) +module_init(asyncio_state *state) { PyObject *module = NULL; - if (module_initialized) { - return 0; - } - asyncio_mod = PyImport_ImportModule("asyncio"); - if (asyncio_mod == NULL) { + state->asyncio_mod = PyImport_ImportModule("asyncio"); + if (state->asyncio_mod == NULL) { goto fail; } - current_tasks = PyDict_New(); - if (current_tasks == NULL) { + state->current_tasks = PyDict_New(); + if (state->current_tasks == NULL) { goto fail; } - iscoroutine_typecache = PySet_New(NULL); - if (iscoroutine_typecache == NULL) { + state->iscoroutine_typecache = PySet_New(NULL); + if (state->iscoroutine_typecache == NULL) { goto fail; } - context_kwname = Py_BuildValue("(s)", "context"); - if (context_kwname == NULL) { + state->context_kwname = Py_BuildValue("(s)", "context"); + if (state->context_kwname == NULL) { goto fail; } @@ -3348,42 +3576,40 @@ module_init(void) } WITH_MOD("asyncio.events") - GET_MOD_ATTR(asyncio_get_event_loop_policy, "get_event_loop_policy") + GET_MOD_ATTR(state->asyncio_get_event_loop_policy, "get_event_loop_policy") WITH_MOD("asyncio.base_futures") - GET_MOD_ATTR(asyncio_future_repr_func, "_future_repr") + GET_MOD_ATTR(state->asyncio_future_repr_func, "_future_repr") WITH_MOD("asyncio.exceptions") - GET_MOD_ATTR(asyncio_InvalidStateError, "InvalidStateError") - GET_MOD_ATTR(asyncio_CancelledError, "CancelledError") + GET_MOD_ATTR(state->asyncio_InvalidStateError, "InvalidStateError") + GET_MOD_ATTR(state->asyncio_CancelledError, "CancelledError") WITH_MOD("asyncio.base_tasks") - GET_MOD_ATTR(asyncio_task_repr_func, "_task_repr") - GET_MOD_ATTR(asyncio_task_get_stack_func, "_task_get_stack") - GET_MOD_ATTR(asyncio_task_print_stack_func, "_task_print_stack") + GET_MOD_ATTR(state->asyncio_task_repr_func, "_task_repr") + 
GET_MOD_ATTR(state->asyncio_task_get_stack_func, "_task_get_stack") + GET_MOD_ATTR(state->asyncio_task_print_stack_func, "_task_print_stack") WITH_MOD("asyncio.coroutines") - GET_MOD_ATTR(asyncio_iscoroutine_func, "iscoroutine") + GET_MOD_ATTR(state->asyncio_iscoroutine_func, "iscoroutine") WITH_MOD("traceback") - GET_MOD_ATTR(traceback_extract_stack, "extract_stack") + GET_MOD_ATTR(state->traceback_extract_stack, "extract_stack") PyObject *weak_set; WITH_MOD("weakref") GET_MOD_ATTR(weak_set, "WeakSet"); - all_tasks = PyObject_CallNoArgs(weak_set); + state->all_tasks = PyObject_CallNoArgs(weak_set); Py_CLEAR(weak_set); - if (all_tasks == NULL) { + if (state->all_tasks == NULL) { goto fail; } - module_initialized = 1; Py_DECREF(module); return 0; fail: Py_CLEAR(module); - module_free(NULL); return -1; #undef WITH_MOD @@ -3405,64 +3631,71 @@ static PyMethodDef asyncio_methods[] = { {NULL, NULL} }; -static struct PyModuleDef _asynciomodule = { - PyModuleDef_HEAD_INIT, /* m_base */ - "_asyncio", /* m_name */ - module_doc, /* m_doc */ - -1, /* m_size */ - asyncio_methods, /* m_methods */ - NULL, /* m_slots */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - (freefunc)module_free /* m_free */ -}; +static int +module_exec(PyObject *mod) +{ + asyncio_state *state = get_asyncio_state(mod); +#define CREATE_TYPE(m, tp, spec, base) \ + do { \ + tp = (PyTypeObject *)PyType_FromMetaclass(NULL, m, spec, \ + (PyObject *)base); \ + if (tp == NULL) { \ + return -1; \ + } \ + } while (0) -PyMODINIT_FUNC -PyInit__asyncio(void) -{ - if (module_init() < 0) { - return NULL; - } - if (PyType_Ready(&FutureIterType) < 0) { - return NULL; + CREATE_TYPE(mod, state->TaskStepMethWrapper_Type, &TaskStepMethWrapper_spec, NULL); + CREATE_TYPE(mod, state->PyRunningLoopHolder_Type, &PyRunningLoopHolder_spec, NULL); + CREATE_TYPE(mod, state->FutureIterType, &FutureIter_spec, NULL); + CREATE_TYPE(mod, state->FutureType, &Future_spec, NULL); + CREATE_TYPE(mod, state->TaskType, &Task_spec, state->FutureType); + +#undef CREATE_TYPE + + if (PyModule_AddType(mod, state->FutureType) < 0) { + return -1; } - if (PyType_Ready(&TaskStepMethWrapper_Type) < 0) { - return NULL; + + if (PyModule_AddType(mod, state->TaskType) < 0) { + return -1; } - if (PyType_Ready(&PyRunningLoopHolder_Type) < 0) { - return NULL; + // Must be done after types are added to avoid a circular dependency + if (module_init(state) < 0) { + return -1; } - PyObject *m = PyModule_Create(&_asynciomodule); - if (m == NULL) { - return NULL; + if (PyModule_AddObjectRef(mod, "_all_tasks", state->all_tasks) < 0) { + return -1; } - /* FutureType and TaskType are made ready by PyModule_AddType() calls below. 
*/ - if (PyModule_AddType(m, &FutureType) < 0) { - Py_DECREF(m); - return NULL; + if (PyModule_AddObjectRef(mod, "_current_tasks", state->current_tasks) < 0) { + return -1; } - if (PyModule_AddType(m, &TaskType) < 0) { - Py_DECREF(m); - return NULL; - } - Py_INCREF(all_tasks); - if (PyModule_AddObject(m, "_all_tasks", all_tasks) < 0) { - Py_DECREF(all_tasks); - Py_DECREF(m); - return NULL; - } + return 0; +} - Py_INCREF(current_tasks); - if (PyModule_AddObject(m, "_current_tasks", current_tasks) < 0) { - Py_DECREF(current_tasks); - Py_DECREF(m); - return NULL; - } +static struct PyModuleDef_Slot module_slots[] = { + {Py_mod_exec, module_exec}, + {0, NULL}, +}; + +static struct PyModuleDef _asynciomodule = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_asyncio", + .m_doc = module_doc, + .m_size = sizeof(asyncio_state), + .m_methods = asyncio_methods, + .m_slots = module_slots, + .m_traverse = module_traverse, + .m_clear = module_clear, + .m_free = (freefunc)module_free, +}; - return m; +PyMODINIT_FUNC +PyInit__asyncio(void) +{ + return PyModuleDef_Init(&_asynciomodule); } diff --git a/Modules/clinic/_asynciomodule.c.h b/Modules/clinic/_asynciomodule.c.h index ddec54c8d7c2bc..11db478a8b4827 100644 --- a/Modules/clinic/_asynciomodule.c.h +++ b/Modules/clinic/_asynciomodule.c.h @@ -112,15 +112,19 @@ PyDoc_STRVAR(_asyncio_Future_exception__doc__, "InvalidStateError."); #define _ASYNCIO_FUTURE_EXCEPTION_METHODDEF \ - {"exception", (PyCFunction)_asyncio_Future_exception, METH_NOARGS, _asyncio_Future_exception__doc__}, + {"exception", _PyCFunction_CAST(_asyncio_Future_exception), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_exception__doc__}, static PyObject * -_asyncio_Future_exception_impl(FutureObj *self); +_asyncio_Future_exception_impl(FutureObj *self, PyTypeObject *cls); static PyObject * -_asyncio_Future_exception(FutureObj *self, PyObject *Py_UNUSED(ignored)) +_asyncio_Future_exception(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _asyncio_Future_exception_impl(self); + if (nargs) { + PyErr_SetString(PyExc_TypeError, "exception() takes no arguments"); + return NULL; + } + return _asyncio_Future_exception_impl(self, cls); } PyDoc_STRVAR(_asyncio_Future_set_result__doc__, @@ -133,7 +137,42 @@ PyDoc_STRVAR(_asyncio_Future_set_result__doc__, "InvalidStateError."); #define _ASYNCIO_FUTURE_SET_RESULT_METHODDEF \ - {"set_result", (PyCFunction)_asyncio_Future_set_result, METH_O, _asyncio_Future_set_result__doc__}, + {"set_result", _PyCFunction_CAST(_asyncio_Future_set_result), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_set_result__doc__}, + +static PyObject * +_asyncio_Future_set_result_impl(FutureObj *self, PyTypeObject *cls, + PyObject *result); + +static PyObject * +_asyncio_Future_set_result(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_result", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *result; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + result = args[0]; + return_value = _asyncio_Future_set_result_impl(self, cls, 
result); + +exit: + return return_value; +} PyDoc_STRVAR(_asyncio_Future_set_exception__doc__, "set_exception($self, exception, /)\n" @@ -145,7 +184,42 @@ PyDoc_STRVAR(_asyncio_Future_set_exception__doc__, "InvalidStateError."); #define _ASYNCIO_FUTURE_SET_EXCEPTION_METHODDEF \ - {"set_exception", (PyCFunction)_asyncio_Future_set_exception, METH_O, _asyncio_Future_set_exception__doc__}, + {"set_exception", _PyCFunction_CAST(_asyncio_Future_set_exception), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_set_exception__doc__}, + +static PyObject * +_asyncio_Future_set_exception_impl(FutureObj *self, PyTypeObject *cls, + PyObject *exception); + +static PyObject * +_asyncio_Future_set_exception(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_exception", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *exception; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + exception = args[0]; + return_value = _asyncio_Future_set_exception_impl(self, cls, exception); + +exit: + return return_value; +} PyDoc_STRVAR(_asyncio_Future_add_done_callback__doc__, "add_done_callback($self, fn, /, *, context=)\n" @@ -158,14 +232,14 @@ PyDoc_STRVAR(_asyncio_Future_add_done_callback__doc__, "scheduled with call_soon."); #define _ASYNCIO_FUTURE_ADD_DONE_CALLBACK_METHODDEF \ - {"add_done_callback", _PyCFunction_CAST(_asyncio_Future_add_done_callback), METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_add_done_callback__doc__}, + {"add_done_callback", _PyCFunction_CAST(_asyncio_Future_add_done_callback), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_add_done_callback__doc__}, static PyObject * -_asyncio_Future_add_done_callback_impl(FutureObj *self, PyObject *fn, - PyObject *context); +_asyncio_Future_add_done_callback_impl(FutureObj *self, PyTypeObject *cls, + PyObject *fn, PyObject *context); static PyObject * -_asyncio_Future_add_done_callback(FutureObj *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_asyncio_Future_add_done_callback(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -208,7 +282,7 @@ _asyncio_Future_add_done_callback(FutureObj *self, PyObject *const *args, Py_ssi } context = args[1]; skip_optional_kwonly: - return_value = _asyncio_Future_add_done_callback_impl(self, fn, context); + return_value = _asyncio_Future_add_done_callback_impl(self, cls, fn, context); exit: return return_value; @@ -223,7 +297,42 @@ PyDoc_STRVAR(_asyncio_Future_remove_done_callback__doc__, "Returns the number of callbacks removed."); #define _ASYNCIO_FUTURE_REMOVE_DONE_CALLBACK_METHODDEF \ - {"remove_done_callback", (PyCFunction)_asyncio_Future_remove_done_callback, METH_O, _asyncio_Future_remove_done_callback__doc__}, + {"remove_done_callback", _PyCFunction_CAST(_asyncio_Future_remove_done_callback), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_remove_done_callback__doc__}, + +static PyObject * +_asyncio_Future_remove_done_callback_impl(FutureObj *self, PyTypeObject 
*cls, + PyObject *fn); + +static PyObject * +_asyncio_Future_remove_done_callback(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "remove_done_callback", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *fn; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + fn = args[0]; + return_value = _asyncio_Future_remove_done_callback_impl(self, cls, fn); + +exit: + return return_value; +} PyDoc_STRVAR(_asyncio_Future_cancel__doc__, "cancel($self, /, msg=None)\n" @@ -236,13 +345,14 @@ PyDoc_STRVAR(_asyncio_Future_cancel__doc__, "return True."); #define _ASYNCIO_FUTURE_CANCEL_METHODDEF \ - {"cancel", _PyCFunction_CAST(_asyncio_Future_cancel), METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_cancel__doc__}, + {"cancel", _PyCFunction_CAST(_asyncio_Future_cancel), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_cancel__doc__}, static PyObject * -_asyncio_Future_cancel_impl(FutureObj *self, PyObject *msg); +_asyncio_Future_cancel_impl(FutureObj *self, PyTypeObject *cls, + PyObject *msg); static PyObject * -_asyncio_Future_cancel(FutureObj *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_asyncio_Future_cancel(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -283,7 +393,7 @@ _asyncio_Future_cancel(FutureObj *self, PyObject *const *args, Py_ssize_t nargs, } msg = args[0]; skip_optional_pos: - return_value = _asyncio_Future_cancel_impl(self, msg); + return_value = _asyncio_Future_cancel_impl(self, cls, msg); exit: return return_value; @@ -335,15 +445,19 @@ PyDoc_STRVAR(_asyncio_Future_get_loop__doc__, "Return the event loop the Future is bound to."); #define _ASYNCIO_FUTURE_GET_LOOP_METHODDEF \ - {"get_loop", (PyCFunction)_asyncio_Future_get_loop, METH_NOARGS, _asyncio_Future_get_loop__doc__}, + {"get_loop", _PyCFunction_CAST(_asyncio_Future_get_loop), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Future_get_loop__doc__}, static PyObject * -_asyncio_Future_get_loop_impl(FutureObj *self); +_asyncio_Future_get_loop_impl(FutureObj *self, PyTypeObject *cls); static PyObject * -_asyncio_Future_get_loop(FutureObj *self, PyObject *Py_UNUSED(ignored)) +_asyncio_Future_get_loop(FutureObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _asyncio_Future_get_loop_impl(self); + if (nargs) { + PyErr_SetString(PyExc_TypeError, "get_loop() takes no arguments"); + return NULL; + } + return _asyncio_Future_get_loop_impl(self, cls); } PyDoc_STRVAR(_asyncio_Future__make_cancelled_error__doc__, @@ -612,13 +726,14 @@ PyDoc_STRVAR(_asyncio_Task_get_stack__doc__, "returned for a suspended coroutine."); #define _ASYNCIO_TASK_GET_STACK_METHODDEF \ - {"get_stack", _PyCFunction_CAST(_asyncio_Task_get_stack), METH_FASTCALL|METH_KEYWORDS, _asyncio_Task_get_stack__doc__}, + {"get_stack", _PyCFunction_CAST(_asyncio_Task_get_stack), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Task_get_stack__doc__}, static PyObject * 
-_asyncio_Task_get_stack_impl(TaskObj *self, PyObject *limit); +_asyncio_Task_get_stack_impl(TaskObj *self, PyTypeObject *cls, + PyObject *limit); static PyObject * -_asyncio_Task_get_stack(TaskObj *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_asyncio_Task_get_stack(TaskObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -659,7 +774,7 @@ _asyncio_Task_get_stack(TaskObj *self, PyObject *const *args, Py_ssize_t nargs, } limit = args[0]; skip_optional_kwonly: - return_value = _asyncio_Task_get_stack_impl(self, limit); + return_value = _asyncio_Task_get_stack_impl(self, cls, limit); exit: return return_value; @@ -678,14 +793,14 @@ PyDoc_STRVAR(_asyncio_Task_print_stack__doc__, "to sys.stderr."); #define _ASYNCIO_TASK_PRINT_STACK_METHODDEF \ - {"print_stack", _PyCFunction_CAST(_asyncio_Task_print_stack), METH_FASTCALL|METH_KEYWORDS, _asyncio_Task_print_stack__doc__}, + {"print_stack", _PyCFunction_CAST(_asyncio_Task_print_stack), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _asyncio_Task_print_stack__doc__}, static PyObject * -_asyncio_Task_print_stack_impl(TaskObj *self, PyObject *limit, - PyObject *file); +_asyncio_Task_print_stack_impl(TaskObj *self, PyTypeObject *cls, + PyObject *limit, PyObject *file); static PyObject * -_asyncio_Task_print_stack(TaskObj *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_asyncio_Task_print_stack(TaskObj *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -733,7 +848,7 @@ _asyncio_Task_print_stack(TaskObj *self, PyObject *const *args, Py_ssize_t nargs } file = args[1]; skip_optional_kwonly: - return_value = _asyncio_Task_print_stack_impl(self, limit, file); + return_value = _asyncio_Task_print_stack_impl(self, cls, limit, file); exit: return return_value; @@ -1189,4 +1304,4 @@ _asyncio__leave_task(PyObject *module, PyObject *const *args, Py_ssize_t nargs, exit: return return_value; } -/*[clinic end generated code: output=f117b2246eaf7a55 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=550bc6603df89ed9 input=a9049054013a1b77]*/ From 4246fe977d850f8b78505c982f055d33d52ff339 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Nov 2022 12:12:17 +0100 Subject: [PATCH 089/112] gh-99845: Change _PyDict_KeysSize() return type to size_t (#99848) * Change _PyDict_KeysSize() and shared_keys_usable_size() return type from signed (Py_ssize_t) to unsigned (size_t) type. * new_values() argument type is now unsigned (size_t). * init_inline_values() now uses size_t rather than int for the 'i' iterator variable. * type.__sizeof__() implementation now uses unsigned (size_t) type. --- Include/internal/pycore_dict.h | 2 +- Objects/dictobject.c | 57 ++++++++++++++++------------------ Objects/typeobject.c | 7 +++-- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 2b3b56b343ad99..c74a3437713039 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -39,7 +39,7 @@ extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); * Returns the version number, or zero if it was not possible to get a version number. 
*/ extern uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys); -extern Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys); +extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); /* _Py_dict_lookup() returns index of entry which can be used like DK_ENTRIES(dk)[index]. * -1 when no entry found, -3 when compare raises error. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c58d07b51bd89a..b9067213820b52 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -685,9 +685,9 @@ free_keys_object(PyDictKeysObject *keys) } static inline PyDictValues* -new_values(Py_ssize_t size) +new_values(size_t size) { - assert(size > 0); + assert(size >= 1); size_t prefix_size = _Py_SIZE_ROUND_UP(size+2, sizeof(PyObject *)); assert(prefix_size < 256); size_t n = prefix_size + size * sizeof(PyObject *); @@ -746,27 +746,24 @@ new_dict(PyDictKeysObject *keys, PyDictValues *values, Py_ssize_t used, int free return (PyObject *)mp; } -static inline Py_ssize_t +static inline size_t shared_keys_usable_size(PyDictKeysObject *keys) { - return keys->dk_nentries + keys->dk_usable; + return (size_t)keys->dk_nentries + (size_t)keys->dk_usable; } /* Consumes a reference to the keys object */ static PyObject * new_dict_with_shared_keys(PyDictKeysObject *keys) { - PyDictValues *values; - Py_ssize_t i, size; - - size = shared_keys_usable_size(keys); - values = new_values(size); + size_t size = shared_keys_usable_size(keys); + PyDictValues *values = new_values(size); if (values == NULL) { dictkeys_decref(keys); return PyErr_NoMemory(); } ((char *)values)[-2] = 0; - for (i = 0; i < size; i++) { + for (size_t i = 0; i < size; i++) { values->values[i] = NULL; } return new_dict(keys, values, 0, 1); @@ -781,7 +778,7 @@ clone_combined_dict_keys(PyDictObject *orig) assert(orig->ma_values == NULL); assert(orig->ma_keys->dk_refcnt == 1); - Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys); + size_t keys_size = _PyDict_KeysSize(orig->ma_keys); PyDictKeysObject *keys = PyObject_Malloc(keys_size); if (keys == NULL) { PyErr_NoMemory(); @@ -2959,7 +2956,6 @@ PyDict_Copy(PyObject *o) { PyObject *copy; PyDictObject *mp; - Py_ssize_t i, n; if (o == NULL || !PyDict_Check(o)) { PyErr_BadInternalCall(); @@ -2974,9 +2970,8 @@ PyDict_Copy(PyObject *o) if (_PyDict_HasSplitTable(mp)) { PyDictObject *split_copy; - Py_ssize_t size = shared_keys_usable_size(mp->ma_keys); - PyDictValues *newvalues; - newvalues = new_values(size); + size_t size = shared_keys_usable_size(mp->ma_keys); + PyDictValues *newvalues = new_values(size); if (newvalues == NULL) return PyErr_NoMemory(); split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type); @@ -2991,7 +2986,7 @@ PyDict_Copy(PyObject *o) split_copy->ma_used = mp->ma_used; split_copy->ma_version_tag = DICT_NEXT_VERSION(); dictkeys_incref(mp->ma_keys); - for (i = 0, n = size; i < n; i++) { + for (size_t i = 0; i < size; i++) { PyObject *value = mp->ma_values->values[i]; split_copy->ma_values->values[i] = Py_XNewRef(value); } @@ -3514,9 +3509,7 @@ static PyObject *dictiter_new(PyDictObject *, PyTypeObject *); Py_ssize_t _PyDict_SizeOf(PyDictObject *mp) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(mp)); + size_t res = _PyObject_SIZE(Py_TYPE(mp)); if (mp->ma_values) { res += shared_keys_usable_size(mp->ma_keys) * sizeof(PyObject*); } @@ -3525,17 +3518,19 @@ _PyDict_SizeOf(PyDictObject *mp) if (mp->ma_keys->dk_refcnt == 1) { res += _PyDict_KeysSize(mp->ma_keys); } - return res; + assert(res <= (size_t)PY_SSIZE_T_MAX); + return (Py_ssize_t)res; } -Py_ssize_t 
+size_t _PyDict_KeysSize(PyDictKeysObject *keys) { - size_t es = keys->dk_kind == DICT_KEYS_GENERAL - ? sizeof(PyDictKeyEntry) : sizeof(PyDictUnicodeEntry); - return (sizeof(PyDictKeysObject) - + ((size_t)1 << keys->dk_log2_index_bytes) - + USABLE_FRACTION(DK_SIZE(keys)) * es); + size_t es = (keys->dk_kind == DICT_KEYS_GENERAL + ? sizeof(PyDictKeyEntry) : sizeof(PyDictUnicodeEntry)); + size_t size = sizeof(PyDictKeysObject); + size += (size_t)1 << keys->dk_log2_index_bytes; + size += USABLE_FRACTION((size_t)DK_SIZE(keys)) * es; + return size; } static PyObject * @@ -5286,16 +5281,15 @@ init_inline_values(PyObject *obj, PyTypeObject *tp) if (keys->dk_usable > 1) { keys->dk_usable--; } - Py_ssize_t size = shared_keys_usable_size(keys); - assert(size > 0); + size_t size = shared_keys_usable_size(keys); PyDictValues *values = new_values(size); if (values == NULL) { PyErr_NoMemory(); return -1; } - assert(((uint8_t *)values)[-1] >= size+2); + assert(((uint8_t *)values)[-1] >= (size + 2)); ((uint8_t *)values)[-2] = 0; - for (int i = 0; i < size; i++) { + for (size_t i = 0; i < size; i++) { values->values[i] = NULL; } _PyDictOrValues_SetValues(_PyObject_DictOrValuesPointer(obj), values); @@ -5335,7 +5329,8 @@ make_dict_from_instance_attributes(PyDictKeysObject *keys, PyDictValues *values) dictkeys_incref(keys); Py_ssize_t used = 0; Py_ssize_t track = 0; - for (Py_ssize_t i = 0; i < shared_keys_usable_size(keys); i++) { + size_t size = shared_keys_usable_size(keys); + for (size_t i = 0; i < size; i++) { PyObject *val = values->values[i]; if (val != NULL) { used += 1; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a4974a1b4f7113..ae80f5a8fd88e0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4680,16 +4680,17 @@ static PyObject * type___sizeof___impl(PyTypeObject *self) /*[clinic end generated code: output=766f4f16cd3b1854 input=99398f24b9cf45d6]*/ { - Py_ssize_t size; + size_t size; if (self->tp_flags & Py_TPFLAGS_HEAPTYPE) { PyHeapTypeObject* et = (PyHeapTypeObject*)self; size = sizeof(PyHeapTypeObject); if (et->ht_cached_keys) size += _PyDict_KeysSize(et->ht_cached_keys); } - else + else { size = sizeof(PyTypeObject); - return PyLong_FromSsize_t(size); + } + return PyLong_FromSize_t(size); } static PyMethodDef type_methods[] = { From 74d5f61ebd1cb14907bf7dae1ad9c1e676707bc5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Nov 2022 12:15:21 +0100 Subject: [PATCH 090/112] gh-99845: Clean up _PyObject_VAR_SIZE() usage (#99847) * code_sizeof() now uses an unsigned type (size_t) to compute the result. * Fix _PyObject_ComputedDictPointer(): cast _PyObject_VAR_SIZE() to Py_ssize_t, rather than long: it's a different type on 64-bit Windows. * Clarify that _PyObject_VAR_SIZE() uses an unsigned type (size_t). 
--- Modules/gcmodule.c | 5 ++--- Objects/codeobject.c | 10 ++++------ Objects/object.c | 13 ++++++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index cacfad7335634c..6630faa6f4471d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -2329,7 +2329,6 @@ _PyObject_GC_New(PyTypeObject *tp) PyVarObject * _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) { - size_t size; PyVarObject *op; if (nitems < 0) { @@ -2337,7 +2336,7 @@ _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) return NULL; } size_t presize = _PyType_PreHeaderSize(tp); - size = _PyObject_VAR_SIZE(tp, nitems); + size_t size = _PyObject_VAR_SIZE(tp, nitems); op = (PyVarObject *)gc_alloc(size, presize); if (op == NULL) { return NULL; @@ -2351,7 +2350,7 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) { const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op)); - if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) { + if (basicsize > (size_t)PY_SSIZE_T_MAX - sizeof(PyGC_Head)) { return (PyVarObject *)PyErr_NoMemory(); } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index fc1db72977aa01..f5d90cf65fcec3 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,15 +1867,13 @@ static PyGetSetDef code_getsetlist[] = { static PyObject * code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args)) { - Py_ssize_t res = _PyObject_VAR_SIZE(Py_TYPE(co), Py_SIZE(co)); - + size_t res = _PyObject_VAR_SIZE(Py_TYPE(co), Py_SIZE(co)); _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) co->co_extra; if (co_extra != NULL) { - res += sizeof(_PyCodeObjectExtra) + - (co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]); + res += sizeof(_PyCodeObjectExtra); + res += ((size_t)co_extra->ce_size - 1) * sizeof(co_extra->ce_extras[0]); } - - return PyLong_FromSsize_t(res); + return PyLong_FromSize_t(res); } static PyObject * diff --git a/Objects/object.c b/Objects/object.c index a499cb32b22f58..687bd36d2b4af1 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1043,22 +1043,25 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) PyObject ** _PyObject_ComputedDictPointer(PyObject *obj) { - Py_ssize_t dictoffset; PyTypeObject *tp = Py_TYPE(obj); - assert((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); - dictoffset = tp->tp_dictoffset; - if (dictoffset == 0) + + Py_ssize_t dictoffset = tp->tp_dictoffset; + if (dictoffset == 0) { return NULL; + } + if (dictoffset < 0) { assert(dictoffset != -1); + Py_ssize_t tsize = Py_SIZE(obj); if (tsize < 0) { tsize = -tsize; } size_t size = _PyObject_VAR_SIZE(tp, tsize); + assert(size <= (size_t)PY_SSIZE_T_MAX); + dictoffset += (Py_ssize_t)size; - dictoffset += (long)size; _PyObject_ASSERT(obj, dictoffset > 0); _PyObject_ASSERT(obj, dictoffset % SIZEOF_VOID_P == 0); } From 05dfc539c2de34021e44f621f2b7b993682ee200 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Tue, 29 Nov 2022 16:21:01 +0000 Subject: [PATCH 091/112] gh-90717: Update the documentation for the altchars paremeter in base64 library (GH-94187) --- Doc/library/base64.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index a02ba739146aaf..4ca3768f827c6b 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -53,11 +53,13 @@ The modern interface provides: Encode the :term:`bytes-like object` *s* using Base64 and return the encoded :class:`bytes`. 
- Optional *altchars* must be a :term:`bytes-like object` of at least - length 2 (additional characters are ignored) which specifies an alternative - alphabet for the ``+`` and ``/`` characters. This allows an application to e.g. - generate URL or filesystem safe Base64 strings. The default is ``None``, for - which the standard Base64 alphabet is used. + Optional *altchars* must be a :term:`bytes-like object` of length 2 which + specifies an alternative alphabet for the ``+`` and ``/`` characters. + This allows an application to e.g. generate URL or filesystem safe Base64 + strings. The default is ``None``, for which the standard Base64 alphabet is used. + + May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. Raises a + :exc:`TypeError` if *altchars* is not a :term:`bytes-like object`. .. function:: b64decode(s, altchars=None, validate=False) @@ -65,9 +67,9 @@ The modern interface provides: Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. - Optional *altchars* must be a :term:`bytes-like object` or ASCII string of - at least length 2 (additional characters are ignored) which specifies the - alternative alphabet used instead of the ``+`` and ``/`` characters. + Optional *altchars* must be a :term:`bytes-like object` or ASCII string + of length 2 which specifies the alternative alphabet used instead of the + ``+`` and ``/`` characters. A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. @@ -80,6 +82,7 @@ The modern interface provides: For more information about the strict base64 check, see :func:`binascii.a2b_base64` + May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. .. function:: standard_b64encode(s) From d74a58872c243bc766055d42f8280721629e3c82 Mon Sep 17 00:00:00 2001 From: "Yilei \"Dolee\" Yang" Date: Tue, 29 Nov 2022 12:30:58 -0800 Subject: [PATCH 092/112] whatsnew-3.10: Mention PEP 647 in the Release highlights section. (#99853) Mention PEP 647 in the Release highlights section. Also re-ordered the list so it matches the order in the details sections below. --- Doc/whatsnew/3.10.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 2389b1a63b1ca2..1c21caf355f082 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -77,8 +77,9 @@ Interpreter improvements: New typing features: * :pep:`604`, Allow writing union types as X | Y -* :pep:`613`, Explicit Type Aliases * :pep:`612`, Parameter Specification Variables +* :pep:`613`, Explicit Type Aliases +* :pep:`647`, User-Defined Type Guards Important deprecations, removals or restrictions: From 052bc128ac0bcafd36a0cbee5f4c2a10d74468dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Tue, 29 Nov 2022 21:34:52 +0100 Subject: [PATCH 093/112] Docs: improve accuracy of socketserver reference (#24767) --- Doc/library/socketserver.rst | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 26785395ec0312..ceb962e860042d 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -96,8 +96,7 @@ synchronous servers of four types:: Note that :class:`UnixDatagramServer` derives from :class:`UDPServer`, not from :class:`UnixStreamServer` --- the only difference between an IP and a Unix -stream server is the address family, which is simply repeated in both Unix -server classes. +server is the address family. ..
class:: ForkingMixIn @@ -431,11 +430,8 @@ Request Handler Objects The :attr:`self.rfile` and :attr:`self.wfile` attributes can be read or written, respectively, to get the request data or return data to the client. - - The :attr:`rfile` attributes of both classes support the - :class:`io.BufferedIOBase` readable interface, and - :attr:`DatagramRequestHandler.wfile` supports the - :class:`io.BufferedIOBase` writable interface. + The :attr:`!rfile` attributes support the :class:`io.BufferedIOBase` readable interface, + and :attr:`!wfile` attributes support the :class:`!io.BufferedIOBase` writable interface. .. versionchanged:: 3.6 :attr:`StreamRequestHandler.wfile` also supports the From ed391090cc8332406e6225d40877db6ff44a7104 Mon Sep 17 00:00:00 2001 From: Pete Wicken <2273100+JamoBox@users.noreply.github.com> Date: Tue, 29 Nov 2022 21:32:18 +0000 Subject: [PATCH 094/112] gh-82836: fix private network check (#97733) Fixes private checks for network objects. The previous method would incorrectly return True for a private check in cases such as "0.0.0.0/0". --- Lib/ipaddress.py | 20 ++++++++--- Lib/test/test_ipaddress.py | 33 +++++++++++++++++++ ...2-10-02-12-38-22.gh-issue-82836.OvYLmC.rst | 1 + 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-10-02-12-38-22.gh-issue-82836.OvYLmC.rst diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 3f15601e700d68..1cb71d8032e173 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -1077,15 +1077,16 @@ def is_link_local(self): @property def is_private(self): - """Test if this address is allocated for private networks. + """Test if this network belongs to a private range. Returns: - A boolean, True if the address is reserved per + A boolean, True if the network is reserved per iana-ipv4-special-registry or iana-ipv6-special-registry. """ - return (self.network_address.is_private and - self.broadcast_address.is_private) + return any(self.network_address in priv_network and + self.broadcast_address in priv_network + for priv_network in self._constants._private_networks) @property def is_global(self): @@ -1122,6 +1123,15 @@ def is_loopback(self): return (self.network_address.is_loopback and self.broadcast_address.is_loopback) + +class _BaseConstants: + + _private_networks = [] + + +_BaseNetwork._constants = _BaseConstants + + class _BaseV4: """Base IPv4 object. 
@@ -1561,6 +1571,7 @@ class _IPv4Constants: IPv4Address._constants = _IPv4Constants +IPv4Network._constants = _IPv4Constants class _BaseV6: @@ -2285,3 +2296,4 @@ class _IPv6Constants: IPv6Address._constants = _IPv6Constants +IPv6Network._constants = _IPv6Constants diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index 5c656c49e2e75f..a5388b2e5debd8 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -2277,6 +2277,39 @@ def testReservedIpv4(self): self.assertEqual(False, ipaddress.ip_address('128.0.0.0').is_loopback) self.assertEqual(True, ipaddress.ip_network('0.0.0.0').is_unspecified) + def testPrivateNetworks(self): + self.assertEqual(False, ipaddress.ip_network("0.0.0.0/0").is_private) + self.assertEqual(False, ipaddress.ip_network("1.0.0.0/8").is_private) + + self.assertEqual(True, ipaddress.ip_network("0.0.0.0/8").is_private) + self.assertEqual(True, ipaddress.ip_network("10.0.0.0/8").is_private) + self.assertEqual(True, ipaddress.ip_network("127.0.0.0/8").is_private) + self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) + self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) + self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) + self.assertEqual(True, ipaddress.ip_network("198.18.0.0/15").is_private) + self.assertEqual(True, ipaddress.ip_network("198.51.100.0/24").is_private) + self.assertEqual(True, ipaddress.ip_network("203.0.113.0/24").is_private) + self.assertEqual(True, ipaddress.ip_network("240.0.0.0/4").is_private) + self.assertEqual(True, ipaddress.ip_network("255.255.255.255/32").is_private) + + self.assertEqual(False, ipaddress.ip_network("::/0").is_private) + self.assertEqual(False, ipaddress.ip_network("::ff/128").is_private) + + self.assertEqual(True, ipaddress.ip_network("::1/128").is_private) + self.assertEqual(True, ipaddress.ip_network("::/128").is_private) + self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) + self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) + self.assertEqual(True, ipaddress.ip_network("2001::/23").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) + self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) + self.assertEqual(True, ipaddress.ip_network("fe80::/10").is_private) + def testReservedIpv6(self): self.assertEqual(True, ipaddress.ip_network('ffff::').is_multicast) diff --git a/Misc/NEWS.d/next/Library/2022-10-02-12-38-22.gh-issue-82836.OvYLmC.rst b/Misc/NEWS.d/next/Library/2022-10-02-12-38-22.gh-issue-82836.OvYLmC.rst new file mode 100644 index 00000000000000..dcbea66d66bf7c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-02-12-38-22.gh-issue-82836.OvYLmC.rst @@ -0,0 +1 @@ +Fix :attr:`~ipaddress.IPv4Address.is_private` properties in the :mod:`ipaddress` module. Previously non-private networks (0.0.0.0/0) would return True from this method; now they correctly return False. 
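For readers following the ipaddress change above, here is a minimal interactive sketch (not part of the patch) of the behaviour the new testPrivateNetworks cases pin down, assuming a build that includes this fix:

    import ipaddress

    # A network now counts as private only when both its network address and its
    # broadcast address fall within a single registered private range, so a range
    # that spans public address space is no longer reported as private.
    print(ipaddress.ip_network("0.0.0.0/0").is_private)       # False
    print(ipaddress.ip_network("1.0.0.0/8").is_private)       # False
    print(ipaddress.ip_network("10.0.0.0/8").is_private)      # True  (RFC 1918)
    print(ipaddress.ip_network("192.168.0.0/16").is_private)  # True  (RFC 1918)

All four results are taken directly from the test cases added in this patch.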
From 8bb2303fd75c299d8fc85229889ac75e867c135c Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Wed, 30 Nov 2022 07:58:20 +0900 Subject: [PATCH 095/112] gh-99127: Allow some features of syslog to the main interpreter only (gh-99128) --- Doc/library/syslog.rst | 21 ++++++ Doc/whatsnew/3.12.rst | 9 +++ Lib/test/test_syslog.py | 64 +++++++++++++++++++ ...2-11-05-22-26-35.gh-issue-99127.Btk7ih.rst | 1 + Modules/syslogmodule.c | 29 ++++++++- 5 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-05-22-26-35.gh-issue-99127.Btk7ih.rst diff --git a/Doc/library/syslog.rst b/Doc/library/syslog.rst index 766ff57cc66d69..f29ef03267b1ba 100644 --- a/Doc/library/syslog.rst +++ b/Doc/library/syslog.rst @@ -40,6 +40,13 @@ The module defines the following functions: it wasn't called prior to the call to :func:`syslog`, deferring to the syslog implementation to call ``openlog()``. + .. versionchanged:: 3.12 + This function is restricted in subinterpreters. + (Only code that runs in multiple interpreters is affected and + the restriction is not relevant for most users.) + :func:`openlog` must be called in the main interpreter before :func:`syslog` may be used + in a subinterpreter. Otherwise it will raise :exc:`RuntimeError`. + .. function:: openlog([ident[, logoption[, facility]]]) @@ -60,6 +67,13 @@ The module defines the following functions: In previous versions, keyword arguments were not allowed, and *ident* was required. + .. versionchanged:: 3.12 + This function is restricted in subinterpreters. + (Only code that runs in multiple interpreters is affected and + the restriction is not relevant for most users.) + This may only be called in the main interpreter. + It will raise :exc:`RuntimeError` if called in a subinterpreter. + .. function:: closelog() @@ -72,6 +86,13 @@ The module defines the following functions: .. audit-event:: syslog.closelog "" syslog.closelog + .. versionchanged:: 3.12 + This function is restricted in subinterpreters. + (Only code that runs in multiple interpreters is affected and + the restriction is not relevant for most users.) + This may only be called in the main interpreter. + It will raise :exc:`RuntimeError` if called in a subinterpreter. + .. function:: setlogmask(maskpri) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index dff4de621b4c49..c0f98b59ccaf0f 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -673,6 +673,15 @@ Changes in the Python API :class:`bytes` type is accepted for bytes strings. (Contributed by Victor Stinner in :gh:`98393`.) +* :func:`syslog.openlog` and :func:`syslog.closelog` now fail if used in subinterpreters. + :func:`syslog.syslog` may still be used in subinterpreters, + but now only if :func:`syslog.openlog` has already been called in the main interpreter. + These new restrictions do not apply to the main interpreter, + so only a very small set of users might be affected. + This change helps with interpreter isolation. Furthermore, :mod:`syslog` is a wrapper + around process-global resources, which are best managed from the main interpreter. + (Contributed by Dong-hee Na in :gh:`99127`.) + Build Changes ============= diff --git a/Lib/test/test_syslog.py b/Lib/test/test_syslog.py index 2125ec58d87e03..54db80fa9df1af 100644 --- a/Lib/test/test_syslog.py +++ b/Lib/test/test_syslog.py @@ -5,6 +5,7 @@ import threading import time import unittest +from textwrap import dedent # XXX(nnorwitz): This test sucks. 
I don't know of a platform independent way # to verify that the messages were really logged. @@ -78,6 +79,69 @@ def logger(): finally: sys.setswitchinterval(orig_si) + def test_subinterpreter_syslog(self): + # syslog.syslog() is not allowed in subinterpreters, but only if + # syslog.openlog() hasn't been called in the main interpreter yet. + with self.subTest('before openlog()'): + code = dedent(''' + import syslog + caught_error = False + try: + syslog.syslog('foo') + except RuntimeError: + caught_error = True + assert(caught_error) + ''') + res = support.run_in_subinterp(code) + self.assertEqual(res, 0) + + syslog.openlog() + try: + with self.subTest('after openlog()'): + code = dedent(''' + import syslog + syslog.syslog('foo') + ''') + res = support.run_in_subinterp(code) + self.assertEqual(res, 0) + finally: + syslog.closelog() + + def test_subinterpreter_openlog(self): + try: + code = dedent(''' + import syslog + caught_error = False + try: + syslog.openlog() + except RuntimeError: + caught_error = True + + assert(caught_error) + ''') + res = support.run_in_subinterp(code) + self.assertEqual(res, 0) + finally: + syslog.closelog() + + def test_subinterpreter_closelog(self): + syslog.openlog('python') + try: + code = dedent(''' + import syslog + caught_error = False + try: + syslog.closelog() + except RuntimeError: + caught_error = True + + assert(caught_error) + ''') + res = support.run_in_subinterp(code) + self.assertEqual(res, 0) + finally: + syslog.closelog() + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-05-22-26-35.gh-issue-99127.Btk7ih.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-05-22-26-35.gh-issue-99127.Btk7ih.rst new file mode 100644 index 00000000000000..e93ae4e7b127d1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-05-22-26-35.gh-issue-99127.Btk7ih.rst @@ -0,0 +1 @@ +Allow some features of :mod:`syslog` to the main interpreter only. Patch by Dong-hee Na. diff --git a/Modules/syslogmodule.c b/Modules/syslogmodule.c index 23833b72850313..f45aa5227f1cbf 100644 --- a/Modules/syslogmodule.c +++ b/Modules/syslogmodule.c @@ -61,10 +61,16 @@ module syslog #include "clinic/syslogmodule.c.h" -/* only one instance, only one syslog, so globals should be ok */ -static PyObject *S_ident_o = NULL; /* identifier, held by openlog() */ +/* only one instance, only one syslog, so globals should be ok, + * these fields are writable from the main interpreter only. */ +static PyObject *S_ident_o = NULL; // identifier, held by openlog() static char S_log_open = 0; +static inline int +is_main_interpreter(void) +{ + return (PyInterpreterState_Get() == PyInterpreterState_Main()); +} static PyObject * syslog_get_argv(void) @@ -135,6 +141,13 @@ syslog_openlog_impl(PyObject *module, PyObject *ident, long logopt, long facility) /*[clinic end generated code: output=5476c12829b6eb75 input=8a987a96a586eee7]*/ { + // Since the sys.openlog changes the process level state of syslog library, + // this operation is only allowed for the main interpreter. 
+ if (!is_main_interpreter()) { + PyErr_SetString(PyExc_RuntimeError, "subinterpreter can't use syslog.openlog()"); + return NULL; + } + const char *ident_str = NULL; if (ident) { @@ -195,6 +208,11 @@ syslog_syslog_impl(PyObject *module, int group_left_1, int priority, /* if log is not opened, open it now */ if (!S_log_open) { + if (!is_main_interpreter()) { + PyErr_SetString(PyExc_RuntimeError, "subinterpreter can't use syslog.syslog() " + "until the syslog is opened by the main interpreter"); + return NULL; + } PyObject *openlog_ret = syslog_openlog_impl(module, NULL, 0, LOG_USER); if (openlog_ret == NULL) { return NULL; @@ -229,6 +247,13 @@ static PyObject * syslog_closelog_impl(PyObject *module) /*[clinic end generated code: output=97890a80a24b1b84 input=fb77a54d447acf07]*/ { + // Since the sys.closelog changes the process level state of syslog library, + // this operation is only allowed for the main interpreter. + if (!is_main_interpreter()) { + PyErr_SetString(PyExc_RuntimeError, "sunbinterpreter can't use syslog.closelog()"); + return NULL; + } + if (PySys_Audit("syslog.closelog", NULL) < 0) { return NULL; } From c314198fadd3dc79ee15f375a409163d8fb33586 Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Wed, 30 Nov 2022 09:33:32 +0100 Subject: [PATCH 096/112] gh-98253: Break potential reference cycles in external code worsened by typing.py lru_cache (#98591) --- Lib/typing.py | 14 +++++++++++--- .../2022-10-24-11-01-05.gh-issue-98253.HVd5v4.rst | 10 ++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-10-24-11-01-05.gh-issue-98253.HVd5v4.rst diff --git a/Lib/typing.py b/Lib/typing.py index 127e3c942d04d3..38e227e3c55d59 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -325,6 +325,7 @@ def _flatten_literal_params(parameters): _cleanups = [] +_caches = {} def _tp_cache(func=None, /, *, typed=False): @@ -332,13 +333,20 @@ def _tp_cache(func=None, /, *, typed=False): original function for non-hashable arguments. """ def decorator(func): - cached = functools.lru_cache(typed=typed)(func) - _cleanups.append(cached.cache_clear) + # The callback 'inner' references the newly created lru_cache + # indirectly by performing a lookup in the global '_caches' dictionary. + # This breaks a reference that can be problematic when combined with + # C API extensions that leak references to types. See GH-98253. + + cache = functools.lru_cache(typed=typed)(func) + _caches[func] = cache + _cleanups.append(cache.cache_clear) + del cache @functools.wraps(func) def inner(*args, **kwds): try: - return cached(*args, **kwds) + return _caches[func](*args, **kwds) except TypeError: pass # All real errors (not unhashable args) are raised below. return func(*args, **kwds) diff --git a/Misc/NEWS.d/next/Library/2022-10-24-11-01-05.gh-issue-98253.HVd5v4.rst b/Misc/NEWS.d/next/Library/2022-10-24-11-01-05.gh-issue-98253.HVd5v4.rst new file mode 100644 index 00000000000000..00df0070f3b9c1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-24-11-01-05.gh-issue-98253.HVd5v4.rst @@ -0,0 +1,10 @@ +The implementation of the typing module is now more resilient to reference +leaks in binary extension modules. + +Previously, a reference leak in a typed C API-based extension module could leak +internals of the typing module, which could in turn introduce leaks in +essentially any other package with typed function signatures. 
Although the +typing package is not the original source of the problem, such non-local +dependences exacerbate debugging of large-scale projects, and the +implementation was therefore changed to reduce harm by providing better +isolation. From 59665d0280c2299ea87e9af45cedc90656cb6f55 Mon Sep 17 00:00:00 2001 From: dmjohnsson23 Date: Wed, 30 Nov 2022 04:14:41 -0700 Subject: [PATCH 097/112] Improve zip64 limit error message (#95892) --- Lib/zipfile/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 8f834267b28c2e..e1833dd1772d56 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1202,10 +1202,10 @@ def close(self): if not self._zip64: if self._file_size > ZIP64_LIMIT: raise RuntimeError( - 'File size unexpectedly exceeded ZIP64 limit') + 'File size too large, try using force_zip64') if self._compress_size > ZIP64_LIMIT: raise RuntimeError( - 'Compressed size unexpectedly exceeded ZIP64 limit') + 'Compressed size too large, try using force_zip64') # Seek backwards and write file header (which will now include # correct CRC and file sizes) From fe17d353134748dc772f8743ceadc2dd9e0db187 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Wed, 30 Nov 2022 16:55:16 +0530 Subject: [PATCH 098/112] GH-81057: remove static state from suggestions.c (#99411) --- Python/suggestions.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/Python/suggestions.c b/Python/suggestions.c index ad645c7d96fa57..f2c018ef2c4533 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -41,10 +41,8 @@ substitution_cost(char a, char b) static Py_ssize_t levenshtein_distance(const char *a, size_t a_size, const char *b, size_t b_size, - size_t max_cost) + size_t max_cost, size_t *buffer) { - static size_t buffer[MAX_STRING_SIZE]; - // Both strings are the same (by identity) if (a == b) { return 0; @@ -147,12 +145,16 @@ calculate_suggestions(PyObject *dir, if (name_str == NULL) { return NULL; } - + size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE); + if (buffer == NULL) { + return PyErr_NoMemory(); + } for (int i = 0; i < dir_size; ++i) { PyObject *item = PyList_GET_ITEM(dir, i); Py_ssize_t item_size; const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size); if (item_str == NULL) { + PyMem_Free(buffer); return NULL; } if (PyUnicode_CompareWithASCIIString(name, item_str) == 0) { @@ -163,8 +165,8 @@ calculate_suggestions(PyObject *dir, // Don't take matches we've already beaten. 
max_distance = Py_MIN(max_distance, suggestion_distance - 1); Py_ssize_t current_distance = - levenshtein_distance(name_str, name_size, - item_str, item_size, max_distance); + levenshtein_distance(name_str, name_size, item_str, + item_size, max_distance, buffer); if (current_distance > max_distance) { continue; } @@ -173,6 +175,7 @@ calculate_suggestions(PyObject *dir, suggestion_distance = current_distance; } } + PyMem_Free(buffer); return Py_XNewRef(suggestion); } @@ -238,7 +241,7 @@ get_suggestions_for_name_error(PyObject* name, PyFrameObject* frame) if (!self) { goto error; } - + if (PyObject_HasAttr(self, name)) { Py_DECREF(dir); return PyUnicode_FromFormat("self.%S", name); @@ -401,6 +404,14 @@ _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost) if (max_cost == -1) { max_cost = MOVE_COST * Py_MAX(size_a, size_b); } - return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost); + size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE); + if (buffer == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t res = levenshtein_distance(utf8_a, size_a, + utf8_b, size_b, max_cost, buffer); + PyMem_Free(buffer); + return res; } From 19c38801ba2f42a220adece5d5f12e833b41822a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Wed, 30 Nov 2022 12:33:50 +0100 Subject: [PATCH 099/112] gh-99824: Document that sqlite3.connect implicitly open a transaction if autocommit=False (#99825) Authored-by: C.A.M. Gerlach --- Doc/library/sqlite3.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 7e2235b285b814..a9ee1e207d0c05 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -2468,9 +2468,9 @@ which implies :pep:`249`-compliant transaction control. This means: * :mod:`!sqlite3` ensures that a transaction is always open, - so :meth:`Connection.commit` and :meth:`Connection.rollback` - will implicitly open a new transaction immediately after closing - the pending one. + so :func:`connect`, :meth:`Connection.commit`, and :meth:`Connection.rollback` + will implicitly open a new transaction + (immediately after closing the pending one, for the latter two). :mod:`!sqlite3` uses ``BEGIN DEFERRED`` statements when opening transactions. * Transactions should be committed explicitly using :meth:`!commit`. * Transactions should be rolled back explicitly using :meth:`!rollback`. 
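The transaction rules documented above are easier to see in a short sketch (not part of the patch). It assumes a CPython build where the sqlite3 autocommit connection parameter described in that documentation section is available (3.12+):

    import sqlite3

    # With autocommit=False, connect() implicitly opens a transaction, and
    # commit()/rollback() implicitly open a new one right after closing the
    # pending one, so statements never run outside a transaction.
    con = sqlite3.connect(":memory:", autocommit=False)
    con.execute("CREATE TABLE t(x)")
    con.execute("INSERT INTO t VALUES (1)")
    con.commit()     # closes the pending transaction; a new one is opened
    con.execute("INSERT INTO t VALUES (2)")
    con.rollback()   # discards the second insert; again a new transaction opens
    print(con.execute("SELECT x FROM t").fetchall())  # [(1,)]
    con.close()

The point of the wording change is exactly this: there is no window in which the statements above run in autocommit mode.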
From 417206a05c4545bde96c2bbbea92b53e6cac0d48 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 30 Nov 2022 11:36:06 +0000 Subject: [PATCH 100/112] gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893) Automerge-Triggered-By: GH:pablogsal --- Lib/test/test_source_encoding.py | 12 ++++++++++++ .../2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst | 3 +++ Parser/tokenizer.c | 8 ++++++++ Parser/tokenizer.h | 1 + 4 files changed, 24 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py index cfc4b13f18f330..b05173ad00d442 100644 --- a/Lib/test/test_source_encoding.py +++ b/Lib/test/test_source_encoding.py @@ -160,6 +160,18 @@ def test_file_parse_error_multiline(self): finally: os.unlink(TESTFN) + def test_tokenizer_fstring_warning_in_first_line(self): + source = "0b1and 2" + with open(TESTFN, "w") as fd: + fd.write("{}".format(source)) + try: + retcode, stdout, stderr = script_helper.assert_python_ok(TESTFN) + self.assertIn(b"SyntaxWarning: invalid binary litera", stderr) + self.assertEqual(stderr.count(source.encode()), 1) + finally: + os.unlink(TESTFN) + + class AbstractSourceEncodingTest: def test_default_coding(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst new file mode 100644 index 00000000000000..20cd361affeaa5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst @@ -0,0 +1,3 @@ +Fix a bug in the tokenizer that could cause infinite recursion when showing +syntax warnings that happen in the first line of the source. Patch by Pablo +Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index ce72e1529024c1..463c0e00ca1411 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -97,6 +97,7 @@ tok_new(void) tok->async_def_nl = 0; tok->interactive_underflow = IUNDERFLOW_NORMAL; tok->str = NULL; + tok->report_warnings = 1; #ifdef Py_DEBUG tok->debug = _Py_GetConfig()->parser_debug; #endif @@ -1201,6 +1202,10 @@ indenterror(struct tok_state *tok) static int parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...) { + if (!tok->report_warnings) { + return 0; + } + PyObject *errmsg; va_list vargs; va_start(vargs, format); @@ -2239,6 +2244,9 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) } } struct token token; + // We don't want to report warnings here because it could cause infinite recursion + // if fetching the encoding shows a warning. + tok->report_warnings = 0; while (tok->lineno < 2 && tok->done == E_OK) { _PyTokenizer_Get(tok, &token); } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 2542d30e1da0ed..16a94d5f51d664 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -92,6 +92,7 @@ struct tok_state { NEWLINE token after it. 
*/ /* How to proceed when asked for a new token in interactive mode */ enum interactive_underflow_t interactive_underflow; + int report_warnings; #ifdef Py_DEBUG int debug; #endif From ac12e3941f9c584c24b33d05beee95457e9d06a3 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:37:30 +0000 Subject: [PATCH 101/112] gh-87092: move all localsplus preparation into separate function called from assembler stage (GH-99869) --- Python/compile.c | 71 ++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index dd8596defb8efe..da31f1c45c363e 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -8730,6 +8730,41 @@ remove_redundant_jumps(cfg_builder *g) { return 0; } +static int +prepare_localsplus(struct compiler* c, int code_flags) +{ + assert(PyDict_GET_SIZE(c->u->u_varnames) < INT_MAX); + assert(PyDict_GET_SIZE(c->u->u_cellvars) < INT_MAX); + assert(PyDict_GET_SIZE(c->u->u_freevars) < INT_MAX); + int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); + int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars); + int nfreevars = (int)PyDict_GET_SIZE(c->u->u_freevars); + assert(INT_MAX - nlocals - ncellvars > 0); + assert(INT_MAX - nlocals - ncellvars - nfreevars > 0); + int nlocalsplus = nlocals + ncellvars + nfreevars; + int* cellfixedoffsets = build_cellfixedoffsets(c); + if (cellfixedoffsets == NULL) { + return -1; + } + + cfg_builder* g = CFG_BUILDER(c); + + // This must be called before fix_cell_offsets(). + if (insert_prefix_instructions(c, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) { + PyMem_Free(cellfixedoffsets); + return -1; + } + + int numdropped = fix_cell_offsets(c, g->g_entryblock, cellfixedoffsets); + PyMem_Free(cellfixedoffsets); // At this point we're done with it. + cellfixedoffsets = NULL; + if (numdropped < 0) { + return -1; + } + nlocalsplus -= numdropped; + return nlocalsplus; +} + static PyCodeObject * assemble(struct compiler *c, int addNone) { @@ -8751,20 +8786,6 @@ assemble(struct compiler *c, int addNone) ADDOP(c, NO_LOCATION, RETURN_VALUE); } - assert(PyDict_GET_SIZE(c->u->u_varnames) < INT_MAX); - assert(PyDict_GET_SIZE(c->u->u_cellvars) < INT_MAX); - assert(PyDict_GET_SIZE(c->u->u_freevars) < INT_MAX); - int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); - int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars); - int nfreevars = (int)PyDict_GET_SIZE(c->u->u_freevars); - assert(INT_MAX - nlocals - ncellvars > 0); - assert(INT_MAX - nlocals - ncellvars - nfreevars > 0); - int nlocalsplus = nlocals + ncellvars + nfreevars; - int *cellfixedoffsets = build_cellfixedoffsets(c); - if (cellfixedoffsets == NULL) { - goto error; - } - int nblocks = 0; for (basicblock *b = CFG_BUILDER(c)->g_block_list; b != NULL; b = b->b_list) { nblocks++; @@ -8787,19 +8808,6 @@ assemble(struct compiler *c, int addNone) } } - // This must be called before fix_cell_offsets(). - if (insert_prefix_instructions(c, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) { - goto error; - } - - int numdropped = fix_cell_offsets(c, g->g_entryblock, cellfixedoffsets); - PyMem_Free(cellfixedoffsets); // At this point we're done with it. 
- cellfixedoffsets = NULL; - if (numdropped < 0) { - goto error; - } - nlocalsplus -= numdropped; - /** Preprocessing **/ /* Map labels to targets and mark exception handlers */ if (translate_jump_labels_to_targets(g->g_entryblock)) { @@ -8839,6 +8847,12 @@ assemble(struct compiler *c, int addNone) } /** Assembly **/ + + int nlocalsplus = prepare_localsplus(c, code_flags); + if (nlocalsplus < 0) { + goto error; + } + int maxdepth = stackdepth(g->g_entryblock, code_flags); if (maxdepth < 0) { goto error; @@ -8904,9 +8918,6 @@ assemble(struct compiler *c, int addNone) error: Py_XDECREF(consts); assemble_free(&a); - if (cellfixedoffsets != NULL) { - PyMem_Free(cellfixedoffsets); - } return co; } From a694b8222e8b0683682958222699953379fd2d48 Mon Sep 17 00:00:00 2001 From: Arne de Laat Date: Wed, 30 Nov 2022 16:27:28 +0100 Subject: [PATCH 102/112] Fix typo in exception message in `multiprocessing.pool` (#99900) --- Lib/multiprocessing/pool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index 961d7e5991847a..4f5d88cb975cb7 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -696,7 +696,7 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, change_notifier, if (not result_handler.is_alive()) and (len(cache) != 0): raise AssertionError( - "Cannot have cache with result_hander not alive") + "Cannot have cache with result_handler not alive") result_handler._state = TERMINATE change_notifier.put(None) From 18a6967544795cdcce45b45700b7a9ed3994b8fb Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 30 Nov 2022 16:16:54 +0000 Subject: [PATCH 103/112] GH-99877) --- Python/compile.c | 64 +++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index da31f1c45c363e..e200c5abb59853 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -171,6 +171,24 @@ struct instr { struct basicblock_ *i_except; /* target block when exception is raised */ }; +/* One arg*/ +#define INSTR_SET_OP1(I, OP, ARG) \ + do { \ + assert(HAS_ARG(OP)); \ + struct instr *_instr__ptr_ = (I); \ + _instr__ptr_->i_opcode = (OP); \ + _instr__ptr_->i_oparg = (ARG); \ + } while (0); + +/* No args*/ +#define INSTR_SET_OP0(I, OP) \ + do { \ + assert(!HAS_ARG(OP)); \ + struct instr *_instr__ptr_ = (I); \ + _instr__ptr_->i_opcode = (OP); \ + _instr__ptr_->i_oparg = 0; \ + } while (0); + typedef struct exceptstack { struct basicblock_ *handlers[CO_MAXBLOCKS+1]; int depth; @@ -218,7 +236,8 @@ instr_size(struct instr *instruction) { int opcode = instruction->i_opcode; assert(!IS_PSEUDO_OPCODE(opcode)); - int oparg = HAS_ARG(opcode) ? instruction->i_oparg : 0; + int oparg = instruction->i_oparg; + assert(HAS_ARG(opcode) || oparg == 0); int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg); int caches = _PyOpcode_Caches[opcode]; return extended_args + 1 + caches; @@ -229,7 +248,8 @@ write_instr(_Py_CODEUNIT *codestr, struct instr *instruction, int ilen) { int opcode = instruction->i_opcode; assert(!IS_PSEUDO_OPCODE(opcode)); - int oparg = HAS_ARG(opcode) ? 
instruction->i_oparg : 0; + int oparg = instruction->i_oparg; + assert(HAS_ARG(opcode) || oparg == 0); int caches = _PyOpcode_Caches[opcode]; switch (ilen - caches) { case 4: @@ -7598,7 +7618,7 @@ convert_exception_handlers_to_nops(basicblock *entryblock) { for (int i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) { - instr->i_opcode = NOP; + INSTR_SET_OP0(instr, NOP); } } } @@ -8723,7 +8743,7 @@ remove_redundant_jumps(cfg_builder *g) { } if (last->i_target == b->b_next) { assert(b->b_next->b_iused); - last->i_opcode = NOP; + INSTR_SET_OP0(last, NOP); } } } @@ -8999,10 +9019,9 @@ fold_tuple_on_constants(PyObject *const_cache, } Py_DECREF(newconst); for (int i = 0; i < n; i++) { - inst[i].i_opcode = NOP; + INSTR_SET_OP0(&inst[i], NOP); } - inst[n].i_opcode = LOAD_CONST; - inst[n].i_oparg = (int)index; + INSTR_SET_OP1(&inst[n], LOAD_CONST, (int)index); return 0; } @@ -9099,7 +9118,7 @@ swaptimize(basicblock *block, int *ix) } // NOP out any unused instructions: while (0 <= current) { - instructions[current--].i_opcode = NOP; + INSTR_SET_OP0(&instructions[current--], NOP); } PyMem_Free(stack); *ix += len - 1; @@ -9165,7 +9184,7 @@ apply_static_swaps(basicblock *block, int i) } } // Success! - swap->i_opcode = NOP; + INSTR_SET_OP0(swap, NOP); struct instr temp = block->b_instr[j]; block->b_instr[j] = block->b_instr[k]; block->b_instr[k] = temp; @@ -9202,7 +9221,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); struct instr nop; - nop.i_opcode = NOP; + INSTR_SET_OP0(&nop, NOP); struct instr *target; for (int i = 0; i < bb->b_iused; i++) { struct instr *inst = &bb->b_instr[i]; @@ -9236,13 +9255,13 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) if (is_true == -1) { goto error; } - inst->i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); jump_if_true = nextop == POP_JUMP_IF_TRUE; if (is_true == jump_if_true) { bb->b_instr[i+1].i_opcode = JUMP; } else { - bb->b_instr[i+1].i_opcode = NOP; + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); } break; case JUMP_IF_FALSE_OR_POP: @@ -9261,8 +9280,8 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) bb->b_instr[i+1].i_opcode = JUMP; } else { - inst->i_opcode = NOP; - bb->b_instr[i+1].i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); } break; case IS_OP: @@ -9273,8 +9292,8 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) int jump_op = i+2 < bb->b_iused ? bb->b_instr[i+2].i_opcode : 0; if (Py_IsNone(cnt) && (jump_op == POP_JUMP_IF_FALSE || jump_op == POP_JUMP_IF_TRUE)) { unsigned char nextarg = bb->b_instr[i+1].i_oparg; - inst->i_opcode = NOP; - bb->b_instr[i+1].i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); bb->b_instr[i+2].i_opcode = nextarg ^ (jump_op == POP_JUMP_IF_FALSE) ? 
POP_JUMP_IF_NOT_NONE : POP_JUMP_IF_NONE; } @@ -9292,12 +9311,12 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) { switch(oparg) { case 1: - inst->i_opcode = NOP; - bb->b_instr[i+1].i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); continue; case 2: case 3: - inst->i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); bb->b_instr[i+1].i_opcode = SWAP; continue; } @@ -9406,7 +9425,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) break; case SWAP: if (oparg == 1) { - inst->i_opcode = NOP; + INSTR_SET_OP0(inst, NOP); break; } if (swaptimize(bb, &i)) { @@ -9418,8 +9437,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) break; case PUSH_NULL: if (nextop == LOAD_GLOBAL && (inst[1].i_opcode & 1) == 0) { - inst->i_opcode = NOP; - inst->i_oparg = 0; + INSTR_SET_OP0(inst, NOP); inst[1].i_oparg |= 1; } break; @@ -9448,7 +9466,7 @@ inline_small_exit_blocks(basicblock *bb) { } basicblock *target = last->i_target; if (basicblock_exits_scope(target) && target->b_iused <= MAX_COPY_SIZE) { - last->i_opcode = NOP; + INSTR_SET_OP0(last, NOP); if (basicblock_append_instructions(bb, target) < 0) { return -1; } From 85dd6cb6df996b1197266d1a50ecc9187a91e481 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 30 Nov 2022 17:22:52 +0100 Subject: [PATCH 104/112] gh-99845: Use size_t type in __sizeof__() methods (#99846) The implementation of __sizeof__() methods using _PyObject_SIZE() now use an unsigned type (size_t) to compute the size, rather than a signed type (Py_ssize_t). Cast explicitly signed (Py_ssize_t) values to unsigned type (Py_ssize_t). --- Modules/_collectionsmodule.c | 12 +++++------- Modules/_decimal/_decimal.c | 8 +++----- Modules/_elementtree.c | 13 +++++++------ Modules/_io/bufferedio.c | 11 +++++------ Modules/_io/bytesio.c | 10 ++++------ Modules/_pickle.c | 27 ++++++++++++-------------- Modules/_struct.c | 10 ++++------ Modules/arraymodule.c | 6 +++--- Modules/clinic/_elementtree.c.h | 10 +++++----- Modules/clinic/_pickle.c.h | 18 ++++++++--------- Modules/itertoolsmodule.c | 34 +++++++++++++-------------------- Modules/mmapmodule.c | 9 ++++----- Objects/bytearrayobject.c | 7 +++---- Objects/listobject.c | 7 +++---- Objects/setobject.c | 11 +++++------ 15 files changed, 85 insertions(+), 108 deletions(-) diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index 5fa583821889f3..68131f3b54d2ea 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -1508,15 +1508,13 @@ deque_init(dequeobject *deque, PyObject *args, PyObject *kwdargs) static PyObject * deque_sizeof(dequeobject *deque, void *unused) { - Py_ssize_t res; - Py_ssize_t blocks; - - res = _PyObject_SIZE(Py_TYPE(deque)); + size_t res = _PyObject_SIZE(Py_TYPE(deque)); + size_t blocks; blocks = (size_t)(deque->leftindex + Py_SIZE(deque) + BLOCKLEN - 1) / BLOCKLEN; - assert(deque->leftindex + Py_SIZE(deque) - 1 == - (blocks - 1) * BLOCKLEN + deque->rightindex); + assert(((size_t)deque->leftindex + (size_t)Py_SIZE(deque) - 1) == + ((blocks - 1) * BLOCKLEN + (size_t)deque->rightindex)); res += blocks * sizeof(block); - return PyLong_FromSsize_t(res); + return PyLong_FromSize_t(res); } PyDoc_STRVAR(sizeof_doc, diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 2d6e4e4d62b42c..bc97615ffb4b72 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -4796,13 
+4796,11 @@ dec_reduce(PyObject *self, PyObject *dummy UNUSED) static PyObject * dec_sizeof(PyObject *v, PyObject *dummy UNUSED) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(v)); + size_t res = _PyObject_SIZE(Py_TYPE(v)); if (mpd_isdynamic_data(MPD(v))) { - res += MPD(v)->alloc * sizeof(mpd_uint_t); + res += (size_t)MPD(v)->alloc * sizeof(mpd_uint_t); } - return PyLong_FromSsize_t(res); + return PyLong_FromSize_t(res); } /* __trunc__ */ diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 0c68ede42ca61d..555c22f88b36d5 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -876,19 +876,20 @@ deepcopy(PyObject *object, PyObject *memo) /*[clinic input] -_elementtree.Element.__sizeof__ -> Py_ssize_t +_elementtree.Element.__sizeof__ -> size_t [clinic start generated code]*/ -static Py_ssize_t +static size_t _elementtree_Element___sizeof___impl(ElementObject *self) -/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/ +/*[clinic end generated code: output=baae4e7ae9fe04ec input=54e298c501f3e0d0]*/ { - Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self)); + size_t result = _PyObject_SIZE(Py_TYPE(self)); if (self->extra) { result += sizeof(ElementObjectExtra); - if (self->extra->children != self->extra->_children) - result += sizeof(PyObject*) * self->extra->allocated; + if (self->extra->children != self->extra->_children) { + result += (size_t)self->extra->allocated * sizeof(PyObject*); + } } return result; } diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index e2610931caae07..6df55b5b8303c2 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -389,12 +389,11 @@ buffered_dealloc(buffered *self) static PyObject * buffered_sizeof(buffered *self, PyObject *Py_UNUSED(ignored)) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)); - if (self->buffer) - res += self->buffer_size; - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(self)); + if (self->buffer) { + res += (size_t)self->buffer_size; + } + return PyLong_FromSize_t(res); } static int diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 41be3497506d3f..6698c60355fcc5 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -957,17 +957,15 @@ _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue) static PyObject * bytesio_sizeof(bytesio *self, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)); + size_t res = _PyObject_SIZE(Py_TYPE(self)); if (self->buf && !SHARED_BUF(self)) { - Py_ssize_t s = _PySys_GetSizeOf(self->buf); - if (s == -1) { + size_t s = _PySys_GetSizeOf(self->buf); + if (s == (size_t)-1) { return NULL; } res += s; } - return PyLong_FromSsize_t(res); + return PyLong_FromSize_t(res); } static int diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 2078779663a919..1b34977806b661 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -4575,26 +4575,25 @@ _pickle_Pickler_dump(PicklerObject *self, PyObject *obj) /*[clinic input] -_pickle.Pickler.__sizeof__ -> Py_ssize_t +_pickle.Pickler.__sizeof__ -> size_t Returns size in memory, in bytes. 
[clinic start generated code]*/ -static Py_ssize_t +static size_t _pickle_Pickler___sizeof___impl(PicklerObject *self) -/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/ +/*[clinic end generated code: output=23ad75658d3b59ff input=d8127c8e7012ebd7]*/ { - Py_ssize_t res, s; - - res = _PyObject_SIZE(Py_TYPE(self)); + size_t res = _PyObject_SIZE(Py_TYPE(self)); if (self->memo != NULL) { res += sizeof(PyMemoTable); res += self->memo->mt_allocated * sizeof(PyMemoEntry); } if (self->output_buffer != NULL) { - s = _PySys_GetSizeOf(self->output_buffer); - if (s == -1) + size_t s = _PySys_GetSizeOf(self->output_buffer); + if (s == (size_t)-1) { return -1; + } res += s; } return res; @@ -7079,22 +7078,20 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, /*[clinic input] -_pickle.Unpickler.__sizeof__ -> Py_ssize_t +_pickle.Unpickler.__sizeof__ -> size_t Returns size in memory, in bytes. [clinic start generated code]*/ -static Py_ssize_t +static size_t _pickle_Unpickler___sizeof___impl(UnpicklerObject *self) -/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/ +/*[clinic end generated code: output=4648d84c228196df input=27180b2b6b524012]*/ { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)); + size_t res = _PyObject_SIZE(Py_TYPE(self)); if (self->memo != NULL) res += self->memo_size * sizeof(PyObject *); if (self->marks != NULL) - res += self->marks_size * sizeof(Py_ssize_t); + res += (size_t)self->marks_size * sizeof(Py_ssize_t); if (self->input_line != NULL) res += strlen(self->input_line) + 1; if (self->encoding != NULL) diff --git a/Modules/_struct.c b/Modules/_struct.c index c960b81b246ece..0cf34fbf9a3afa 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2090,13 +2090,11 @@ PyDoc_STRVAR(s_sizeof__doc__, static PyObject * s_sizeof(PyStructObject *self, void *unused) { - Py_ssize_t size; - formatcode *code; - - size = _PyObject_SIZE(Py_TYPE(self)) + sizeof(formatcode); - for (code = self->s_codes; code->fmtdef != NULL; code++) + size_t size = _PyObject_SIZE(Py_TYPE(self)) + sizeof(formatcode); + for (formatcode *code = self->s_codes; code->fmtdef != NULL; code++) { size += sizeof(formatcode); - return PyLong_FromSsize_t(size); + } + return PyLong_FromSize_t(size); } /* List of functions */ diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index dcf510e9788ee5..114c69a033593c 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1773,9 +1773,9 @@ static PyObject * array_array___sizeof___impl(arrayobject *self) /*[clinic end generated code: output=d8e1c61ebbe3eaed input=805586565bf2b3c6]*/ { - Py_ssize_t res; - res = _PyObject_SIZE(Py_TYPE(self)) + self->allocated * self->ob_descr->itemsize; - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(self)); + res += (size_t)self->allocated * (size_t)self->ob_descr->itemsize; + return PyLong_FromSize_t(res); } diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index 0a2a74e220c2d1..33ccaf7e7c7eaf 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -106,20 +106,20 @@ PyDoc_STRVAR(_elementtree_Element___sizeof____doc__, #define _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF \ {"__sizeof__", (PyCFunction)_elementtree_Element___sizeof__, METH_NOARGS, _elementtree_Element___sizeof____doc__}, -static Py_ssize_t +static size_t _elementtree_Element___sizeof___impl(ElementObject *self); static PyObject * _elementtree_Element___sizeof__(ElementObject *self, PyObject *Py_UNUSED(ignored)) 
{ PyObject *return_value = NULL; - Py_ssize_t _return_value; + size_t _return_value; _return_value = _elementtree_Element___sizeof___impl(self); - if ((_return_value == -1) && PyErr_Occurred()) { + if ((_return_value == (size_t)-1) && PyErr_Occurred()) { goto exit; } - return_value = PyLong_FromSsize_t(_return_value); + return_value = PyLong_FromSize_t(_return_value); exit: return return_value; @@ -1105,4 +1105,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args, exit: return return_value; } -/*[clinic end generated code: output=67a80531eaf43815 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4ad006cadce01571 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_pickle.c.h b/Modules/clinic/_pickle.c.h index e9ff2604719abd..adb3abc5eb2372 100644 --- a/Modules/clinic/_pickle.c.h +++ b/Modules/clinic/_pickle.c.h @@ -49,20 +49,20 @@ PyDoc_STRVAR(_pickle_Pickler___sizeof____doc__, #define _PICKLE_PICKLER___SIZEOF___METHODDEF \ {"__sizeof__", (PyCFunction)_pickle_Pickler___sizeof__, METH_NOARGS, _pickle_Pickler___sizeof____doc__}, -static Py_ssize_t +static size_t _pickle_Pickler___sizeof___impl(PicklerObject *self); static PyObject * _pickle_Pickler___sizeof__(PicklerObject *self, PyObject *Py_UNUSED(ignored)) { PyObject *return_value = NULL; - Py_ssize_t _return_value; + size_t _return_value; _return_value = _pickle_Pickler___sizeof___impl(self); - if ((_return_value == -1) && PyErr_Occurred()) { + if ((_return_value == (size_t)-1) && PyErr_Occurred()) { goto exit; } - return_value = PyLong_FromSsize_t(_return_value); + return_value = PyLong_FromSize_t(_return_value); exit: return return_value; @@ -301,20 +301,20 @@ PyDoc_STRVAR(_pickle_Unpickler___sizeof____doc__, #define _PICKLE_UNPICKLER___SIZEOF___METHODDEF \ {"__sizeof__", (PyCFunction)_pickle_Unpickler___sizeof__, METH_NOARGS, _pickle_Unpickler___sizeof____doc__}, -static Py_ssize_t +static size_t _pickle_Unpickler___sizeof___impl(UnpicklerObject *self); static PyObject * _pickle_Unpickler___sizeof__(UnpicklerObject *self, PyObject *Py_UNUSED(ignored)) { PyObject *return_value = NULL; - Py_ssize_t _return_value; + size_t _return_value; _return_value = _pickle_Unpickler___sizeof___impl(self); - if ((_return_value == -1) && PyErr_Occurred()) { + if ((_return_value == (size_t)-1) && PyErr_Occurred()) { goto exit; } - return_value = PyLong_FromSsize_t(_return_value); + return_value = PyLong_FromSize_t(_return_value); exit: return return_value; @@ -980,4 +980,4 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=3321309c2157ee74 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=730dc26938561313 input=a9049054013a1b77]*/ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 4b0a4d88c435c6..e8b9bc76eec935 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -2484,11 +2484,9 @@ product_dealloc(productobject *lz) static PyObject * product_sizeof(productobject *lz, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(lz)); - res += PyTuple_GET_SIZE(lz->pools) * sizeof(Py_ssize_t); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(lz)); + res += (size_t)PyTuple_GET_SIZE(lz->pools) * sizeof(Py_ssize_t); + return PyLong_FromSize_t(res); } PyDoc_STRVAR(sizeof_doc, "Returns size in memory, in bytes."); @@ -2817,11 +2815,9 @@ combinations_dealloc(combinationsobject *co) static PyObject * 
combinations_sizeof(combinationsobject *co, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(co)); - res += co->r * sizeof(Py_ssize_t); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(co)); + res += (size_t)co->r * sizeof(Py_ssize_t); + return PyLong_FromSize_t(res); } static int @@ -3153,11 +3149,9 @@ cwr_dealloc(cwrobject *co) static PyObject * cwr_sizeof(cwrobject *co, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(co)); - res += co->r * sizeof(Py_ssize_t); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(co)); + res += (size_t)co->r * sizeof(Py_ssize_t); + return PyLong_FromSize_t(res); } static int @@ -3498,12 +3492,10 @@ permutations_dealloc(permutationsobject *po) static PyObject * permutations_sizeof(permutationsobject *po, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(po)); - res += PyTuple_GET_SIZE(po->pool) * sizeof(Py_ssize_t); - res += po->r * sizeof(Py_ssize_t); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(po)); + res += (size_t)PyTuple_GET_SIZE(po->pool) * sizeof(Py_ssize_t); + res += (size_t)po->r * sizeof(Py_ssize_t); + return PyLong_FromSize_t(res); } static int diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 2311840e22105f..8244202376c74e 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -804,12 +804,11 @@ mmap__repr__method(PyObject *self) static PyObject * mmap__sizeof__method(mmap_object *self, void *unused) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)); - if (self->tagname) + size_t res = _PyObject_SIZE(Py_TYPE(self)); + if (self->tagname) { res += strlen(self->tagname) + 1; - return PyLong_FromSsize_t(res); + } + return PyLong_FromSize_t(res); } #endif diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 0ba6fb5b76ccc7..f24690a02bda26 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2151,10 +2151,9 @@ static PyObject * bytearray_sizeof_impl(PyByteArrayObject *self) /*[clinic end generated code: output=738abdd17951c427 input=e27320fd98a4bc5a]*/ { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)) + self->ob_alloc * sizeof(char); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(self)); + res += (size_t)self->ob_alloc * sizeof(char); + return PyLong_FromSize_t(res); } static PySequenceMethods bytearray_as_sequence = { diff --git a/Objects/listobject.c b/Objects/listobject.c index da623c9719aeb8..0e696fbffb3f53 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2806,10 +2806,9 @@ static PyObject * list___sizeof___impl(PyListObject *self) /*[clinic end generated code: output=3417541f95f9a53e input=b8030a5d5ce8a187]*/ { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(self)) + self->allocated * sizeof(void*); - return PyLong_FromSsize_t(res); + size_t res = _PyObject_SIZE(Py_TYPE(self)); + res += (size_t)self->allocated * sizeof(void*); + return PyLong_FromSize_t(res); } static PyObject *list_iter(PyObject *seq); diff --git a/Objects/setobject.c b/Objects/setobject.c index ae9e9b99446116..fcdda2a0bca2b6 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1957,12 +1957,11 @@ set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored)) static PyObject * set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored)) { - Py_ssize_t res; - - res = _PyObject_SIZE(Py_TYPE(so)); - if (so->table != so->smalltable) - res = res + (so->mask + 1) * sizeof(setentry); - return PyLong_FromSsize_t(res); + size_t res = 
_PyObject_SIZE(Py_TYPE(so)); + if (so->table != so->smalltable) { + res += ((size_t)so->mask + 1) * sizeof(setentry); + } + return PyLong_FromSize_t(res); } PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes"); From 131801d14dfc4f0b2b79103612c88e2e282ff158 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 30 Nov 2022 18:17:50 +0100 Subject: [PATCH 105/112] gh-99845: PEP 670: Convert PyObject macros to functions (#99850) Convert macros to static inline functions to avoid macro pitfalls, like duplication of side effects: * _PyObject_SIZE() * _PyObject_VAR_SIZE() The result type is size_t (unsigned). --- Include/cpython/objimpl.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Include/cpython/objimpl.h b/Include/cpython/objimpl.h index d7c76eab5c7312..0b038d31080be9 100644 --- a/Include/cpython/objimpl.h +++ b/Include/cpython/objimpl.h @@ -2,7 +2,9 @@ # error "this header file must not be included directly" #endif -#define _PyObject_SIZE(typeobj) ( (typeobj)->tp_basicsize ) +static inline size_t _PyObject_SIZE(PyTypeObject *type) { + return _Py_STATIC_CAST(size_t, type->tp_basicsize); +} /* _PyObject_VAR_SIZE returns the number of bytes (as size_t) allocated for a vrbl-size object with nitems items, exclusive of gc overhead (if any). The @@ -18,10 +20,11 @@ # error "_PyObject_VAR_SIZE requires SIZEOF_VOID_P be a power of 2" #endif -#define _PyObject_VAR_SIZE(typeobj, nitems) \ - _Py_SIZE_ROUND_UP((typeobj)->tp_basicsize + \ - (nitems)*(typeobj)->tp_itemsize, \ - SIZEOF_VOID_P) +static inline size_t _PyObject_VAR_SIZE(PyTypeObject *type, Py_ssize_t nitems) { + size_t size = _Py_STATIC_CAST(size_t, type->tp_basicsize); + size += _Py_STATIC_CAST(size_t, nitems) * _Py_STATIC_CAST(size_t, type->tp_itemsize); + return _Py_SIZE_ROUND_UP(size, SIZEOF_VOID_P); +} /* This example code implements an object constructor with a custom From bf94c653f4291ba2db506453e0e00a82fe06b70a Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 30 Nov 2022 14:17:08 -0500 Subject: [PATCH 106/112] GH-99905: Fix output of misses in summarize_stats.py execution counts (GH-99906) This was an indentation error introduced in 2844aa6a --- Tools/scripts/summarize_stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 8d91bda5a43a0d..9c098064fe5403 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -317,11 +317,11 @@ def calculate_execution_counts(opcode_stats, total): for (count, name, miss) in counts: cumulative += count if miss: - miss = f"{100*miss/count:0.1f}%" + miss = f"{100*miss/count:0.1f}%" else: miss = "" - rows.append((name, count, f"{100*count/total:0.1f}%", - f"{100*cumulative/total:0.1f}%", miss)) + rows.append((name, count, f"{100*count/total:0.1f}%", + f"{100*cumulative/total:0.1f}%", miss)) return rows def emit_execution_counts(opcode_stats, total): From 9628136fac997847b4662e6a17faf06d2a0507eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Wed, 30 Nov 2022 20:44:10 +0100 Subject: [PATCH 107/112] bpo-47220: Document the optional callback parameter of weakref.WeakMethod (GH-25491) --- Doc/library/weakref.rst | 6 ++++-- .../next/Library/2022-04-04-22-54-11.bpo-47220.L9jYu4.rst | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-04-22-54-11.bpo-47220.L9jYu4.rst diff --git a/Doc/library/weakref.rst b/Doc/library/weakref.rst index 
a1e542b1e927e4..73e7b21ae405d2 100644 --- a/Doc/library/weakref.rst +++ b/Doc/library/weakref.rst @@ -143,7 +143,7 @@ See :ref:`__slots__ documentation ` for details. ``ProxyType`` or ``CallableProxyType``, depending on whether *object* is callable. Proxy objects are not :term:`hashable` regardless of the referent; this avoids a number of problems related to their fundamentally mutable nature, and - prevent their use as dictionary keys. *callback* is the same as the parameter + prevents their use as dictionary keys. *callback* is the same as the parameter of the same name to the :func:`ref` function. Accessing an attribute of the proxy object after the referent is @@ -212,7 +212,7 @@ objects. discarded when no strong reference to it exists any more. -.. class:: WeakMethod(method) +.. class:: WeakMethod(method[, callback]) A custom :class:`ref` subclass which simulates a weak reference to a bound method (i.e., a method defined on a class and looked up on an instance). @@ -238,6 +238,8 @@ objects. >>> r() >>> + *callback* is the same as the parameter of the same name to the :func:`ref` function. + .. versionadded:: 3.4 .. class:: finalize(obj, func, /, *args, **kwargs) diff --git a/Misc/NEWS.d/next/Library/2022-04-04-22-54-11.bpo-47220.L9jYu4.rst b/Misc/NEWS.d/next/Library/2022-04-04-22-54-11.bpo-47220.L9jYu4.rst new file mode 100644 index 00000000000000..6e2af088640b55 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-04-22-54-11.bpo-47220.L9jYu4.rst @@ -0,0 +1,2 @@ +Document the optional *callback* parameter of :class:`WeakMethod`. Patch by +Géry Ogam. From 787764219f874ce2035699ed772af1e9f3bbf813 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 30 Nov 2022 23:04:30 +0200 Subject: [PATCH 108/112] gh-89189: More compact range iterator (GH-27986) --- Include/internal/pycore_range.h | 1 - Lib/test/test_range.py | 38 +++++++-- Lib/test/test_sys.py | 3 +- .../2021-08-29-15-55-19.bpo-45026.z7nTA3.rst | 3 + Objects/rangeobject.c | 79 ++++++++++--------- Python/bytecodes.c | 7 +- Python/generated_cases.c.h | 7 +- 7 files changed, 88 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst diff --git a/Include/internal/pycore_range.h b/Include/internal/pycore_range.h index 809e89a1e01b60..bf045ec4fd8332 100644 --- a/Include/internal/pycore_range.h +++ b/Include/internal/pycore_range.h @@ -10,7 +10,6 @@ extern "C" { typedef struct { PyObject_HEAD - long index; long start; long step; long len; diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py index 851ad5b7c2f485..7be76b32ac2935 100644 --- a/Lib/test/test_range.py +++ b/Lib/test/test_range.py @@ -407,11 +407,7 @@ def test_iterator_pickling_overflowing_index(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.subTest(proto=proto): it = iter(range(2**32 + 2)) - _, _, idx = it.__reduce__() - self.assertEqual(idx, 0) - it.__setstate__(2**32 + 1) # undocumented way to set r->index - _, _, idx = it.__reduce__() - self.assertEqual(idx, 2**32 + 1) + it.__setstate__(2**32 + 1) # undocumented way to advance an iterator d = pickle.dumps(it, proto) it = pickle.loads(d) self.assertEqual(next(it), 2**32 + 1) @@ -442,6 +438,38 @@ def test_large_exhausted_iterator_pickling(self): self.assertEqual(list(i), []) self.assertEqual(list(i2), []) + def test_iterator_unpickle_compat(self): + testcases = [ + b'c__builtin__\niter\n(c__builtin__\nxrange\n(I10\nI20\nI2\ntRtRI2\nb.', + b'c__builtin__\niter\n(c__builtin__\nxrange\n(K\nK\x14K\x02tRtRK\x02b.', + 
b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\nK\nK\x14K\x02\x87R\x85RK\x02b.', + b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\nK\nK\x14K\x02\x87R\x85RK\x02b.', + b'\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93K\nK\x14K\x02\x87R\x85RK\x02b.', + + b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nI20\nI2\ntRtRL18446744073709551623L\nb.', + b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nK\x14K\x02tRtRL18446744073709551623L\nb.', + b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.', + b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.', + b'\x80\x04\x95C\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.', + ] + for t in testcases: + it = pickle.loads(t) + self.assertEqual(list(it), [14, 16, 18]) + + def test_iterator_setstate(self): + it = iter(range(10, 20, 2)) + it.__setstate__(2) + self.assertEqual(list(it), [14, 16, 18]) + it = reversed(range(10, 20, 2)) + it.__setstate__(3) + self.assertEqual(list(it), [12, 10]) + it = iter(range(-2**65, 20, 2)) + it.__setstate__(2**64 + 7) + self.assertEqual(list(it), [14, 16, 18]) + it = reversed(range(10, 2**65, 2)) + it.__setstate__(2**64 - 7) + self.assertEqual(list(it), [12, 10]) + def test_odd_bug(self): # This used to raise a "SystemError: NULL result without error" # because the range validation step was eating the exception diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 2403c7c815f2c0..17a5026e2571e1 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1484,7 +1484,8 @@ def delx(self): del self.__x # PyCapsule # XXX # rangeiterator - check(iter(range(1)), size('4l')) + check(iter(range(1)), size('3l')) + check(iter(range(2**65)), size('3P')) # reverse check(reversed(''), size('nP')) # range diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst new file mode 100644 index 00000000000000..481ab53e4f5197 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst @@ -0,0 +1,3 @@ +Optimize the :class:`range` object iterator. It is now smaller, faster +iteration of ranges containing large numbers. Smaller pickles, faster +unpickling. diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index a889aa04db81f0..992e7c079ded54 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -756,18 +756,19 @@ PyTypeObject PyRange_Type = { static PyObject * rangeiter_next(_PyRangeIterObject *r) { - if (r->index < r->len) - /* cast to unsigned to avoid possible signed overflow - in intermediate calculations. 
*/ - return PyLong_FromLong((long)(r->start + - (unsigned long)(r->index++) * r->step)); + if (r->len > 0) { + long result = r->start; + r->start = result + r->step; + r->len--; + return PyLong_FromLong(result); + } return NULL; } static PyObject * rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) { - return PyLong_FromLong(r->len - r->index); + return PyLong_FromLong(r->len); } PyDoc_STRVAR(length_hint_doc, @@ -794,8 +795,8 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) if (range == NULL) goto err; /* return the result */ - return Py_BuildValue( - "N(N)l", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index); + return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)), + range, Py_None); err: Py_XDECREF(start); Py_XDECREF(stop); @@ -814,7 +815,8 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state) index = 0; else if (index > r->len) index = r->len; /* exhausted iterator */ - r->index = index; + r->start += index * r->step; + r->len -= index; Py_RETURN_NONE; } @@ -904,13 +906,11 @@ fast_range_iter(long start, long stop, long step, long len) it->start = start; it->step = step; it->len = len; - it->index = 0; return (PyObject *)it; } typedef struct { PyObject_HEAD - PyObject *index; PyObject *start; PyObject *step; PyObject *len; @@ -919,7 +919,8 @@ typedef struct { static PyObject * longrangeiter_len(longrangeiterobject *r, PyObject *no_args) { - return PyNumber_Subtract(r->len, r->index); + Py_INCREF(r->len); + return r->len; } static PyObject * @@ -946,8 +947,8 @@ longrangeiter_reduce(longrangeiterobject *r, PyObject *Py_UNUSED(ignored)) } /* return the result */ - return Py_BuildValue( - "N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index); + return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)), + range, Py_None); } static PyObject * @@ -970,7 +971,22 @@ longrangeiter_setstate(longrangeiterobject *r, PyObject *state) if (cmp > 0) state = r->len; } - Py_XSETREF(r->index, Py_NewRef(state)); + PyObject *product = PyNumber_Multiply(state, r->step); + if (product == NULL) + return NULL; + PyObject *new_start = PyNumber_Add(r->start, product); + Py_DECREF(product); + if (new_start == NULL) + return NULL; + PyObject *new_len = PyNumber_Subtract(r->len, state); + if (new_len == NULL) { + Py_DECREF(new_start); + return NULL; + } + PyObject *tmp = r->start; + r->start = new_start; + Py_SETREF(r->len, new_len); + Py_DECREF(tmp); Py_RETURN_NONE; } @@ -987,7 +1003,6 @@ static PyMethodDef longrangeiter_methods[] = { static void longrangeiter_dealloc(longrangeiterobject *r) { - Py_XDECREF(r->index); Py_XDECREF(r->start); Py_XDECREF(r->step); Py_XDECREF(r->len); @@ -997,29 +1012,21 @@ longrangeiter_dealloc(longrangeiterobject *r) static PyObject * longrangeiter_next(longrangeiterobject *r) { - PyObject *product, *new_index, *result; - if (PyObject_RichCompareBool(r->index, r->len, Py_LT) != 1) + if (PyObject_RichCompareBool(r->len, _PyLong_GetZero(), Py_GT) != 1) return NULL; - new_index = PyNumber_Add(r->index, _PyLong_GetOne()); - if (!new_index) + PyObject *new_start = PyNumber_Add(r->start, r->step); + if (new_start == NULL) { return NULL; - - product = PyNumber_Multiply(r->index, r->step); - if (!product) { - Py_DECREF(new_index); - return NULL; - } - - result = PyNumber_Add(r->start, product); - Py_DECREF(product); - if (result) { - Py_SETREF(r->index, new_index); } - else { - Py_DECREF(new_index); + PyObject *new_len = PyNumber_Subtract(r->len, _PyLong_GetOne()); + if (new_len == NULL) { + Py_DECREF(new_start); + return 
NULL; } - + PyObject *result = r->start; + r->start = new_start; + Py_SETREF(r->len, new_len); return result; } @@ -1108,7 +1115,6 @@ range_iter(PyObject *seq) it->start = Py_NewRef(r->start); it->step = Py_NewRef(r->step); it->len = Py_NewRef(r->length); - it->index = Py_NewRef(_PyLong_GetZero()); return (PyObject *)it; } @@ -1186,7 +1192,7 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored)) it = PyObject_New(longrangeiterobject, &PyLongRangeIter_Type); if (it == NULL) return NULL; - it->index = it->start = it->step = NULL; + it->start = it->step = NULL; /* start + (len - 1) * step */ it->len = Py_NewRef(range->length); @@ -1210,7 +1216,6 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored)) if (!it->step) goto create_failure; - it->index = Py_NewRef(_PyLong_GetZero()); return (PyObject *)it; create_failure: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a1f910da8ed54a..41dd1acc937d71 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2620,14 +2620,15 @@ dummy_func( STAT_INC(FOR_ITER, hit); _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER]; assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST); - if (r->index >= r->len) { + if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } else { - long value = (long)(r->start + - (unsigned long)(r->index++) * r->step); + long value = r->start; + r->start = value + r->step; + r->len--; if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { goto error; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ae8fdd5e99c3dc..3af60b83d84e70 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2638,14 +2638,15 @@ STAT_INC(FOR_ITER, hit); _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER]; assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST); - if (r->index >= r->len) { + if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } else { - long value = (long)(r->start + - (unsigned long)(r->index++) * r->step); + long value = r->start; + r->start = value + r->step; + r->len--; if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) { goto error; } From e0f91deb5930ecb02e7f8ced9bd82609e6889fb0 Mon Sep 17 00:00:00 2001 From: ram vikram singh Date: Thu, 1 Dec 2022 04:22:21 +0530 Subject: [PATCH 109/112] GH-98906 ```re``` module: ```search() vs. match()``` section should mention ```fullmatch()``` (GH-98916) Mention fullmatch along with search and match. --- Doc/library/re.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 0034b46fb1ced2..e6e242320fd878 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1565,16 +1565,22 @@ search() vs. match() .. sectionauthor:: Fred L. Drake, Jr. -Python offers two different primitive operations based on regular expressions: -:func:`re.match` checks for a match only at the beginning of the string, while -:func:`re.search` checks for a match anywhere in the string (this is what Perl -does by default). 
+Python offers different primitive operations based on regular expressions: + ++ :func:`re.match` checks for a match only at the beginning of the string ++ :func:`re.search` checks for a match anywhere in the string + (this is what Perl does by default) ++ :func:`re.fullmatch` checks for entire string to be a match + For example:: >>> re.match("c", "abcdef") # No match >>> re.search("c", "abcdef") # Match + >>> re.fullmatch("p.*n", "python") # Match + + >>> re.fullmatch("r.*n", "python") # No match Regular expressions beginning with ``'^'`` can be used with :func:`search` to restrict the match at the beginning of the string:: @@ -1588,8 +1594,8 @@ Note however that in :const:`MULTILINE` mode :func:`match` only matches at the beginning of the string, whereas using :func:`search` with a regular expression beginning with ``'^'`` will match at the beginning of each line. :: - >>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match - >>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match + >>> re.match("X", "A\nB\nX", re.MULTILINE) # No match + >>> re.search("^X", "A\nB\nX", re.MULTILINE) # Match From d460c8ec52716a37080d31fdc0f673edcc98bee8 Mon Sep 17 00:00:00 2001 From: "C.A.M. Gerlach" Date: Thu, 1 Dec 2022 01:19:41 -0600 Subject: [PATCH 110/112] Doc: Add summary line to isolation_level & autocommit sqlite3.connect params (#99917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add summary lines to isolation_level and autocommit connect() params Co-authored-by: Géry Ogam --- Doc/library/sqlite3.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index a9ee1e207d0c05..960f2966afe1f2 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -292,6 +292,7 @@ Module functions By default (``0``), type detection is disabled. :param isolation_level: + Control legacy transaction handling behaviour. See :attr:`Connection.isolation_level` and :ref:`sqlite3-transaction-control-isolation-level` for more information. Can be ``"DEFERRED"`` (default), ``"EXCLUSIVE"`` or ``"IMMEDIATE"``; @@ -325,6 +326,7 @@ Module functions enabling various :ref:`sqlite3-uri-tricks`. :param autocommit: + Control :pep:`249` transaction handling behaviour. See :attr:`Connection.autocommit` and :ref:`sqlite3-transaction-control-autocommit` for more information. *autocommit* currently defaults to From f08e52ccb027f6f703302b8c1a82db9fd3934270 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 1 Dec 2022 14:54:51 +0200 Subject: [PATCH 111/112] gh-99612: Fix PyUnicode_DecodeUTF8Stateful() for ASCII-only data (GH-99613) Previously *consumed was not set in this case. 
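As a minimal sketch of the behaviour being fixed (the embedding boilerplate and names below are illustrative only, not part of the patch), a caller that passes a ``consumed`` slot to :c:func:`PyUnicode_DecodeUTF8Stateful` expects it to be filled in even for pure ASCII input; before this change the ASCII fast path returned without writing to it:

    #include <Python.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
        Py_Initialize();

        const char *data = "ascii only";   /* no multibyte UTF-8 sequences */
        Py_ssize_t consumed = -1;          /* sentinel: should be overwritten */
        PyObject *str = PyUnicode_DecodeUTF8Stateful(
            data, (Py_ssize_t)strlen(data), "strict", &consumed);
        if (str == NULL) {
            PyErr_Print();
            Py_Finalize();
            return 1;
        }
        /* Fixed behaviour: consumed == strlen(data); previously the
           ASCII-only path returned without setting it, so -1 survived. */
        printf("consumed = %zd\n", consumed);
        Py_DECREF(str);
        Py_Finalize();
        return 0;
    }

With the fix, this prints ``consumed = 10`` for the ten-byte input; before it, the sentinel ``-1`` was left untouched.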
--- Lib/test/test_capi/test_codecs.py | 54 +++++++++++++++++++ ...2-11-20-09-52-50.gh-issue-99612.eBHksg.rst | 2 + Modules/_testcapi/unicode.c | 36 +++++++++++++ Objects/unicodeobject.c | 3 ++ 4 files changed, 95 insertions(+) create mode 100644 Lib/test/test_capi/test_codecs.py create mode 100644 Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py new file mode 100644 index 00000000000000..e46726192aa05b --- /dev/null +++ b/Lib/test/test_capi/test_codecs.py @@ -0,0 +1,54 @@ +import unittest +from test.support import import_helper + +_testcapi = import_helper.import_module('_testcapi') + + +class CAPITest(unittest.TestCase): + + def test_decodeutf8(self): + """Test PyUnicode_DecodeUTF8()""" + decodeutf8 = _testcapi.unicode_decodeutf8 + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8(b), s) + self.assertEqual(decodeutf8(b, 'strict'), s) + + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f') + self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd') + self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb') + + self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and + # negative size. + + def test_decodeutf8stateful(self): + """Test PyUnicode_DecodeUTF8Stateful()""" + decodeutf8stateful = _testcapi.unicode_decodeutf8stateful + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8stateful(b), (s, len(b))) + self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b))) + + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1)) + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1)) + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4)) + + self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of + # "consumed". + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst b/Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst new file mode 100644 index 00000000000000..40e3c8db5403c7 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst @@ -0,0 +1,2 @@ +Fix :c:func:`PyUnicode_DecodeUTF8Stateful` for ASCII-only data: +``*consumed`` was not set. 
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index 4c5049dd406a7c..2d23993ce420b3 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -239,6 +239,40 @@ unicode_asutf8andsize(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, utf8_len); } +/* Test PyUnicode_DecodeUTF8() */ +static PyObject * +unicode_decodeutf8(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUTF8(data, size, errors); +} + +/* Test PyUnicode_DecodeUTF8Stateful() */ +static PyObject * +unicode_decodeutf8stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed = 123456789; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF8Stateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + /* Test PyUnicode_Concat() */ static PyObject * unicode_concat(PyObject *self, PyObject *args) @@ -1025,6 +1059,8 @@ static PyMethodDef TestMethods[] = { {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS}, + {"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS}, {"unicode_concat", unicode_concat, METH_VARARGS}, {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, {"unicode_split", unicode_split, METH_VARARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 55f029dd504ca0..19bde13a6f238a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4530,6 +4530,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, } s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); if (s == end) { + if (consumed) { + *consumed = size; + } return u; } From 0563be23a557917228a8b48cbb31bda285a3a815 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 1 Dec 2022 13:05:56 +0000 Subject: [PATCH 112/112] gh-99894: Ensure the local names don't collide with the test file in traceback suggestion error checking (#99895) Co-authored-by: Victor Stinner --- Lib/test/test_traceback.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index c17bbb48b65b2d..95b1bae4f60850 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -2978,9 +2978,9 @@ class MyClass: for name in ("b", "v", "m", "py"): with self.subTest(name=name): actual = self.get_suggestion(MyClass, name) - self.assertNotIn("you mean", actual) - self.assertNotIn("vvv", actual) - self.assertNotIn("mom", actual) + self.assertNotIn("Did you mean", actual) + self.assertNotIn("'vvv", actual) + self.assertNotIn("'mom'", actual) self.assertNotIn("'id'", actual) self.assertNotIn("'w'", actual) self.assertNotIn("'pytho'", actual) @@ -3168,9 +3168,9 @@ def test_import_from_error_bad_suggestions_do_not_trigger_for_small_names(self): for name in ("b", "v", "m", "py"): with self.subTest(name=name): actual = self.get_import_from_suggestion(code, name) - self.assertNotIn("you mean", actual) - self.assertNotIn("vvv", actual) - self.assertNotIn("mom", actual) + self.assertNotIn("Did you mean", actual) + self.assertNotIn("'vvv'", actual) + 
self.assertNotIn("'mom'", actual) self.assertNotIn("'id'", actual) self.assertNotIn("'w'", actual) self.assertNotIn("'pytho'", actual)