From 32a373a4c2cfe13463d1a4afce4be06abbe41075 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 27 May 2024 16:21:18 +0200 Subject: [PATCH 01/12] gh-119609: Add PyUnicode_Export() function Add PyUnicode_Export() and PyUnicode_Import() functions to the C API. --- Doc/c-api/unicode.rst | 55 +++++++ Doc/data/stable_abi.dat | 3 + Doc/whatsnew/3.14.rst | 5 + Include/unicodeobject.h | 31 ++++ Lib/test/test_capi/test_unicode.py | 130 +++++++++++++++- Lib/test/test_stable_abi_ctypes.py | 3 + ...-05-27-17-46-17.gh-issue-119609.kPIx6S.rst | 2 + Misc/stable_abi.toml | 6 + Modules/_testlimitedcapi/unicode.c | 40 +++++ Objects/unicodeobject.c | 142 ++++++++++++++++++ PC/python3dll.c | 3 + 11 files changed, 418 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 02e696c303fa91..1b532382a2576a 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -341,6 +341,61 @@ APIs: .. versionadded:: 3.3 +.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *size, uint32_t *format) + + Export the contents of the *unicode* string in one of the requested format + *requested_formats*. + + * On success, set *\*size* and *\*format*, and return the contents. + * On error, set an exception and return ``NULL``. + + The contents is valid as long as *unicode* is valid. + + The export must be released by :c:func:`PyUnicode_ReleaseExport`. + + *unicode*, *size* and *format* must not be NULL. + + Available formats: + + .. c:namespace:: NULL + + =================================== ======== =========================== + Constant Identifier Value Description + =================================== ======== =========================== + .. c:macro:: PyUnicode_FORMAT_ASCII ``0x01`` ASCII string (``Py_UCS1*``) + .. c:macro:: PyUnicode_FORMAT_UCS1 ``0x02`` UCS-1 string (``Py_UCS1*``) + .. 
c:macro:: PyUnicode_FORMAT_UCS2 ``0x04`` UCS-2 string (``Py_UCS2*``) + .. c:macro:: PyUnicode_FORMAT_UCS4 ``0x08`` UCS-4 string (``Py_UCS4*``) + .. c:macro:: PyUnicode_FORMAT_UTF8 ``0x10`` UTF-8 string (``char*``) + =================================== ======== =========================== + + *requested_formats* can be a single format or a combination of the formats + in the table above. + + .. versionadded:: 3.14 + + +.. c:function:: void PyUnicode_ReleaseExport(PyObject *unicode, const void* data, uint32_t format) + + Release an export created by :c:func:`PyUnicode_Export`. + + .. versionadded:: 3.14 + + +.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t size, uint32_t format) + + Import a string from the *format* format. + + * Return a reference to a new string object on success. + * Set an exception and return ``NULL`` on error. + + *data* must not be NULL. *size* must be positive or zero. + + See :c:func:`PyUnicode_Export` for the available formats. + + .. versionadded:: 3.14 + + .. 
c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \ Py_ssize_t size) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index c18c813104cf65..80222096f3a0b6 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -776,6 +776,7 @@ function,PyUnicode_EncodeFSDefault,3.2,, function,PyUnicode_EncodeLocale,3.7,, function,PyUnicode_EqualToUTF8,3.13,, function,PyUnicode_EqualToUTF8AndSize,3.13,, +function,PyUnicode_Export,3.14,, function,PyUnicode_FSConverter,3.2,, function,PyUnicode_FSDecoder,3.2,, function,PyUnicode_Find,3.2,, @@ -791,6 +792,7 @@ function,PyUnicode_FromStringAndSize,3.2,, function,PyUnicode_FromWideChar,3.2,, function,PyUnicode_GetDefaultEncoding,3.2,, function,PyUnicode_GetLength,3.7,, +function,PyUnicode_Import,3.14,, function,PyUnicode_InternFromString,3.2,, function,PyUnicode_InternInPlace,3.2,, function,PyUnicode_IsIdentifier,3.2,, @@ -799,6 +801,7 @@ function,PyUnicode_Partition,3.2,, function,PyUnicode_RPartition,3.2,, function,PyUnicode_RSplit,3.2,, function,PyUnicode_ReadChar,3.7,, +function,PyUnicode_ReleaseExport,3.14,, function,PyUnicode_Replace,3.2,, function,PyUnicode_Resize,3.2,, function,PyUnicode_RichCompare,3.2,, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 804d39ab64646d..8ce5747eb7c764 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -298,6 +298,11 @@ New Features (Contributed by Victor Stinner in :gh:`119182`.) +* Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to + export and import strings. + (Contributed by Victor Stinner in :gh:`119609`.) 
+ + Porting to Python 3.14 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..a97eb2518501f3 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -248,6 +248,37 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( const char *u /* UTF-8 encoded string */ ); +#define PyUnicode_FORMAT_ASCII 0x01 +#define PyUnicode_FORMAT_UCS1 0x02 +#define PyUnicode_FORMAT_UCS2 0x04 +#define PyUnicode_FORMAT_UCS4 0x08 +#define PyUnicode_FORMAT_UTF8 0x10 + +// Get the content of a string in the requested format: +// - Return the content, set '*size' and '*format' on success. +// - Set an exception and return NULL on error. +// +// The export must be released by PyUnicode_ReleaseExport(). +PyAPI_FUNC(const void*) PyUnicode_Export( + PyObject *unicode, + uint32_t requested_formats, + Py_ssize_t *size, + uint32_t *format); + +// Release an export created by PyUnicode_Export(). +PyAPI_FUNC(void) PyUnicode_ReleaseExport( + PyObject *unicode, + const void* data, + uint32_t format); + +// Create a string object from a string in the format 'format'. +// - Return a reference to a new string object on success. +// - Set an exception and return NULL on error. 
+PyAPI_FUNC(PyObject*) PyUnicode_Import( + const void *data, + Py_ssize_t size, + uint32_t format); + /* --- wchar_t support for platforms which support it --------------------- */ #ifdef HAVE_WCHAR_H diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index a69f817c515ba7..a8bc1a2117687c 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -24,6 +24,14 @@ class Str(str): pass +PyUnicode_FORMAT_ASCII = 0x01 +PyUnicode_FORMAT_UCS1 = 0x02 +PyUnicode_FORMAT_UCS2 = 0x04 +PyUnicode_FORMAT_UCS4 = 0x08 +PyUnicode_FORMAT_UTF8 = 0x10 +# Invalid native format +PyUnicode_FORMAT_INVALID = 0x20 + class CAPITest(unittest.TestCase): @support.cpython_only @@ -1675,6 +1683,124 @@ def test_pep393_utf8_caching_bug(self): # Check that the second call returns the same result self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) - -if __name__ == "__main__": + def test_unicode_export(self): + # Test PyUnicode_Export() and PyUnicode_ReleaseExport() + unicode_export = _testlimitedcapi.unicode_export + if sys.byteorder == 'little': + ucs2_enc = 'utf-16le' + ucs4_enc = 'utf-32le' + else: + ucs2_enc = 'utf-16be' + ucs4_enc = 'utf-32be' + + # export to the native format + formats = (PyUnicode_FORMAT_ASCII + | PyUnicode_FORMAT_UCS1 + | PyUnicode_FORMAT_UCS2 + | PyUnicode_FORMAT_UCS4) + self.assertEqual(unicode_export("abc", formats), + (b'abc', PyUnicode_FORMAT_ASCII)) + self.assertEqual(unicode_export("latin1:\xe9", formats), + (b'latin1:\xe9', PyUnicode_FORMAT_UCS1)) + self.assertEqual(unicode_export('ucs2:\u20ac', formats), + ('ucs2:\u20ac'.encode(ucs2_enc), + PyUnicode_FORMAT_UCS2)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', formats), + ('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + + # export ASCII as UCS1 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1), + (b'abc', PyUnicode_FORMAT_UCS1)) + + # always export to UCS4 + self.assertEqual(unicode_export("abc",
PyUnicode_FORMAT_UCS4), + ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS4), + ('latin1:\xe9'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UCS4), + ('ucs2:\u20ac'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4), + ('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + + # always export to UTF8 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UTF8), + ('abc'.encode('utf8'), PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UTF8), + ('latin1:\xe9'.encode('utf8'), PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UTF8), + ('ucs2:\u20ac'.encode('utf8'), + PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UTF8), + ('ucs4:\U0010ffff'.encode('utf8'), + PyUnicode_FORMAT_UTF8)) + + # No supported format or invalid format + with self.assertRaisesRegex(ValueError, + "unable to find a matching export format"): + unicode_export('abc', 0) + with self.assertRaisesRegex(ValueError, + "unable to find a matching export format"): + unicode_export('abc', PyUnicode_FORMAT_INVALID) + + def test_unicode_import(self): + # Test PyUnicode_Import() + unicode_import = _testlimitedcapi.unicode_import + if sys.byteorder == 'little': + ucs2_enc = 'utf-16le' + ucs4_enc = 'utf-32le' + else: + ucs2_enc = 'utf-16be' + ucs4_enc = 'utf-32be' + + self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII), + "abc") + self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1), + "latin1:\xe9") + + self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc), + PyUnicode_FORMAT_UCS2), + 'ucs2:\u20ac') + + self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4), + 'ucs4:\U0010ffff') + + text = 
"abc\xe9\U0010ffff" + self.assertEqual(unicode_import(text.encode('utf8'), + PyUnicode_FORMAT_UTF8), + text) + + # Empty string + for native_format in ( + PyUnicode_FORMAT_ASCII, + PyUnicode_FORMAT_UCS1, + PyUnicode_FORMAT_UCS2, + PyUnicode_FORMAT_UCS4, + PyUnicode_FORMAT_UTF8, + ): + with self.subTest(native_format=native_format): + self.assertEqual(unicode_import(b'', native_format), + '') + + # Invalid format + with self.assertRaises(ValueError): + unicode_import(b'', PyUnicode_FORMAT_INVALID) + + # Invalid size + ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc) + with self.assertRaises(ValueError): + unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2) + ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4) + + +if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 47dff5c28f6ff8..b4e977f4e972e2 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -798,6 +798,7 @@ def test_windows_feature_macros(self): "PyUnicode_EncodeLocale", "PyUnicode_EqualToUTF8", "PyUnicode_EqualToUTF8AndSize", + "PyUnicode_Export", "PyUnicode_FSConverter", "PyUnicode_FSDecoder", "PyUnicode_Find", @@ -814,6 +815,7 @@ def test_windows_feature_macros(self): "PyUnicode_GetDefaultEncoding", "PyUnicode_GetLength", "PyUnicode_GetSize", + "PyUnicode_Import", "PyUnicode_InternFromString", "PyUnicode_InternImmortal", "PyUnicode_InternInPlace", @@ -823,6 +825,7 @@ def test_windows_feature_macros(self): "PyUnicode_RPartition", "PyUnicode_RSplit", "PyUnicode_ReadChar", + "PyUnicode_ReleaseExport", "PyUnicode_Replace", "PyUnicode_Resize", "PyUnicode_RichCompare", diff --git a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst 
b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst new file mode 100644 index 00000000000000..3eae4543f087d0 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to +export and import strings. Patch by Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 305978f9f0c5c4..47d4be45965229 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2510,3 +2510,9 @@ [function.Py_TYPE] added = '3.14' +[function.PyUnicode_Export] + added = '3.14' +[function.PyUnicode_ReleaseExport] + added = '3.14' +[function.PyUnicode_Import] + added = '3.14' diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 2b70d09108a333..655dda3af196d1 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1837,6 +1837,44 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef CHECK_FORMAT_0 } + +// Test PyUnicode_Export() +static PyObject* +unicode_export(PyObject *self, PyObject *args) +{ + PyObject *obj; + unsigned int supported_formats; + if (!PyArg_ParseTuple(args, "OI", &obj, &supported_formats)) { + return NULL; + } + + Py_ssize_t size; + uint32_t format; + const void *data = PyUnicode_Export(obj, supported_formats, &size, &format); + if (data == NULL) { + return NULL; + } + + PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format); + PyUnicode_ReleaseExport(obj, data, format); + return res; +} + + +// Test PyUnicode_Import() +static PyObject* +unicode_import(PyObject *self, PyObject *args) +{ + const void *data; + Py_ssize_t size; + unsigned int format; + if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) { + return NULL; + } + return PyUnicode_Import(data, size, format); +} + + static PyMethodDef TestMethods[] = { {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, 
{"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, @@ -1924,6 +1962,8 @@ static PyMethodDef TestMethods[] = { {"unicode_format", unicode_format, METH_VARARGS}, {"unicode_contains", unicode_contains, METH_VARARGS}, {"unicode_isidentifier", unicode_isidentifier, METH_O}, + {"unicode_export", unicode_export, METH_VARARGS}, + {"unicode_import", unicode_import, METH_VARARGS}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e6feed47fbb2bf..928065b7c6237c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2097,6 +2097,148 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) return res; } +const void* +PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, + Py_ssize_t *size, uint32_t *format) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode); + goto error; + } + + Py_ssize_t len = PyUnicode_GET_LENGTH(unicode); + + if (PyUnicode_IS_ASCII(unicode) + && (requested_formats & PyUnicode_FORMAT_ASCII)) + { + *format = PyUnicode_FORMAT_ASCII; + *size = len; + return PyUnicode_1BYTE_DATA(unicode); + } + + int kind = PyUnicode_KIND(unicode); + if (kind == PyUnicode_1BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS1)) + { + *format = PyUnicode_FORMAT_UCS1; + *size = len; + return PyUnicode_1BYTE_DATA(unicode); + } + + if (kind == PyUnicode_2BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS2)) + { + *format = PyUnicode_FORMAT_UCS2; + *size = len * 2; + return PyUnicode_2BYTE_DATA(unicode); + } + + if (kind == PyUnicode_4BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS4)) + { + *format = PyUnicode_FORMAT_UCS4; + *size = len * 4; + return PyUnicode_4BYTE_DATA(unicode); + } + + if (requested_formats & PyUnicode_FORMAT_UCS4) { + // Convert UCS1 or UCS2 to UCS4 + Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode); + if (ucs4 == NULL) { + goto error; + } + *format = PyUnicode_FORMAT_UCS4; + *size = len * 4; + return ucs4; + } + + if 
(requested_formats & PyUnicode_FORMAT_UTF8) { + // Encode UCS1, UCS2 or UCS4 to UTF-8 + const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size); + if (utf8 == NULL) { + goto error; + } + *format = PyUnicode_FORMAT_UTF8; + return utf8; + } + + PyErr_Format(PyExc_ValueError, "unable to find a matching export format"); + + +error: + *size = 0; + *format = 0; + return NULL; +} + +void +PyUnicode_ReleaseExport(PyObject *unicode, const void* data, + uint32_t format) +{ + switch (format) + { + case PyUnicode_FORMAT_ASCII: + break; + case PyUnicode_FORMAT_UCS1: + break; + case PyUnicode_FORMAT_UCS2: + break; + case PyUnicode_FORMAT_UCS4: + if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) { + PyMem_Free((void*)data); + } + break; + case PyUnicode_FORMAT_UTF8: + break; + default: + // ignore silently an unknown format + break; + } +} + +PyObject* +PyUnicode_Import(const void *data, Py_ssize_t size, + uint32_t format) +{ + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "Negative size"); + return NULL; + } + + switch (format) + { + case PyUnicode_FORMAT_ASCII: + return PyUnicode_DecodeASCII((const char*)data, size, NULL); + + case PyUnicode_FORMAT_UCS1: + return _PyUnicode_FromUCS1(data, size); + + case PyUnicode_FORMAT_UCS2: + if (size % 2) { + PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd", + size); + return NULL; + } + return _PyUnicode_FromUCS2(data, size / 2); + + case PyUnicode_FORMAT_UCS4: + if (size % 4) { + PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd", + size); + return NULL; + } + return _PyUnicode_FromUCS4(data, size / 4); + + case PyUnicode_FORMAT_UTF8: + return PyUnicode_DecodeUTF8((const char*)data, size, NULL); + + default: + PyErr_Format(PyExc_ValueError, "unknown format: %i", + format); + return NULL; + } +} + PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 0bcf1cc507e1e8..3086a08c0b70f5 100755 --- a/PC/python3dll.c +++ 
b/PC/python3dll.c @@ -708,6 +708,7 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault) EXPORT_FUNC(PyUnicode_EncodeLocale) EXPORT_FUNC(PyUnicode_EqualToUTF8) EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize) +EXPORT_FUNC(PyUnicode_Export) EXPORT_FUNC(PyUnicode_Find) EXPORT_FUNC(PyUnicode_FindChar) EXPORT_FUNC(PyUnicode_Format) @@ -724,6 +725,7 @@ EXPORT_FUNC(PyUnicode_FSDecoder) EXPORT_FUNC(PyUnicode_GetDefaultEncoding) EXPORT_FUNC(PyUnicode_GetLength) EXPORT_FUNC(PyUnicode_GetSize) +EXPORT_FUNC(PyUnicode_Import) EXPORT_FUNC(PyUnicode_InternFromString) EXPORT_FUNC(PyUnicode_InternImmortal) EXPORT_FUNC(PyUnicode_InternInPlace) @@ -731,6 +733,7 @@ EXPORT_FUNC(PyUnicode_IsIdentifier) EXPORT_FUNC(PyUnicode_Join) EXPORT_FUNC(PyUnicode_Partition) EXPORT_FUNC(PyUnicode_ReadChar) +EXPORT_FUNC(PyUnicode_ReleaseExport) EXPORT_FUNC(PyUnicode_Replace) EXPORT_FUNC(PyUnicode_Resize) EXPORT_FUNC(PyUnicode_RichCompare) From c42cebccc196b40d4bc7004a3307fceca8524c1d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 10:39:00 +0200 Subject: [PATCH 02/12] stable_abi.toml: Add constants --- Misc/stable_abi.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 47d4be45965229..e3c89af3799480 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2516,3 +2516,13 @@ added = '3.14' [function.PyUnicode_Import] added = '3.14' +[const.PyUnicode_FORMAT_ASCII] + added = '3.14' +[const.PyUnicode_FORMAT_UCS1] + added = '3.14' +[const.PyUnicode_FORMAT_UCS2] + added = '3.14' +[const.PyUnicode_FORMAT_UCS4] + added = '3.14' +[const.PyUnicode_FORMAT_UTF8] + added = '3.14' From 1310b6d80ede2eb3aa3f15bdd299bdfde411889b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 10:46:48 +0200 Subject: [PATCH 03/12] Export UCS1 as UCS2 Co-Authored-By: Petr Viktorin --- Lib/test/test_capi/test_unicode.py | 6 ++++++ Objects/unicodeobject.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index a8bc1a2117687c..8f5861d283421b 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1713,6 +1713,12 @@ def test_unicode_export(self): self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1), (b'abc', PyUnicode_FORMAT_UCS1)) + # export ASCII and UCS1 to UCS2 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS2), + ('abc'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS2), + ('latin1:\xe9'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2)) + # always export to UCS4 self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4), ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4)) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 928065b7c6237c..d75e648fb5fc39 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2105,9 +2105,9 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode); goto error; } - Py_ssize_t len = PyUnicode_GET_LENGTH(unicode); + // Native ASCII if (PyUnicode_IS_ASCII(unicode) && (requested_formats & PyUnicode_FORMAT_ASCII)) { @@ -2116,6 +2116,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, return PyUnicode_1BYTE_DATA(unicode); } + // Native UCS1 int kind = PyUnicode_KIND(unicode); if (kind == PyUnicode_1BYTE_KIND && (requested_formats & PyUnicode_FORMAT_UCS1)) @@ -2125,6 +2126,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, return PyUnicode_1BYTE_DATA(unicode); } + // Native UCS2 if (kind == PyUnicode_2BYTE_KIND && (requested_formats & PyUnicode_FORMAT_UCS2)) { @@ -2133,6 +2135,28 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, return PyUnicode_2BYTE_DATA(unicode); } + // Convert ASCII or UCS1 to UCS2 + if (kind == PyUnicode_1BYTE_KIND + && requested_formats & PyUnicode_FORMAT_UCS2) + { + 
Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2)); + if (!ucs2) { + PyErr_NoMemory(); + goto error; + } + + _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2, + PyUnicode_1BYTE_DATA(unicode), + PyUnicode_1BYTE_DATA(unicode) + len, + ucs2); + ucs2[len] = 0; + + *format = PyUnicode_FORMAT_UCS2; + *size = len * 2; + return ucs2; + } + + // Native UCS4 if (kind == PyUnicode_4BYTE_KIND && (requested_formats & PyUnicode_FORMAT_UCS4)) { @@ -2141,8 +2165,8 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, return PyUnicode_4BYTE_DATA(unicode); } + // Convert ASCII, UCS1 or UCS2 to UCS4 if (requested_formats & PyUnicode_FORMAT_UCS4) { - // Convert UCS1 or UCS2 to UCS4 Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode); if (ucs4 == NULL) { goto error; @@ -2152,6 +2176,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, return ucs4; } + // Convert to UTF-8 if (requested_formats & PyUnicode_FORMAT_UTF8) { // Encode UCS1, UCS2 or UCS4 to UTF-8 const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size); @@ -2164,7 +2189,6 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, PyErr_Format(PyExc_ValueError, "unable to find a matching export format"); - error: *size = 0; *format = 0; From 1c781036927abd5be60212a45814f06b12fa69d7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 10:53:20 +0200 Subject: [PATCH 04/12] Add test_unicode_export_import_roundtrip() --- Lib/test/test_capi/test_unicode.py | 34 ++++++++++++++++++++++++++++++ Modules/_testlimitedcapi/unicode.c | 6 +++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 8f5861d283421b..1d78b8f8ffd228 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1808,5 +1808,39 @@ def test_unicode_import(self): unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4) + + def test_unicode_export_import_roundtrip(self): + unicode_export = 
_testlimitedcapi.unicode_export + unicode_import = _testlimitedcapi.unicode_import + + ASCII = PyUnicode_FORMAT_ASCII + UCS1 = PyUnicode_FORMAT_UCS1 + UCS2 = PyUnicode_FORMAT_UCS2 + UCS4 = PyUnicode_FORMAT_UCS4 + UTF8 = PyUnicode_FORMAT_UTF8 + ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8) + + for string, allowed_formats in ( + ('', {ASCII, UCS1, UCS2, UCS4, UTF8}), + ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}), + ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}), + ('ucs2:\u20ac', {UCS2, UCS4, UTF8}), + ('ucs4:\U0001f638', {UCS4, UTF8}), + ): + for format in ASCII, UCS1, UCS2, UCS4, UTF8: + with self.subTest(string=string, format=format): + if format not in allowed_formats: + with self.assertRaises(ValueError): + unicode_export(string, format) + else: + buf, buf_fmt = unicode_export(string, format) + restored = unicode_import(buf, buf_fmt) + self.assertEqual(restored, string) + + buf, buf_fmt = unicode_export(string, ALL) + restored = unicode_import(buf, buf_fmt) + self.assertEqual(restored, string) + + if __name__ == '__main__': unittest.main() diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 655dda3af196d1..7fdb3d08a34184 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1843,14 +1843,14 @@ static PyObject* unicode_export(PyObject *self, PyObject *args) { PyObject *obj; - unsigned int supported_formats; - if (!PyArg_ParseTuple(args, "OI", &obj, &supported_formats)) { + unsigned int requested_formats; + if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) { return NULL; } Py_ssize_t size; uint32_t format; - const void *data = PyUnicode_Export(obj, supported_formats, &size, &format); + const void *data = PyUnicode_Export(obj, requested_formats, &size, &format); if (data == NULL) { return NULL; } From 62f4598501a4e6918a58abff2c4f8d284a4afbf5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 10:55:51 +0200 Subject: [PATCH 05/12] Rename size parameter to nbytes --- 
Doc/c-api/unicode.rst | 10 ++++----- Include/unicodeobject.h | 6 ++--- Objects/unicodeobject.c | 49 ++++++++++++++++++++--------------------- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 1b532382a2576a..c82e586c497665 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -341,19 +341,19 @@ APIs: .. versionadded:: 3.3 -.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *size, uint32_t *format) +.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *nbytes, uint32_t *format) Export the contents of the *unicode* string in one of the requested format *requested_formats*. - * On success, set *\*size* and *\*format*, and return the contents. + * On success, set *\*nbytes* and *\*format*, and return the contents. * On error, set an exception and return ``NULL``. The contents is valid as long as *unicode* is valid. The export must be released by :c:func:`PyUnicode_ReleaseExport`. - *unicode*, *size* and *format* must not be NULL. + *unicode*, *nbytes* and *format* must not be NULL. Available formats: @@ -382,14 +382,14 @@ APIs: .. versionadded:: 3.14 -.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t size, uint32_t format) +.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format) Import a string from the *format* format. * Return a reference to a new string object on success. * Set an exception and return ``NULL`` on error. - *data* must not be NULL. *size* must be positive or zero. + *data* must not be NULL. *nbytes* must be positive or zero. See :c:func:`PyUnicode_Export` for the available formats. 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index a97eb2518501f3..8263b6b64a04f4 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -255,14 +255,14 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( #define PyUnicode_FORMAT_UTF8 0x10 // Get the content of a string in the requested format: -// - Return the content, set '*size' and '*format' on success. +// - Return the content, set '*nbytes' and '*format' on success. // - Set an exception and return NULL on error. // // The export must be released by PyUnicode_ReleaseExport(). PyAPI_FUNC(const void*) PyUnicode_Export( PyObject *unicode, uint32_t requested_formats, - Py_ssize_t *size, + Py_ssize_t *nbytes, uint32_t *format); // Release an export created by PyUnicode_Export(). @@ -276,7 +276,7 @@ PyAPI_FUNC(void) PyUnicode_ReleaseExport( // - Set an exception and return NULL on error. PyAPI_FUNC(PyObject*) PyUnicode_Import( const void *data, - Py_ssize_t size, + Py_ssize_t nbytes, uint32_t format); /* --- wchar_t support for platforms which support it --------------------- */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d75e648fb5fc39..44aae4936dd645 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2099,7 +2099,7 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, - Py_ssize_t *size, uint32_t *format) + Py_ssize_t *nbytes, uint32_t *format) { if (!PyUnicode_Check(unicode)) { PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode); @@ -2112,7 +2112,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, && (requested_formats & PyUnicode_FORMAT_ASCII)) { *format = PyUnicode_FORMAT_ASCII; - *size = len; + *nbytes = len; return PyUnicode_1BYTE_DATA(unicode); } @@ -2122,7 +2122,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, && (requested_formats & PyUnicode_FORMAT_UCS1)) { *format = PyUnicode_FORMAT_UCS1; - *size = 
len; + *nbytes = len; return PyUnicode_1BYTE_DATA(unicode); } @@ -2131,7 +2131,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, && (requested_formats & PyUnicode_FORMAT_UCS2)) { *format = PyUnicode_FORMAT_UCS2; - *size = len * 2; + *nbytes = len * 2; return PyUnicode_2BYTE_DATA(unicode); } @@ -2152,7 +2152,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, ucs2[len] = 0; *format = PyUnicode_FORMAT_UCS2; - *size = len * 2; + *nbytes = len * 2; return ucs2; } @@ -2161,7 +2161,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, && (requested_formats & PyUnicode_FORMAT_UCS4)) { *format = PyUnicode_FORMAT_UCS4; - *size = len * 4; + *nbytes = len * 4; return PyUnicode_4BYTE_DATA(unicode); } @@ -2172,14 +2172,14 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, goto error; } *format = PyUnicode_FORMAT_UCS4; - *size = len * 4; + *nbytes = len * 4; return ucs4; } // Convert to UTF-8 if (requested_formats & PyUnicode_FORMAT_UTF8) { // Encode UCS1, UCS2 or UCS4 to UTF-8 - const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size); + const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, nbytes); if (utf8 == NULL) { goto error; } @@ -2190,7 +2190,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, PyErr_Format(PyExc_ValueError, "unable to find a matching export format"); error: - *size = 0; + *nbytes = 0; *format = 0; return NULL; } @@ -2221,44 +2221,43 @@ PyUnicode_ReleaseExport(PyObject *unicode, const void* data, } PyObject* -PyUnicode_Import(const void *data, Py_ssize_t size, +PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format) { - if (size < 0) { - PyErr_SetString(PyExc_ValueError, "Negative size"); + if (nbytes < 0) { + PyErr_SetString(PyExc_ValueError, "Negative nbytes"); return NULL; } switch (format) { case PyUnicode_FORMAT_ASCII: - return PyUnicode_DecodeASCII((const char*)data, size, NULL); + return PyUnicode_DecodeASCII((const char*)data, nbytes, NULL); case 
PyUnicode_FORMAT_UCS1: - return _PyUnicode_FromUCS1(data, size); + return _PyUnicode_FromUCS1(data, nbytes); case PyUnicode_FORMAT_UCS2: - if (size % 2) { - PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd", - size); + if (nbytes % 2) { + PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 2: %zd", + nbytes); return NULL; } - return _PyUnicode_FromUCS2(data, size / 2); + return _PyUnicode_FromUCS2(data, nbytes / 2); case PyUnicode_FORMAT_UCS4: - if (size % 4) { - PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd", - size); + if (nbytes % 4) { + PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 4: %zd", + nbytes); return NULL; } - return _PyUnicode_FromUCS4(data, size / 4); + return _PyUnicode_FromUCS4(data, nbytes / 4); case PyUnicode_FORMAT_UTF8: - return PyUnicode_DecodeUTF8((const char*)data, size, NULL); + return PyUnicode_DecodeUTF8((const char*)data, nbytes, NULL); default: - PyErr_Format(PyExc_ValueError, "unknown format: %i", - format); + PyErr_Format(PyExc_ValueError, "unknown format: %i", format); return NULL; } } From 53cd9375ffeef93339b5f299ac947b7b8d2a9550 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 10:58:32 +0200 Subject: [PATCH 06/12] Update doc --- Doc/c-api/unicode.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index c82e586c497665..22123d444fd5b2 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -372,6 +372,8 @@ APIs: *requested_formats* can be a single format or a combination of the formats in the table above. + Note that future versions of Python may introduce additional formats. + .. versionadded:: 3.14 @@ -384,7 +386,7 @@ APIs: .. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format) - Import a string from the *format* format. + Create a string object from a buffer in an “export format”. 
* Return a reference to a new string object on success. * Set an exception and return ``NULL`` on error. From 3a084919928c5575cc3571c0757a954292338377 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 11:03:39 +0200 Subject: [PATCH 07/12] Make sure that exported string ends with NUL character --- Modules/_testlimitedcapi/unicode.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 7fdb3d08a34184..5328cba49c2b03 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1850,11 +1850,33 @@ unicode_export(PyObject *self, PyObject *args) Py_ssize_t size; uint32_t format; - const void *data = PyUnicode_Export(obj, requested_formats, &size, &format); + const char *data = PyUnicode_Export(obj, requested_formats, &size, &format); if (data == NULL) { return NULL; } + // Make sure that the exported string ends with a NUL character + switch (format) + { + case PyUnicode_FORMAT_ASCII: + case PyUnicode_FORMAT_UCS1: + assert(data[size] == 0); + break; + case PyUnicode_FORMAT_UCS2: + assert(data[size] == 0); + assert(data[size+1] == 0); + break; + case PyUnicode_FORMAT_UCS4: + assert(data[size] == 0); + assert(data[size+1] == 0); + assert(data[size+2] == 0); + assert(data[size+3] == 0); + break; + case PyUnicode_FORMAT_UTF8: + assert(data[size] == 0); + break; + } + PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format); PyUnicode_ReleaseExport(obj, data, format); return res; From a9a90838c051ca17aa73041d25bafc29d91b7d91 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2024 11:05:55 +0200 Subject: [PATCH 08/12] tests: rename size to nbytes --- Modules/_testlimitedcapi/unicode.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 5328cba49c2b03..e059d349a18aa3 
100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1848,9 +1848,9 @@ unicode_export(PyObject *self, PyObject *args) return NULL; } - Py_ssize_t size; + Py_ssize_t nbytes; uint32_t format; - const char *data = PyUnicode_Export(obj, requested_formats, &size, &format); + const char *data = PyUnicode_Export(obj, requested_formats, &nbytes, &format); if (data == NULL) { return NULL; } @@ -1860,24 +1860,24 @@ unicode_export(PyObject *self, PyObject *args) { case PyUnicode_FORMAT_ASCII: case PyUnicode_FORMAT_UCS1: - assert(data[size] == 0); + assert(data[nbytes] == 0); break; case PyUnicode_FORMAT_UCS2: - assert(data[size] == 0); - assert(data[size+1] == 0); + assert(data[nbytes] == 0); + assert(data[nbytes+1] == 0); break; case PyUnicode_FORMAT_UCS4: - assert(data[size] == 0); - assert(data[size+1] == 0); - assert(data[size+2] == 0); - assert(data[size+3] == 0); + assert(data[nbytes] == 0); + assert(data[nbytes+1] == 0); + assert(data[nbytes+2] == 0); + assert(data[nbytes+3] == 0); break; case PyUnicode_FORMAT_UTF8: - assert(data[size] == 0); + assert(data[nbytes] == 0); break; } - PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format); + PyObject *res = Py_BuildValue("y#I", data, nbytes, (unsigned int)format); PyUnicode_ReleaseExport(obj, data, format); return res; } @@ -1888,12 +1888,12 @@ static PyObject* unicode_import(PyObject *self, PyObject *args) { const void *data; - Py_ssize_t size; + Py_ssize_t nbytes; unsigned int format; - if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) { + if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) { return NULL; } - return PyUnicode_Import(data, size, format); + return PyUnicode_Import(data, nbytes, format); } From 1297c52e7390dec3e7a37cf6a3d7b64f5d92d361 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 22 Jun 2024 17:17:46 +0200 Subject: [PATCH 09/12] Update Doc/c-api/unicode.rst Co-authored-by: Petr Viktorin --- Doc/c-api/unicode.rst | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 22123d444fd5b2..ca9a9806c50882 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -360,7 +360,7 @@ APIs: .. c:namespace:: NULL =================================== ======== =========================== - Constant Identifier Value Description + Constant Identifier Value Description =================================== ======== =========================== .. c:macro:: PyUnicode_FORMAT_ASCII ``0x01`` ASCII string (``Py_UCS1*``) .. c:macro:: PyUnicode_FORMAT_UCS1 ``0x02`` UCS-1 string (``Py_UCS1*``) From d8e3d5d7ab33fd18ba95ebb23ba4a7e832af306e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 22 Jun 2024 17:18:16 +0200 Subject: [PATCH 10/12] Update Doc/c-api/unicode.rst Co-authored-by: Petr Viktorin --- Doc/c-api/unicode.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index ca9a9806c50882..7f2c0e819e17fc 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -381,6 +381,11 @@ APIs: Release an export created by :c:func:`PyUnicode_Export`. + Each argument must match the corresponding argument or result of + a single earlier call to :c:func:`PyUnicode_Export`. + In particular, this means that you must hold a reference to *unicode* + while an export is valid. + .. versionadded:: 3.14 From da67c897d6daef4bee730a243c2a1f84ded281c4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 22 Jun 2024 17:22:22 +0200 Subject: [PATCH 11/12] Address Petr's review --- Doc/c-api/unicode.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 7f2c0e819e17fc..15f0d126ab7cd2 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -352,6 +352,9 @@ APIs: The contents is valid as long as *unicode* is valid. The export must be released by :c:func:`PyUnicode_ReleaseExport`. 
+ The contents of the buffer are valid until they are released. + + The buffer must not be modified. *unicode*, *nbytes* and *format* must not be NULL. @@ -369,8 +372,9 @@ APIs: .. c:macro:: PyUnicode_FORMAT_UTF8 ``0x10`` UTF-8 string (``char*``) =================================== ======== =========================== - *requested_formats* can be a single format or a combination of the formats - in the table above. + *requested_formats* can be a single format or a bitwise combination of the + formats in the table above. + On success, *\*format* will be set to a single one of the requested flags. Note that future versions of Python may introduce additional formats. From f3857d879581c429db3d13f44ddc8f89c98c7fd8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 26 Jun 2024 11:10:33 +0200 Subject: [PATCH 12/12] Fix doc: add uint32_t type --- Doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/conf.py b/Doc/conf.py index 8a14646801ebac..f23e4a93fd3311 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -140,6 +140,7 @@ ('c:type', 'size_t'), ('c:type', 'ssize_t'), ('c:type', 'time_t'), + ('c:type', 'uint32_t'), ('c:type', 'uint64_t'), ('c:type', 'uintmax_t'), ('c:type', 'uintptr_t'),