From 32a373a4c2cfe13463d1a4afce4be06abbe41075 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 27 May 2024 16:21:18 +0200
Subject: [PATCH 01/12] gh-119609: Add PyUnicode_Export() function

Add PyUnicode_Export(), PyUnicode_ReleaseExport() and PyUnicode_Import()
functions to the C API.
---
 Doc/c-api/unicode.rst                         |  55 +++++++
 Doc/data/stable_abi.dat                       |   3 +
 Doc/whatsnew/3.14.rst                         |   5 +
 Include/unicodeobject.h                       |  31 ++++
 Lib/test/test_capi/test_unicode.py            | 130 +++++++++++++++-
 Lib/test/test_stable_abi_ctypes.py            |   3 +
 ...-05-27-17-46-17.gh-issue-119609.kPIx6S.rst |   2 +
 Misc/stable_abi.toml                          |   6 +
 Modules/_testlimitedcapi/unicode.c            |  40 +++++
 Objects/unicodeobject.c                       | 142 ++++++++++++++++++
 PC/python3dll.c                               |   3 +
 11 files changed, 418 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 02e696c303fa91..1b532382a2576a 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,6 +341,61 @@ APIs:
    .. versionadded:: 3.3
 
 
+.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *size, uint32_t *format)
+
+   Export the contents of the *unicode* string in one of the requested format
+   *requested_formats*.
+
+   * On success, set *\*size* and *\*format*, and return the contents.
+   * On error, set an exception and return ``NULL``.
+
+   The contents is valid as long as *unicode* is valid.
+
+   The export must be released by :c:func:`PyUnicode_ReleaseExport`.
+
+   *unicode*, *size* and *format* must not be NULL.
+
+   Available formats:
+
+   .. c:namespace:: NULL
+
+   ===================================  ========  ===========================
+   Constant Identifier                  Value     Description
+   ===================================  ========  ===========================
+   .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x01``  ASCII string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x02``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS2   ``0x04``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS4   ``0x08``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
+   ===================================  ========  ===========================
+
+   *requested_formats* can be a single format or a combination of the formats
+   in the table above.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: void PyUnicode_ReleaseExport(PyObject *unicode, const void* data, uint32_t format)
+
+   Release an export created by :c:func:`PyUnicode_Export`.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t size, uint32_t format)
+
+   Import a string from the *format* format.
+
+   * Return a reference to a new string object on success.
+   * Set an exception and return ``NULL`` on error.
+
+   *data* must not be NULL. *size* must be positive or zero.
+
+   See :c:func:`PyUnicode_Export` for the available formats.
+
+   .. versionadded:: 3.14
+
+
 .. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \
                                                     Py_ssize_t size)
 
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index c18c813104cf65..80222096f3a0b6 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -776,6 +776,7 @@ function,PyUnicode_EncodeFSDefault,3.2,,
 function,PyUnicode_EncodeLocale,3.7,,
 function,PyUnicode_EqualToUTF8,3.13,,
 function,PyUnicode_EqualToUTF8AndSize,3.13,,
+function,PyUnicode_Export,3.14,,
 function,PyUnicode_FSConverter,3.2,,
 function,PyUnicode_FSDecoder,3.2,,
 function,PyUnicode_Find,3.2,,
@@ -791,6 +792,7 @@ function,PyUnicode_FromStringAndSize,3.2,,
 function,PyUnicode_FromWideChar,3.2,,
 function,PyUnicode_GetDefaultEncoding,3.2,,
 function,PyUnicode_GetLength,3.7,,
+function,PyUnicode_Import,3.14,,
 function,PyUnicode_InternFromString,3.2,,
 function,PyUnicode_InternInPlace,3.2,,
 function,PyUnicode_IsIdentifier,3.2,,
@@ -799,6 +801,7 @@ function,PyUnicode_Partition,3.2,,
 function,PyUnicode_RPartition,3.2,,
 function,PyUnicode_RSplit,3.2,,
 function,PyUnicode_ReadChar,3.7,,
+function,PyUnicode_ReleaseExport,3.14,,
 function,PyUnicode_Replace,3.2,,
 function,PyUnicode_Resize,3.2,,
 function,PyUnicode_RichCompare,3.2,,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 804d39ab64646d..8ce5747eb7c764 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -298,6 +298,11 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`119182`.)
 
+* Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to
+  export and import strings.
+  (Contributed by Victor Stinner in :gh:`119609`.)
+
+
 Porting to Python 3.14
 ----------------------
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee00715b3c51d..a97eb2518501f3 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,6 +248,37 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+#define PyUnicode_FORMAT_ASCII 0x01
+#define PyUnicode_FORMAT_UCS1 0x02
+#define PyUnicode_FORMAT_UCS2 0x04
+#define PyUnicode_FORMAT_UCS4 0x08
+#define PyUnicode_FORMAT_UTF8 0x10
+
+// Get the content of a string in the requested format:
+// - Return the content, set '*size' and '*format' on success.
+// - Set an exception and return NULL on error.
+//
+// The export must be released by PyUnicode_ReleaseExport().
+PyAPI_FUNC(const void*) PyUnicode_Export(
+    PyObject *unicode,
+    uint32_t requested_formats,
+    Py_ssize_t *size,
+    uint32_t *format);
+
+// Release an export created by PyUnicode_Export().
+PyAPI_FUNC(void) PyUnicode_ReleaseExport(
+    PyObject *unicode,
+    const void* data,
+    uint32_t format);
+
+// Create a string object from a string in the format 'format'.
+// - Return a reference to a new string object on success.
+// - Set an exception and return NULL on error.
+PyAPI_FUNC(PyObject*) PyUnicode_Import(
+    const void *data,
+    Py_ssize_t size,
+    uint32_t format);
+
 /* --- wchar_t support for platforms which support it --------------------- */
 
 #ifdef HAVE_WCHAR_H
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a69f817c515ba7..a8bc1a2117687c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -24,6 +24,14 @@ class Str(str):
     pass
 
 
+PyUnicode_FORMAT_ASCII = 0x01
+PyUnicode_FORMAT_UCS1 = 0x02
+PyUnicode_FORMAT_UCS2 = 0x04
+PyUnicode_FORMAT_UCS4 = 0x08
+PyUnicode_FORMAT_UTF8 = 0x10
+# Invalid native format
+PyUnicode_FORMAT_INVALID = 0x20
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -1675,6 +1683,124 @@ def test_pep393_utf8_caching_bug(self):
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 
-
-if __name__ == "__main__":
+    def test_unicode_export(self):
+        # Test PyUnicode_Export() and PyUnicode_ReleaseExport()
+        unicode_export = _testlimitedcapi.unicode_export
+        if sys.byteorder == 'little':
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        # export to the native format
+        formats = (PyUnicode_FORMAT_ASCII
+                   | PyUnicode_FORMAT_UCS1
+                   | PyUnicode_FORMAT_UCS2
+                   | PyUnicode_FORMAT_UCS4)
+        self.assertEqual(unicode_export("abc", formats),
+                         (b'abc', PyUnicode_FORMAT_ASCII))
+        self.assertEqual(unicode_export("latin1:\xe9", formats),
+                         (b'latin1:\xe9', PyUnicode_FORMAT_UCS1))
+        self.assertEqual(unicode_export('ucs2:\u20ac', formats),
+                         ('ucs2:\u20ac'.encode(ucs2_enc),
+                          PyUnicode_FORMAT_UCS2))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', formats),
+                         ('ucs4:\U0010ffff'.encode(ucs4_enc),
+                          PyUnicode_FORMAT_UCS4))
+
+        # export ASCII as UCS1
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1),
+                         (b'abc', PyUnicode_FORMAT_UCS1))
+
+        # always export to UCS4
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4),
+                         ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS4),
+                         ('latin1:\xe9'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UCS4),
+                         ('ucs2:\u20ac'.encode(ucs4_enc),
+                          PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4),
+                         ('ucs4:\U0010ffff'.encode(ucs4_enc),
+                          PyUnicode_FORMAT_UCS4))
+
+        # always export to UTF8
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UTF8),
+                         ('abc'.encode('utf8'), PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UTF8),
+                         ('latin1:\xe9'.encode('utf8'), PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UTF8),
+                         ('ucs2:\u20ac'.encode('utf8'),
+                          PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UTF8),
+                         ('ucs4:\U0010ffff'.encode('utf8'),
+                          PyUnicode_FORMAT_UTF8))
+
+        # No supported format or invalid format
+        with self.assertRaisesRegex(ValueError,
+                                    "unable to find a matching export format"):
+            unicode_export('abc', 0)
+        with self.assertRaisesRegex(ValueError,
+                                    "unable to find a matching export format"):
+            unicode_export('abc', PyUnicode_FORMAT_INVALID)
+
+    def test_unicode_import(self):
+        # Test PyUnicode_Import()
+        unicode_import = _testlimitedcapi.unicode_import
+        if sys.byteorder == 'little':
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII),
+                         "abc")
+        self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1),
+                         "latin1:\xe9")
+
+        self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc),
+                                          PyUnicode_FORMAT_UCS2),
+                         'ucs2:\u20ac')
+
+        self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc),
+                                          PyUnicode_FORMAT_UCS4),
+                         'ucs4:\U0010ffff')
+
+        text = "abc\xe9\U0010ffff"
+        self.assertEqual(unicode_import(text.encode('utf8'),
+                                          PyUnicode_FORMAT_UTF8),
+                         text)
+
+        # Empty string
+        for native_format in (
+            PyUnicode_FORMAT_ASCII,
+            PyUnicode_FORMAT_UCS1,
+            PyUnicode_FORMAT_UCS2,
+            PyUnicode_FORMAT_UCS4,
+            PyUnicode_FORMAT_UTF8,
+        ):
+            with self.subTest(native_format=native_format):
+                self.assertEqual(unicode_import(b'', native_format),
+                                 '')
+
+        # Invalid format
+        with self.assertRaises(ValueError):
+            unicode_import(b'', PyUnicode_FORMAT_INVALID)
+
+        # Invalid size
+        ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2)
+        ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4)
+        with self.assertRaises(ValueError):
+            unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
+
+
+if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index 47dff5c28f6ff8..b4e977f4e972e2 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -798,6 +798,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_EncodeLocale",
     "PyUnicode_EqualToUTF8",
     "PyUnicode_EqualToUTF8AndSize",
+    "PyUnicode_Export",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
@@ -814,6 +815,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_GetDefaultEncoding",
     "PyUnicode_GetLength",
     "PyUnicode_GetSize",
+    "PyUnicode_Import",
     "PyUnicode_InternFromString",
     "PyUnicode_InternImmortal",
     "PyUnicode_InternInPlace",
@@ -823,6 +825,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_RPartition",
     "PyUnicode_RSplit",
     "PyUnicode_ReadChar",
+    "PyUnicode_ReleaseExport",
     "PyUnicode_Replace",
     "PyUnicode_Resize",
     "PyUnicode_RichCompare",
diff --git a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst
new file mode 100644
index 00000000000000..3eae4543f087d0
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst	
@@ -0,0 +1,2 @@
+Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to
+export and import strings. Patch by Victor Stinner.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 305978f9f0c5c4..47d4be45965229 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2510,3 +2510,9 @@
 
 [function.Py_TYPE]
     added = '3.14'
+[function.PyUnicode_Export]
+    added = '3.14'
+[function.PyUnicode_ReleaseExport]
+    added = '3.14'
+[function.PyUnicode_Import]
+    added = '3.14'
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 2b70d09108a333..655dda3af196d1 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1837,6 +1837,44 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
 #undef CHECK_FORMAT_0
 }
 
+
+// Test PyUnicode_Export()
+static PyObject*
+unicode_export(PyObject *self, PyObject *args)
+{
+    PyObject *obj;
+    unsigned int supported_formats;
+    if (!PyArg_ParseTuple(args, "OI", &obj, &supported_formats)) {
+        return NULL;
+    }
+
+    Py_ssize_t size;
+    uint32_t format;
+    const void *data = PyUnicode_Export(obj, supported_formats, &size, &format);
+    if (data == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format);
+    PyUnicode_ReleaseExport(obj, data, format);
+    return res;
+}
+
+
+// Test PyUnicode_Import()
+static PyObject*
+unicode_import(PyObject *self, PyObject *args)
+{
+    const void *data;
+    Py_ssize_t size;
+    unsigned int format;
+    if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) {
+        return NULL;
+    }
+    return PyUnicode_Import(data, size, format);
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"codec_incrementalencoder", codec_incrementalencoder,       METH_VARARGS},
     {"codec_incrementaldecoder", codec_incrementaldecoder,       METH_VARARGS},
@@ -1924,6 +1962,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
     {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
+    {"unicode_export",           unicode_export,                 METH_VARARGS},
+    {"unicode_import",           unicode_import,                 METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e6feed47fbb2bf..928065b7c6237c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2097,6 +2097,148 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     return res;
 }
 
+const void*
+PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
+                 Py_ssize_t *size, uint32_t *format)
+{
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
+        goto error;
+    }
+
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+
+    if (PyUnicode_IS_ASCII(unicode)
+        && (requested_formats & PyUnicode_FORMAT_ASCII))
+    {
+        *format = PyUnicode_FORMAT_ASCII;
+        *size = len;
+        return PyUnicode_1BYTE_DATA(unicode);
+    }
+
+    int kind = PyUnicode_KIND(unicode);
+    if (kind == PyUnicode_1BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS1))
+    {
+        *format = PyUnicode_FORMAT_UCS1;
+        *size = len;
+        return PyUnicode_1BYTE_DATA(unicode);
+    }
+
+    if (kind == PyUnicode_2BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS2))
+    {
+        *format = PyUnicode_FORMAT_UCS2;
+        *size = len * 2;
+        return PyUnicode_2BYTE_DATA(unicode);
+    }
+
+    if (kind == PyUnicode_4BYTE_KIND
+        && (requested_formats & PyUnicode_FORMAT_UCS4))
+    {
+        *format = PyUnicode_FORMAT_UCS4;
+        *size = len * 4;
+        return PyUnicode_4BYTE_DATA(unicode);
+    }
+
+    if (requested_formats & PyUnicode_FORMAT_UCS4) {
+        // Convert UCS1 or UCS2 to UCS4
+        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
+        if (ucs4 == NULL) {
+            goto error;
+        }
+        *format = PyUnicode_FORMAT_UCS4;
+        *size = len * 4;
+        return ucs4;
+    }
+
+    if (requested_formats & PyUnicode_FORMAT_UTF8) {
+        // Encode UCS1, UCS2 or UCS4 to UTF-8
+        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
+        if (utf8 == NULL) {
+            goto error;
+        }
+        *format = PyUnicode_FORMAT_UTF8;
+        return utf8;
+    }
+
+    PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
+
+
+error:
+    *size = 0;
+    *format = 0;
+    return NULL;
+}
+
+void
+PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
+                        uint32_t format)
+{
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+        break;
+    case PyUnicode_FORMAT_UCS1:
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
+            PyMem_Free((void*)data);
+        }
+        break;
+    case PyUnicode_FORMAT_UTF8:
+        break;
+    default:
+        // ignore silently an unknown format
+        break;
+    }
+}
+
+PyObject*
+PyUnicode_Import(const void *data, Py_ssize_t size,
+                 uint32_t format)
+{
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "Negative size");
+        return NULL;
+    }
+
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+        return PyUnicode_DecodeASCII((const char*)data, size, NULL);
+
+    case PyUnicode_FORMAT_UCS1:
+        return _PyUnicode_FromUCS1(data, size);
+
+    case PyUnicode_FORMAT_UCS2:
+        if (size % 2) {
+            PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd",
+                         size);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS2(data, size / 2);
+
+    case PyUnicode_FORMAT_UCS4:
+        if (size % 4) {
+            PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd",
+                         size);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS4(data, size / 4);
+
+    case PyUnicode_FORMAT_UTF8:
+        return PyUnicode_DecodeUTF8((const char*)data, size, NULL);
+
+    default:
+        PyErr_Format(PyExc_ValueError, "unknown format: %i",
+                     format);
+        return NULL;
+    }
+}
+
 PyObject*
 PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
 {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 0bcf1cc507e1e8..3086a08c0b70f5 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -708,6 +708,7 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
 EXPORT_FUNC(PyUnicode_EqualToUTF8)
 EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize)
+EXPORT_FUNC(PyUnicode_Export)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)
@@ -724,6 +725,7 @@ EXPORT_FUNC(PyUnicode_FSDecoder)
 EXPORT_FUNC(PyUnicode_GetDefaultEncoding)
 EXPORT_FUNC(PyUnicode_GetLength)
 EXPORT_FUNC(PyUnicode_GetSize)
+EXPORT_FUNC(PyUnicode_Import)
 EXPORT_FUNC(PyUnicode_InternFromString)
 EXPORT_FUNC(PyUnicode_InternImmortal)
 EXPORT_FUNC(PyUnicode_InternInPlace)
@@ -731,6 +733,7 @@ EXPORT_FUNC(PyUnicode_IsIdentifier)
 EXPORT_FUNC(PyUnicode_Join)
 EXPORT_FUNC(PyUnicode_Partition)
 EXPORT_FUNC(PyUnicode_ReadChar)
+EXPORT_FUNC(PyUnicode_ReleaseExport)
 EXPORT_FUNC(PyUnicode_Replace)
 EXPORT_FUNC(PyUnicode_Resize)
 EXPORT_FUNC(PyUnicode_RichCompare)

From c42cebccc196b40d4bc7004a3307fceca8524c1d Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 10:39:00 +0200
Subject: [PATCH 02/12] stable_abi.toml: Add constants

---
 Misc/stable_abi.toml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 47d4be45965229..e3c89af3799480 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2516,3 +2516,13 @@
     added = '3.14'
 [function.PyUnicode_Import]
     added = '3.14'
+[const.PyUnicode_FORMAT_ASCII]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS1]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS2]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS4]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UTF8]
+    added = '3.14'

From 1310b6d80ede2eb3aa3f15bdd299bdfde411889b Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 10:46:48 +0200
Subject: [PATCH 03/12] Export UCS1 as UCS2

Co-Authored-By: Petr Viktorin <encukou@gmail.com>
---
 Lib/test/test_capi/test_unicode.py |  6 ++++++
 Objects/unicodeobject.c            | 30 +++++++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a8bc1a2117687c..8f5861d283421b 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1713,6 +1713,12 @@ def test_unicode_export(self):
         self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1),
                          (b'abc', PyUnicode_FORMAT_UCS1))
 
+        # export ASCII and UCS1 to UCS2
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS2),
+                         ('abc'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS2),
+                         ('latin1:\xe9'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2))
+
         # always export to UCS4
         self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4),
                          ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 928065b7c6237c..d75e648fb5fc39 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2105,9 +2105,9 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
         goto error;
     }
-
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
 
+    // Native ASCII
     if (PyUnicode_IS_ASCII(unicode)
         && (requested_formats & PyUnicode_FORMAT_ASCII))
     {
@@ -2116,6 +2116,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         return PyUnicode_1BYTE_DATA(unicode);
     }
 
+    // Native UCS1
     int kind = PyUnicode_KIND(unicode);
     if (kind == PyUnicode_1BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS1))
@@ -2125,6 +2126,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         return PyUnicode_1BYTE_DATA(unicode);
     }
 
+    // Native UCS2
     if (kind == PyUnicode_2BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS2))
     {
@@ -2133,6 +2135,28 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         return PyUnicode_2BYTE_DATA(unicode);
     }
 
+    // Convert ASCII or UCS1 to UCS2
+    if (kind == PyUnicode_1BYTE_KIND
+        && requested_formats & PyUnicode_FORMAT_UCS2)
+    {
+        Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2));
+        if (!ucs2) {
+            PyErr_NoMemory();
+            goto error;
+        }
+
+        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
+                                 PyUnicode_1BYTE_DATA(unicode),
+                                 PyUnicode_1BYTE_DATA(unicode) + len,
+                                 ucs2);
+        ucs2[len] = 0;
+
+        *format = PyUnicode_FORMAT_UCS2;
+        *size = len * 2;
+        return ucs2;
+    }
+
+    // Native UCS4
     if (kind == PyUnicode_4BYTE_KIND
         && (requested_formats & PyUnicode_FORMAT_UCS4))
     {
@@ -2141,8 +2165,8 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         return PyUnicode_4BYTE_DATA(unicode);
     }
 
+    // Convert ASCII, UCS1 or UCS2 to UCS4
     if (requested_formats & PyUnicode_FORMAT_UCS4) {
-        // Convert UCS1 or UCS2 to UCS4
         Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
         if (ucs4 == NULL) {
             goto error;
@@ -2152,6 +2176,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         return ucs4;
     }
 
+    // Convert to UTF-8
     if (requested_formats & PyUnicode_FORMAT_UTF8) {
         // Encode UCS1, UCS2 or UCS4 to UTF-8
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
@@ -2164,7 +2189,6 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
 
     PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
 
-
 error:
     *size = 0;
     *format = 0;

From 1c781036927abd5be60212a45814f06b12fa69d7 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 10:53:20 +0200
Subject: [PATCH 04/12] Add test_unicode_export_import_roundtrip()

---
 Lib/test/test_capi/test_unicode.py | 34 ++++++++++++++++++++++++++++++
 Modules/_testlimitedcapi/unicode.c |  6 +++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 8f5861d283421b..1d78b8f8ffd228 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1808,5 +1808,39 @@ def test_unicode_import(self):
             unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
 
 
+
+    def test_unicode_export_import_roundtrip(self):
+        unicode_export = _testlimitedcapi.unicode_export
+        unicode_import = _testlimitedcapi.unicode_import
+
+        ASCII = PyUnicode_FORMAT_ASCII
+        UCS1 = PyUnicode_FORMAT_UCS1
+        UCS2 = PyUnicode_FORMAT_UCS2
+        UCS4 = PyUnicode_FORMAT_UCS4
+        UTF8 = PyUnicode_FORMAT_UTF8
+        ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8)
+
+        for string, allowed_formats in (
+            ('', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}),
+            ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}),
+            ('ucs2:\u20ac', {UCS2, UCS4, UTF8}),
+            ('ucs4:\U0001f638', {UCS4, UTF8}),
+        ):
+            for format in ASCII, UCS1, UCS2, UCS4, UTF8:
+                with self.subTest(string=string, format=format):
+                    if format not in allowed_formats:
+                        with self.assertRaises(ValueError):
+                            unicode_export(string, format)
+                    else:
+                        buf, buf_fmt = unicode_export(string, format)
+                        restored = unicode_import(buf, buf_fmt)
+                        self.assertEqual(restored, string)
+
+            buf, buf_fmt = unicode_export(string, ALL)
+            restored = unicode_import(buf, buf_fmt)
+            self.assertEqual(restored, string)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 655dda3af196d1..7fdb3d08a34184 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1843,14 +1843,14 @@ static PyObject*
 unicode_export(PyObject *self, PyObject *args)
 {
     PyObject *obj;
-    unsigned int supported_formats;
-    if (!PyArg_ParseTuple(args, "OI", &obj, &supported_formats)) {
+    unsigned int requested_formats;
+    if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) {
         return NULL;
     }
 
     Py_ssize_t size;
     uint32_t format;
-    const void *data = PyUnicode_Export(obj, supported_formats, &size, &format);
+    const void *data = PyUnicode_Export(obj, requested_formats, &size, &format);
     if (data == NULL) {
         return NULL;
     }

From 62f4598501a4e6918a58abff2c4f8d284a4afbf5 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 10:55:51 +0200
Subject: [PATCH 05/12] Rename size parameter to nbytes

---
 Doc/c-api/unicode.rst   | 10 ++++-----
 Include/unicodeobject.h |  6 ++---
 Objects/unicodeobject.c | 49 ++++++++++++++++++++---------------------
 3 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 1b532382a2576a..c82e586c497665 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,19 +341,19 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *size, uint32_t *format)
+.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *nbytes, uint32_t *format)
 
    Export the contents of the *unicode* string in one of the requested format
    *requested_formats*.
 
-   * On success, set *\*size* and *\*format*, and return the contents.
+   * On success, set *\*nbytes* and *\*format*, and return the contents.
    * On error, set an exception and return ``NULL``.
 
    The contents is valid as long as *unicode* is valid.
 
    The export must be released by :c:func:`PyUnicode_ReleaseExport`.
 
-   *unicode*, *size* and *format* must not be NULL.
+   *unicode*, *nbytes* and *format* must not be NULL.
 
    Available formats:
 
@@ -382,14 +382,14 @@ APIs:
    .. versionadded:: 3.14
 
 
-.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t size, uint32_t format)
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
 
    Import a string from the *format* format.
 
    * Return a reference to a new string object on success.
    * Set an exception and return ``NULL`` on error.
 
-   *data* must not be NULL. *size* must be positive or zero.
+   *data* must not be NULL. *nbytes* must be positive or zero.
 
    See :c:func:`PyUnicode_Export` for the available formats.
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index a97eb2518501f3..8263b6b64a04f4 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -255,14 +255,14 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 #define PyUnicode_FORMAT_UTF8 0x10
 
 // Get the content of a string in the requested format:
-// - Return the content, set '*size' and '*format' on success.
+// - Return the content, set '*nbytes' and '*format' on success.
 // - Set an exception and return NULL on error.
 //
 // The export must be released by PyUnicode_ReleaseExport().
 PyAPI_FUNC(const void*) PyUnicode_Export(
     PyObject *unicode,
     uint32_t requested_formats,
-    Py_ssize_t *size,
+    Py_ssize_t *nbytes,
     uint32_t *format);
 
 // Release an export created by PyUnicode_Export().
@@ -276,7 +276,7 @@ PyAPI_FUNC(void) PyUnicode_ReleaseExport(
 // - Set an exception and return NULL on error.
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
-    Py_ssize_t size,
+    Py_ssize_t nbytes,
     uint32_t format);
 
 /* --- wchar_t support for platforms which support it --------------------- */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d75e648fb5fc39..44aae4936dd645 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2099,7 +2099,7 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
 
 const void*
 PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
-                 Py_ssize_t *size, uint32_t *format)
+                 Py_ssize_t *nbytes, uint32_t *format)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
@@ -2112,7 +2112,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         && (requested_formats & PyUnicode_FORMAT_ASCII))
     {
         *format = PyUnicode_FORMAT_ASCII;
-        *size = len;
+        *nbytes = len;
         return PyUnicode_1BYTE_DATA(unicode);
     }
 
@@ -2122,7 +2122,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         && (requested_formats & PyUnicode_FORMAT_UCS1))
     {
         *format = PyUnicode_FORMAT_UCS1;
-        *size = len;
+        *nbytes = len;
         return PyUnicode_1BYTE_DATA(unicode);
     }
 
@@ -2131,7 +2131,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         && (requested_formats & PyUnicode_FORMAT_UCS2))
     {
         *format = PyUnicode_FORMAT_UCS2;
-        *size = len * 2;
+        *nbytes = len * 2;
         return PyUnicode_2BYTE_DATA(unicode);
     }
 
@@ -2152,7 +2152,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         ucs2[len] = 0;
 
         *format = PyUnicode_FORMAT_UCS2;
-        *size = len * 2;
+        *nbytes = len * 2;
         return ucs2;
     }
 
@@ -2161,7 +2161,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
         && (requested_formats & PyUnicode_FORMAT_UCS4))
     {
         *format = PyUnicode_FORMAT_UCS4;
-        *size = len * 4;
+        *nbytes = len * 4;
         return PyUnicode_4BYTE_DATA(unicode);
     }
 
@@ -2172,14 +2172,14 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
             goto error;
         }
         *format = PyUnicode_FORMAT_UCS4;
-        *size = len * 4;
+        *nbytes = len * 4;
         return ucs4;
     }
 
     // Convert to UTF-8
     if (requested_formats & PyUnicode_FORMAT_UTF8) {
         // Encode UCS1, UCS2 or UCS4 to UTF-8
-        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
+        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, nbytes);
         if (utf8 == NULL) {
             goto error;
         }
@@ -2190,7 +2190,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
     PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
 
 error:
-    *size = 0;
+    *nbytes = 0;
     *format = 0;
     return NULL;
 }
@@ -2221,44 +2221,43 @@ PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
 }
 
 PyObject*
-PyUnicode_Import(const void *data, Py_ssize_t size,
+PyUnicode_Import(const void *data, Py_ssize_t nbytes,
                  uint32_t format)
 {
-    if (size < 0) {
-        PyErr_SetString(PyExc_ValueError, "Negative size");
+    if (nbytes < 0) {
+        PyErr_SetString(PyExc_ValueError, "Negative nbytes");
         return NULL;
     }
 
     switch (format)
     {
     case PyUnicode_FORMAT_ASCII:
-        return PyUnicode_DecodeASCII((const char*)data, size, NULL);
+        return PyUnicode_DecodeASCII((const char*)data, nbytes, NULL);
 
     case PyUnicode_FORMAT_UCS1:
-        return _PyUnicode_FromUCS1(data, size);
+        return _PyUnicode_FromUCS1(data, nbytes);
 
     case PyUnicode_FORMAT_UCS2:
-        if (size % 2) {
-            PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd",
-                         size);
+        if (nbytes % 2) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 2: %zd",
+                         nbytes);
             return NULL;
         }
-        return _PyUnicode_FromUCS2(data, size / 2);
+        return _PyUnicode_FromUCS2(data, nbytes / 2);
 
     case PyUnicode_FORMAT_UCS4:
-        if (size % 4) {
-            PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd",
-                         size);
+        if (nbytes % 4) {
+            PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 4: %zd",
+                         nbytes);
             return NULL;
         }
-        return _PyUnicode_FromUCS4(data, size / 4);
+        return _PyUnicode_FromUCS4(data, nbytes / 4);
 
     case PyUnicode_FORMAT_UTF8:
-        return PyUnicode_DecodeUTF8((const char*)data, size, NULL);
+        return PyUnicode_DecodeUTF8((const char*)data, nbytes, NULL);
 
     default:
-        PyErr_Format(PyExc_ValueError, "unknown format: %i",
-                     format);
+        PyErr_Format(PyExc_ValueError, "unknown format: %i", format);
         return NULL;
     }
 }

From 53cd9375ffeef93339b5f299ac947b7b8d2a9550 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 10:58:32 +0200
Subject: [PATCH 06/12] Update doc

---
 Doc/c-api/unicode.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index c82e586c497665..22123d444fd5b2 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -372,6 +372,8 @@ APIs:
    *requested_formats* can be a single format or a combination of the formats
    in the table above.
 
+   Note that future versions of Python may introduce additional formats.
+
    .. versionadded:: 3.14
 
 
@@ -384,7 +386,7 @@ APIs:
 
 .. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
 
-   Import a string from the *format* format.
+   Create a string object from a buffer in an “export format”.
 
    * Return a reference to a new string object on success.
    * Set an exception and return ``NULL`` on error.

From 3a084919928c5575cc3571c0757a954292338377 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 11:03:39 +0200
Subject: [PATCH 07/12] Make sure that exported string ends with NUL character

---
 Modules/_testlimitedcapi/unicode.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 7fdb3d08a34184..5328cba49c2b03 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1850,11 +1850,33 @@ unicode_export(PyObject *self, PyObject *args)
 
     Py_ssize_t size;
     uint32_t format;
-    const void *data = PyUnicode_Export(obj, requested_formats, &size, &format);
+    const char *data = PyUnicode_Export(obj, requested_formats, &size, &format);
     if (data == NULL) {
         return NULL;
     }
 
+    // Make sure that the exported string ends with a NUL character
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+    case PyUnicode_FORMAT_UCS1:
+        assert(data[size] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        assert(data[size] == 0);
+        assert(data[size+1] == 0);
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        assert(data[size] == 0);
+        assert(data[size+1] == 0);
+        assert(data[size+2] == 0);
+        assert(data[size+3] == 0);
+        break;
+    case PyUnicode_FORMAT_UTF8:
+        assert(data[size] == 0);
+        break;
+    }
+
     PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format);
     PyUnicode_ReleaseExport(obj, data, format);
     return res;

From a9a90838c051ca17aa73041d25bafc29d91b7d91 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 21 Jun 2024 11:05:55 +0200
Subject: [PATCH 08/12] tests: rename size to nbytes

---
 Modules/_testlimitedcapi/unicode.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 5328cba49c2b03..e059d349a18aa3 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1848,9 +1848,9 @@ unicode_export(PyObject *self, PyObject *args)
         return NULL;
     }
 
-    Py_ssize_t size;
+    Py_ssize_t nbytes;
     uint32_t format;
-    const char *data = PyUnicode_Export(obj, requested_formats, &size, &format);
+    const char *data = PyUnicode_Export(obj, requested_formats, &nbytes, &format);
     if (data == NULL) {
         return NULL;
     }
@@ -1860,24 +1860,24 @@ unicode_export(PyObject *self, PyObject *args)
     {
     case PyUnicode_FORMAT_ASCII:
     case PyUnicode_FORMAT_UCS1:
-        assert(data[size] == 0);
+        assert(data[nbytes] == 0);
         break;
     case PyUnicode_FORMAT_UCS2:
-        assert(data[size] == 0);
-        assert(data[size+1] == 0);
+        assert(data[nbytes] == 0);
+        assert(data[nbytes+1] == 0);
         break;
     case PyUnicode_FORMAT_UCS4:
-        assert(data[size] == 0);
-        assert(data[size+1] == 0);
-        assert(data[size+2] == 0);
-        assert(data[size+3] == 0);
+        assert(data[nbytes] == 0);
+        assert(data[nbytes+1] == 0);
+        assert(data[nbytes+2] == 0);
+        assert(data[nbytes+3] == 0);
         break;
     case PyUnicode_FORMAT_UTF8:
-        assert(data[size] == 0);
+        assert(data[nbytes] == 0);
         break;
     }
 
-    PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format);
+    PyObject *res = Py_BuildValue("y#I", data, nbytes, (unsigned int)format);
     PyUnicode_ReleaseExport(obj, data, format);
     return res;
 }
@@ -1888,12 +1888,12 @@ static PyObject*
 unicode_import(PyObject *self, PyObject *args)
 {
     const void *data;
-    Py_ssize_t size;
+    Py_ssize_t nbytes;
     unsigned int format;
-    if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) {
+    if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) {
         return NULL;
     }
-    return PyUnicode_Import(data, size, format);
+    return PyUnicode_Import(data, nbytes, format);
 }
 
 

From 1297c52e7390dec3e7a37cf6a3d7b64f5d92d361 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sat, 22 Jun 2024 17:17:46 +0200
Subject: [PATCH 09/12] Update Doc/c-api/unicode.rst

Co-authored-by: Petr Viktorin <encukou@gmail.com>
---
 Doc/c-api/unicode.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 22123d444fd5b2..ca9a9806c50882 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -360,7 +360,7 @@ APIs:
    .. c:namespace:: NULL
 
    ===================================  ========  ===========================
-   Constant Identifier                  Value  Description
+   Constant Identifier                    Value  Description
    ===================================  ========  ===========================
    .. c:macro:: PyUnicode_FORMAT_ASCII  ``0x01``  ASCII string (``Py_UCS1*``)
    .. c:macro:: PyUnicode_FORMAT_UCS1   ``0x02``  UCS-1 string (``Py_UCS1*``)

From d8e3d5d7ab33fd18ba95ebb23ba4a7e832af306e Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sat, 22 Jun 2024 17:18:16 +0200
Subject: [PATCH 10/12] Update Doc/c-api/unicode.rst

Co-authored-by: Petr Viktorin <encukou@gmail.com>
---
 Doc/c-api/unicode.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index ca9a9806c50882..7f2c0e819e17fc 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -381,6 +381,11 @@ APIs:
 
    Release an export created by :c:func:`PyUnicode_Export`.
 
+   Each argument must match the corresponding argument or result of
+   a single earlier call to :c:func:`PyUnicode_Export`.
+   In particular, this means that you must hold a reference to *unicode*
+   while an export is valid.
+
    .. versionadded:: 3.14
 
 

From da67c897d6daef4bee730a243c2a1f84ded281c4 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sat, 22 Jun 2024 17:22:22 +0200
Subject: [PATCH 11/12] Address Petr's review

---
 Doc/c-api/unicode.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 7f2c0e819e17fc..15f0d126ab7cd2 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -352,6 +352,9 @@ APIs:
    The contents is valid as long as *unicode* is valid.
 
    The export must be released by :c:func:`PyUnicode_ReleaseExport`.
+   The contents of the buffer are valid until the export is released.
+
+   The buffer must not be modified.
 
    *unicode*, *nbytes* and *format* must not be NULL.
 
@@ -369,8 +372,9 @@ APIs:
    .. c:macro:: PyUnicode_FORMAT_UTF8   ``0x10``  UTF-8 string (``char*``)
    ===================================  ========  ===========================
 
-   *requested_formats* can be a single format or a combination of the formats
-   in the table above.
+   *requested_formats* can be a single format or a bitwise combination of the
+   formats in the table above.
+   On success, *\*format* will be set to exactly one of the requested formats.
 
    Note that future versions of Python may introduce additional formats.
 

From f3857d879581c429db3d13f44ddc8f89c98c7fd8 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Wed, 26 Jun 2024 11:10:33 +0200
Subject: [PATCH 12/12] Fix doc: add uint32_t type

---
 Doc/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Doc/conf.py b/Doc/conf.py
index 8a14646801ebac..f23e4a93fd3311 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -140,6 +140,7 @@
     ('c:type', 'size_t'),
     ('c:type', 'ssize_t'),
     ('c:type', 'time_t'),
+    ('c:type', 'uint32_t'),
     ('c:type', 'uint64_t'),
     ('c:type', 'uintmax_t'),
     ('c:type', 'uintptr_t'),