From d39945ec55aaa14d62c90fac3f7541034c5597be Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 3 Oct 2023 18:24:05 +0300
Subject: [PATCH 01/16] gh-110289: C API: Add PyUnicode_EqualToString()
 function

---
 Doc/c-api/unicode.rst                         | 10 +++
 Doc/data/stable_abi.dat                       |  1 +
 Doc/whatsnew/3.13.rst                         |  7 +++
 Include/unicodeobject.h                       |  8 +++
 Lib/test/test_stable_abi_ctypes.py            |  1 +
 ...-10-03-19-01-20.gh-issue-110289.YBIHEz.rst |  1 +
 Misc/stable_abi.toml                          |  2 +
 Objects/unicodeobject.c                       | 61 +++++++++++++++++++
 PC/python3dll.c                               |  1 +
 9 files changed, 92 insertions(+)
 create mode 100644 Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 2a2cb1b8c458e7..f552380124bb37 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1396,6 +1396,16 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    :c:func:`PyErr_Occurred` to check for errors.
 
 
+.. c:function:: int PyUnicode_EqualToString(PyObject *unicode, const char *string)
+
+   Compare a Unicode object with a UTF-8 encoded C string and return true
+   if they are equal and false otherwise.
+
+   This function does not raise exceptions.
+
+   .. versionadded:: 3.13
+
+
 .. c:function:: int PyUnicode_CompareWithASCIIString(PyObject *uni, const char *string)
 
    Compare a Unicode object, *uni*, with *string* and return ``-1``, ``0``, ``1`` for less
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index c189c78238f40f..abfc186cdc460d 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -755,6 +755,7 @@ function,PyUnicode_DecodeUnicodeEscape,3.2,,
 function,PyUnicode_EncodeCodePage,3.7,on Windows,
 function,PyUnicode_EncodeFSDefault,3.2,,
 function,PyUnicode_EncodeLocale,3.7,,
+function,PyUnicode_EqualToString,3.13,,
 function,PyUnicode_FSConverter,3.2,,
 function,PyUnicode_FSDecoder,3.2,,
 function,PyUnicode_Find,3.2,,
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 1ef04fa7ae6adc..7f05a0275f4664 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1001,6 +1001,13 @@ New Features
   :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag.
   (Contributed by Victor Stinner in :gh:`107073`.)
 
+* Add :c:func:`PyUnicode_EqualToString` function: compare Unicode object with
+  a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
+  are equal or false otherwise.
+  This function does not raise exceptions.
+  (Contributed by Serhiy Storchaka in :gh:`110289`.)
+
+
 Porting to Python 3.13
 ----------------------
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index f00277787122aa..a7fad22e606b28 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -957,6 +957,14 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
     const char *right           /* ASCII-encoded string */
     );
 
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000
+/* Compare a Unicode object with UTF-8 encoded C string and return 1 for equal
+   and 0 otherwise.
+   This function does not raise exceptions. */
+
+PyAPI_FUNC(int) PyUnicode_EqualToString(PyObject *, const char *);
+#endif
+
 /* Rich compare two strings and return one of the following:
 
    - NULL in case an exception was raised
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index 94f817f8e1d159..f224d67e6416d6 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -770,6 +770,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_DecodeUnicodeEscape",
     "PyUnicode_EncodeFSDefault",
     "PyUnicode_EncodeLocale",
+    "PyUnicode_EqualToString",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
diff --git a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst
new file mode 100644
index 00000000000000..ada5072071a476
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst	
@@ -0,0 +1 @@
+Add :c:func:`PyUnicode_EqualToString` function.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 8df3f85e61eec6..20f6ea560b4316 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2460,3 +2460,5 @@
     added = '3.13'
 [function.PyMapping_HasKeyStringWithError]
     added = '3.13'
+[function.PyUnicode_EqualToString]
+    added = '3.13'
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 49981a1f881c21..8c71990a011849 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10673,6 +10673,67 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
     }
 }
 
+int
+PyUnicode_EqualToString(PyObject *unicode, const char *str)
+{
+    assert(_PyUnicode_CHECK(unicode));
+    assert(str);
+    if (PyUnicode_IS_ASCII(unicode)) {
+        size_t len = (size_t)PyUnicode_GET_LENGTH(unicode);
+        return strlen(str) == len &&
+            memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
+    }
+    if (PyUnicode_UTF8(unicode) != NULL) {
+        size_t len = (size_t)PyUnicode_UTF8_LENGTH(unicode);
+        return strlen(str) == len &&
+            memcmp(PyUnicode_UTF8(unicode), str, len) == 0;
+    }
+
+    Py_UCS4 ch;
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+    int kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    /* Compare Unicode string and UTF-8 string */
+    for (Py_ssize_t i = 0; i < len; i++) {
+        ch = PyUnicode_READ(kind, data, i);
+        if (ch == 0x80) {
+            return 0;
+        }
+        else if (ch < 0x80) {
+            if (ch != (unsigned char)*str++) {
+                return 0;
+            }
+        }
+        else if (ch < 0x800) {
+            if ((0xc0 | (ch >> 6)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+        else if (ch < 0x10000) {
+            if (Py_UNICODE_IS_SURROGATE(ch) ||
+                (0xe0 | (ch >> 12)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+        else {
+            assert(ch <= MAX_UNICODE);
+            if ((0xf0 | (ch >> 18)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 12) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
+                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            {
+                return 0;
+            }
+        }
+    }
+    return *str == 0;
+}
+
 int
 _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
 {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 2c1cc8098ce856..5f629ccf99d28a 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -688,6 +688,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful)
 EXPORT_FUNC(PyUnicode_EncodeCodePage)
 EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
+EXPORT_FUNC(PyUnicode_EqualToString)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)

From 4793161fcb730e2d09794a2b7cf91460b2d48a87 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 3 Oct 2023 21:20:38 +0300
Subject: [PATCH 02/16] Add tests and address review comments.

---
 Doc/c-api/unicode.rst                         |  4 ++-
 Doc/data/stable_abi.dat                       |  2 +-
 Doc/whatsnew/3.13.rst                         |  2 +-
 Include/unicodeobject.h                       |  2 +-
 Lib/test/test_capi/test_unicode.py            | 31 +++++++++++++++++++
 Lib/test/test_stable_abi_ctypes.py            |  2 +-
 ...-10-03-19-01-20.gh-issue-110289.YBIHEz.rst |  2 +-
 Misc/stable_abi.toml                          |  2 +-
 Modules/_testcapi/unicode.c                   | 19 ++++++++++++
 Objects/unicodeobject.c                       |  4 +--
 PC/python3dll.c                               |  2 +-
 11 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index f552380124bb37..c8bd0d7f81c7e5 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1396,10 +1396,12 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    :c:func:`PyErr_Occurred` to check for errors.
 
 
-.. c:function:: int PyUnicode_EqualToString(PyObject *unicode, const char *string)
+.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
    Compare a Unicode object with a UTF-8 encoded C string and return true
    if they are equal and false otherwise.
+   If the Unicode object contains null or surrogate characters or
+   the C string not encoded to UTF-8 return false.
 
    This function does not raise exceptions.
 
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index abfc186cdc460d..1407659d1ae576 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -755,7 +755,7 @@ function,PyUnicode_DecodeUnicodeEscape,3.2,,
 function,PyUnicode_EncodeCodePage,3.7,on Windows,
 function,PyUnicode_EncodeFSDefault,3.2,,
 function,PyUnicode_EncodeLocale,3.7,,
-function,PyUnicode_EqualToString,3.13,,
+function,PyUnicode_EqualToUTF8,3.13,,
 function,PyUnicode_FSConverter,3.2,,
 function,PyUnicode_FSDecoder,3.2,,
 function,PyUnicode_Find,3.2,,
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 7f05a0275f4664..ccc29fd915fcf2 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1001,7 +1001,7 @@ New Features
   :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag.
   (Contributed by Victor Stinner in :gh:`107073`.)
 
-* Add :c:func:`PyUnicode_EqualToString` function: compare Unicode object with
+* Add :c:func:`PyUnicode_EqualToUTF8` function: compare Unicode object with
   a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
   are equal or false otherwise.
   This function does not raise exceptions.
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index a7fad22e606b28..e2787497c47bef 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -962,7 +962,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
    and 0 otherwise.
    This function does not raise exceptions. */
 
-PyAPI_FUNC(int) PyUnicode_EqualToString(PyObject *, const char *);
+PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *);
 #endif
 
 /* Rich compare two strings and return one of the following:
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 622ee8993907fa..e6e3792c639aec 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1297,6 +1297,37 @@ def test_comparewithasciistring(self):
         # CRASHES comparewithasciistring([], b'abc')
         # CRASHES comparewithasciistring(NULL, b'abc')
 
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_equaltoutf8(self):
+        """Test PyUnicode_EqualToUTF8()"""
+        from _testcapi import unicode_equaltoutf8 as equaltoutf8
+
+        strings = [
+            'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
+            '\U0001f600\U0001f601\U0001f602'
+        ]
+        for s in strings:
+            b = s.encode()
+            self.assertEqual(equaltoutf8(s, b), 1)
+            self.assertEqual(equaltoutf8(b.decode(), b), 1)
+            self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)
+            self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)
+            self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)
+            self.assertEqual(equaltoutf8(s, b + b'x'), 0)
+            self.assertEqual(equaltoutf8(s, b[:-1]), 0)
+            self.assertEqual(equaltoutf8(s, b[:-1] + b'x'), 0)
+
+        # surrogateescape
+        self.assertEqual(equaltoutf8('\udcfe', b'\xfe'), 0)
+        # surrogatepass
+        self.assertEqual(equaltoutf8('\udcfe', b'\xed\xb3\xbe'), 0)
+
+        # CRASHES equaltoutf8(b'abc', b'abc')
+        # CRASHES equaltoutf8([], b'abc')
+        # CRASHES equaltoutf8(NULL, b'abc')
+        # CRASHES equaltoutf8('abc')  # NULL
+
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_richcompare(self):
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index f224d67e6416d6..d1ae4e382c9470 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -770,7 +770,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_DecodeUnicodeEscape",
     "PyUnicode_EncodeFSDefault",
     "PyUnicode_EncodeLocale",
-    "PyUnicode_EqualToString",
+    "PyUnicode_EqualToUTF8",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
diff --git a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst
index ada5072071a476..b1582bc1591590 100644
--- a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst	
+++ b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst	
@@ -1 +1 @@
-Add :c:func:`PyUnicode_EqualToString` function.
+Add :c:func:`PyUnicode_EqualToUTF8` function.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 20f6ea560b4316..ae39fea3a66a24 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2460,5 +2460,5 @@
     added = '3.13'
 [function.PyMapping_HasKeyStringWithError]
     added = '3.13'
-[function.PyUnicode_EqualToString]
+[function.PyUnicode_EqualToUTF8]
     added = '3.13'
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 232b2ad543fca0..83fb8a7cfbcb87 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -1429,6 +1429,24 @@ unicode_comparewithasciistring(PyObject *self, PyObject *args)
     return PyLong_FromLong(result);
 }
 
+/* Test PyUnicode_EqualToUTF8() */
+static PyObject *
+unicode_equaltoutf8(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    const char *right = NULL;
+    Py_ssize_t right_len;
+    int result;
+
+    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len))
+        return NULL;
+
+    NULLABLE(left);
+    result = PyUnicode_EqualToUTF8(left, right);
+    assert(!PyErr_Occurred());
+    return PyLong_FromLong(result);
+}
+
 /* Test PyUnicode_RichCompare() */
 static PyObject *
 unicode_richcompare(PyObject *self, PyObject *args)
@@ -2044,6 +2062,7 @@ static PyMethodDef TestMethods[] = {
     {"unicode_replace",          unicode_replace,                METH_VARARGS},
     {"unicode_compare",          unicode_compare,                METH_VARARGS},
     {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS},
+    {"unicode_equaltoutf8",      unicode_equaltoutf8,            METH_VARARGS},
     {"unicode_richcompare",      unicode_richcompare,            METH_VARARGS},
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8c71990a011849..4993b0c9c52b17 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10674,7 +10674,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 }
 
 int
-PyUnicode_EqualToString(PyObject *unicode, const char *str)
+PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
 {
     assert(_PyUnicode_CHECK(unicode));
     assert(str);
@@ -10696,7 +10696,7 @@ PyUnicode_EqualToString(PyObject *unicode, const char *str)
     /* Compare Unicode string and UTF-8 string */
     for (Py_ssize_t i = 0; i < len; i++) {
         ch = PyUnicode_READ(kind, data, i);
-        if (ch == 0x80) {
+        if (ch == 0) {
             return 0;
         }
         else if (ch < 0x80) {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 5f629ccf99d28a..0beb61f28e0ef8 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -688,7 +688,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful)
 EXPORT_FUNC(PyUnicode_EncodeCodePage)
 EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
-EXPORT_FUNC(PyUnicode_EqualToString)
+EXPORT_FUNC(PyUnicode_EqualToUTF8)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)

From c55f9ac784a417bc615f2335c813c0e39437e0fd Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 10:53:32 +0300
Subject: [PATCH 03/16] Apply suggestions from code review

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Doc/c-api/unicode.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index c8bd0d7f81c7e5..c9f11d93638333 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1401,7 +1401,7 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    Compare a Unicode object with a UTF-8 encoded C string and return true
    if they are equal and false otherwise.
    If the Unicode object contains null or surrogate characters or
-   the C string not encoded to UTF-8 return false.
+   the C string is not encoded to UTF-8 return 0.
 
    This function does not raise exceptions.
 

From bdf2f1e27cdc42ec976a7a23b83f0aade13a56ad Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 11:30:14 +0300
Subject: [PATCH 04/16] Address some of the review comments and test the UTF-8
 cache.

---
 Doc/c-api/unicode.rst              |  6 +++---
 Lib/test/test_capi/test_unicode.py | 28 +++++++++++++++++----------
 Objects/unicodeobject.c            | 31 +++++++++++++++++-------------
 3 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index c9f11d93638333..dec451464137fd 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1398,10 +1398,10 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
 
 .. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
-   Compare a Unicode object with a UTF-8 encoded C string and return true
-   if they are equal and false otherwise.
+   Compare a Unicode object with a UTF-8 encoded C string and return true (``1``)
+   if they are equal and false (``0``) otherwise.
    If the Unicode object contains null or surrogate characters or
-   the C string is not encoded to UTF-8 return 0.
+   the C string is not encoded to UTF-8 return false.
 
    This function does not raise exceptions.
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index e6e3792c639aec..c3d7e3bc4c56ab 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1302,26 +1302,34 @@ def test_comparewithasciistring(self):
     def test_equaltoutf8(self):
         """Test PyUnicode_EqualToUTF8()"""
         from _testcapi import unicode_equaltoutf8 as equaltoutf8
+        from _testcapi import unicode_asutf8andsize as asutf8andsize
 
         strings = [
             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
             '\U0001f600\U0001f601\U0001f602'
         ]
         for s in strings:
+            # Call PyUnicode_AsUTF8AndSize() which creates the UTF-8
+            # encoded string cached in the Unicode object.
+            asutf8andsize(s, 0)
             b = s.encode()
-            self.assertEqual(equaltoutf8(s, b), 1)
-            self.assertEqual(equaltoutf8(b.decode(), b), 1)
+            self.assertEqual(equaltoutf8(s, b), 1)  # Use the UTF-8 cache.
+            s2 = b.decode()  # New Unicode object without the UTF-8 cache.
+            self.assertEqual(equaltoutf8(s2, b), 1)
             self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)
             self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)
             self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)
-            self.assertEqual(equaltoutf8(s, b + b'x'), 0)
-            self.assertEqual(equaltoutf8(s, b[:-1]), 0)
-            self.assertEqual(equaltoutf8(s, b[:-1] + b'x'), 0)
-
-        # surrogateescape
-        self.assertEqual(equaltoutf8('\udcfe', b'\xfe'), 0)
-        # surrogatepass
-        self.assertEqual(equaltoutf8('\udcfe', b'\xed\xb3\xbe'), 0)
+            self.assertEqual(equaltoutf8(s2, b + b'x'), 0)
+            self.assertEqual(equaltoutf8(s2, b[:-1]), 0)
+            self.assertEqual(equaltoutf8(s2, b[:-1] + b'x'), 0)
+
+        # Surrogate characters are always treated as not equal
+        self.assertEqual(equaltoutf8('\udcfe',
+                            '\udcfe'.encode("utf8", "surrogateescape")), 0)
+        self.assertEqual(equaltoutf8('\udcfe',
+                            '\udcfe'.encode("utf8", "surrogatepass")), 0)
+        self.assertEqual(equaltoutf8('\ud801',
+                            '\ud801'.encode("utf8", "surrogatepass")), 0)
 
         # CRASHES equaltoutf8(b'abc', b'abc')
         # CRASHES equaltoutf8([], b'abc')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 4993b0c9c52b17..18e99a500bf3c6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10678,6 +10678,7 @@ PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
 {
     assert(_PyUnicode_CHECK(unicode));
     assert(str);
+
     if (PyUnicode_IS_ASCII(unicode)) {
         size_t len = (size_t)PyUnicode_GET_LENGTH(unicode);
         return strlen(str) == len &&
@@ -10689,49 +10690,53 @@ PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
             memcmp(PyUnicode_UTF8(unicode), str, len) == 0;
     }
 
-    Py_UCS4 ch;
+    const unsigned char *s = (const unsigned char *)str;
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
     int kind = PyUnicode_KIND(unicode);
     const void *data = PyUnicode_DATA(unicode);
     /* Compare Unicode string and UTF-8 string */
     for (Py_ssize_t i = 0; i < len; i++) {
-        ch = PyUnicode_READ(kind, data, i);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
         if (ch == 0) {
             return 0;
         }
         else if (ch < 0x80) {
-            if (ch != (unsigned char)*str++) {
+            if (s[0] != ch) {
                 return 0;
             }
+            s += 1;
         }
         else if (ch < 0x800) {
-            if ((0xc0 | (ch >> 6)) != (unsigned char)*str++ ||
-                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            if (s[0] != (0xc0 | (ch >> 6)) ||
+                s[1] != (0x80 | (ch & 0x3f)))
             {
                 return 0;
             }
+            s += 2;
         }
         else if (ch < 0x10000) {
             if (Py_UNICODE_IS_SURROGATE(ch) ||
-                (0xe0 | (ch >> 12)) != (unsigned char)*str++ ||
-                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
-                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+                s[0] != (0xe0 | (ch >> 12)) ||
+                s[1] != (0x80 | ((ch >> 6) & 0x3f)) ||
+                s[2] != (0x80 | (ch & 0x3f)))
             {
                 return 0;
             }
+            s += 3;
         }
         else {
             assert(ch <= MAX_UNICODE);
-            if ((0xf0 | (ch >> 18)) != (unsigned char)*str++ ||
-                (0x80 | ((ch >> 12) & 0x3f)) != (unsigned char)*str++ ||
-                (0x80 | ((ch >> 6) & 0x3f)) != (unsigned char)*str++ ||
-                (0x80 | (ch & 0x3f)) != (unsigned char)*str++)
+            if (s[0] != (0xf0 | (ch >> 18)) ||
+                s[1] != (0x80 | ((ch >> 12) & 0x3f)) ||
+                s[2] != (0x80 | ((ch >> 6) & 0x3f)) ||
+                s[3] != (0x80 | (ch & 0x3f)))
             {
                 return 0;
             }
+            s += 4;
         }
     }
-    return *str == 0;
+    return *s == 0;
 }
 
 int

From 7223c14e3f9629d777fd27b477ad39d516472d80 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 16:15:04 +0300
Subject: [PATCH 05/16] Address review comments.

---
 Doc/c-api/unicode.rst              | 6 +++---
 Lib/test/test_capi/test_unicode.py | 9 +++++++++
 Modules/_testcapi/unicode.c        | 3 ++-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index dec451464137fd..36926b0681f7bc 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1398,10 +1398,10 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
 
 .. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
-   Compare a Unicode object with a UTF-8 encoded C string and return true (``1``)
-   if they are equal and false (``0``) otherwise.
+   Compare a Unicode object with a UTF-8 or ASCII encoded C string
+   and return true (``1``) if they are equal and false (``0``) otherwise.
    If the Unicode object contains null or surrogate characters or
-   the C string is not encoded to UTF-8 return false.
+   the C string is not encoded to UTF-8 or ASCII, return false.
 
    This function does not raise exceptions.
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index c3d7e3bc4c56ab..f3fff6aa4dab9d 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1331,6 +1331,15 @@ def test_equaltoutf8(self):
         self.assertEqual(equaltoutf8('\ud801',
                             '\ud801'.encode("utf8", "surrogatepass")), 0)
 
+        def check_not_equal_encoding(text, encoding):
+            self.assertEqual(equaltoutf8(text, text.encode(encoding)), 0)
+            self.assertNotEqual(text.encode(encoding), text.encode("utf8"))
+
+        # Strings encoded to other encodings are not equal to expected UTF8-encoding string
+        check_not_equal_encoding('Stéphane', 'latin1')
+        check_not_equal_encoding('Stéphane', 'utf-16-le')  # embedded null characters
+        check_not_equal_encoding('北京市', 'gbk')
+
         # CRASHES equaltoutf8(b'abc', b'abc')
         # CRASHES equaltoutf8([], b'abc')
         # CRASHES equaltoutf8(NULL, b'abc')
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 83fb8a7cfbcb87..094cae40049e6d 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -1438,8 +1438,9 @@ unicode_equaltoutf8(PyObject *self, PyObject *args)
     Py_ssize_t right_len;
     int result;
 
-    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len))
+    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) {
         return NULL;
+    }
 
     NULLABLE(left);
     result = PyUnicode_EqualToUTF8(left, right);

From b2713274d26af3460d60f60b7189a3eeef823b9b Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 17:53:32 +0300
Subject: [PATCH 06/16] Apply suggestions from code review

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Doc/c-api/unicode.rst              | 4 ++--
 Doc/whatsnew/3.13.rst              | 4 ++--
 Lib/test/test_capi/test_unicode.py | 8 +++++++-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 36926b0681f7bc..ee72af3b5c9cb4 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1399,9 +1399,9 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
 .. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
    Compare a Unicode object with a UTF-8 or ASCII encoded C string
-   and return true (``1``) if they are equal and false (``0``) otherwise.
+   and return true (``1``) if they are equal, or false (``0``) otherwise.
    If the Unicode object contains null or surrogate characters or
-   the C string is not encoded to UTF-8 or ASCII, return false.
+   the C string is not encoded to UTF-8 or ASCII, return false (``0``) .
 
    This function does not raise exceptions.
 
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 067ccbd9259b9b..5181d346254b97 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1004,8 +1004,8 @@ New Features
   (Contributed by Victor Stinner in :gh:`107073`.)
 
 * Add :c:func:`PyUnicode_EqualToUTF8` function: compare Unicode object with
-  a :c:expr:`const char*` UTF-8 encoded bytes string and return true if they
-  are equal or false otherwise.
+  a :c:expr:`const char*` UTF-8 encoded bytes string and return true (``1``)
+  if they are equal, or false (``0``) otherwise.
   This function does not raise exceptions.
   (Contributed by Serhiy Storchaka in :gh:`110289`.)
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index f3fff6aa4dab9d..0dd9bebf33ad9d 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1306,7 +1306,8 @@ def test_equaltoutf8(self):
 
         strings = [
             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
-            '\U0001f600\U0001f601\U0001f602'
+            '\U0001f600\U0001f601\U0001f602',
+            '\U0010ffff',
         ]
         for s in strings:
             # Call PyUnicode_AsUTF8AndSize() which creates the UTF-8
@@ -1323,6 +1324,11 @@ def test_equaltoutf8(self):
             self.assertEqual(equaltoutf8(s2, b[:-1]), 0)
             self.assertEqual(equaltoutf8(s2, b[:-1] + b'x'), 0)
 
+        # embedded null chars/bytes
+        self.assertEqual(equaltoutf8('abc', b'abc\0def\0'), 1)
+        self.assertEqual(equaltoutf8('a\0bc', b'abc'), 0)
+        self.assertEqual(equaltoutf8('abc', b'a\0bc'), 0)
+        
         # Surrogate characters are always treated as not equal
         self.assertEqual(equaltoutf8('\udcfe',
                             '\udcfe'.encode("utf8", "surrogateescape")), 0)

From 6f26ad6ccf4726073c912a7b2cb8e9bc469dfb38 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 17:57:32 +0300
Subject: [PATCH 07/16] Remove trailing spaces.

---
 Lib/test/test_capi/test_unicode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 0dd9bebf33ad9d..3e32b1b7150ab1 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1328,7 +1328,7 @@ def test_equaltoutf8(self):
         self.assertEqual(equaltoutf8('abc', b'abc\0def\0'), 1)
         self.assertEqual(equaltoutf8('a\0bc', b'abc'), 0)
         self.assertEqual(equaltoutf8('abc', b'a\0bc'), 0)
-        
+
         # Surrogate characters are always treated as not equal
         self.assertEqual(equaltoutf8('\udcfe',
                             '\udcfe'.encode("utf8", "surrogateescape")), 0)

From dd124b87e00aa51f54e1da9adccb7b46c0aa16f5 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 18:41:44 +0300
Subject: [PATCH 08/16] Apply suggestions from code review

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Include/unicodeobject.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index e2787497c47bef..1bce505e9c4d32 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -958,8 +958,8 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
     );
 
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000
-/* Compare a Unicode object with UTF-8 encoded C string and return 1 for equal
-   and 0 otherwise.
+/* Compare a Unicode object with a UTF-8 encoded C string.
+   Return 1 if they are equal, or 0 otherwise.
    This function does not raise exceptions. */
 
 PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *);

From 76b9177c4158e5b6f9afc898cf0cce8167c48ee4 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 4 Oct 2023 18:52:36 +0300
Subject: [PATCH 09/16] Use "z#" instead of "|y#".

---
 Lib/test/test_capi/test_unicode.py | 2 +-
 Modules/_testcapi/unicode.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 3e32b1b7150ab1..98cc69741baa0a 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1349,7 +1349,7 @@ def check_not_equal_encoding(text, encoding):
         # CRASHES equaltoutf8(b'abc', b'abc')
         # CRASHES equaltoutf8([], b'abc')
         # CRASHES equaltoutf8(NULL, b'abc')
-        # CRASHES equaltoutf8('abc')  # NULL
+        # CRASHES equaltoutf8('abc', NULL)
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 094cae40049e6d..732d7f48ec49e4 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -1438,7 +1438,7 @@ unicode_equaltoutf8(PyObject *self, PyObject *args)
     Py_ssize_t right_len;
     int result;
 
-    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) {
+    if (!PyArg_ParseTuple(args, "Oz#", &left, &right, &right_len)) {
         return NULL;
     }
 

From ee5781d223e3bf55dc0a3de59e3eb81a9726b40a Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 5 Oct 2023 22:31:17 +0300
Subject: [PATCH 10/16] Apply suggestions from code review

Co-authored-by: Antoine Pitrou <pitrou@free.fr>
---
 Doc/c-api/unicode.rst | 2 +-
 Doc/whatsnew/3.13.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index ee72af3b5c9cb4..57e55cdbfe496a 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1401,7 +1401,7 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    Compare a Unicode object with a UTF-8 or ASCII encoded C string
    and return true (``1``) if they are equal, or false (``0``) otherwise.
    If the Unicode object contains null or surrogate characters or
-   the C string is not encoded to UTF-8 or ASCII, return false (``0``) .
+   the C string is not valid UTF-8, false (``0``) is returned.
 
    This function does not raise exceptions.
 
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 5181d346254b97..ff92bd17065922 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1004,7 +1004,7 @@ New Features
   (Contributed by Victor Stinner in :gh:`107073`.)
 
 * Add :c:func:`PyUnicode_EqualToUTF8` function: compare Unicode object with
-  a :c:expr:`const char*` UTF-8 encoded bytes string and return true (``1``)
+  a :c:expr:`const char*` UTF-8 encoded string and return true (``1``)
   if they are equal, or false (``0``) otherwise.
   This function does not raise exceptions.
   (Contributed by Serhiy Storchaka in :gh:`110289`.)

From 1a4eb7bb149cd1b3dcdde7e822119f80ece85cd6 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 6 Oct 2023 09:44:53 +0300
Subject: [PATCH 11/16] Add PyUnicode_EqualToUTF8AndSize().

---
 Doc/c-api/unicode.rst                         | 18 ++++--
 Doc/data/stable_abi.dat                       |  1 +
 Doc/whatsnew/3.13.rst                         |  8 +--
 Include/unicodeobject.h                       |  1 +
 Lib/test/test_capi/test_unicode.py            | 58 +++++++++++++++++--
 Lib/test/test_stable_abi_ctypes.py            |  1 +
 ...-10-03-19-01-20.gh-issue-110289.YBIHEz.rst |  2 +-
 Misc/stable_abi.toml                          |  2 +
 Modules/_testcapi/unicode.c                   | 24 ++++++++
 Objects/unicodeobject.c                       | 36 +++++++-----
 PC/python3dll.c                               |  1 +
 11 files changed, 125 insertions(+), 27 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 57e55cdbfe496a..00f4bac65a252a 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1396,11 +1396,12 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    :c:func:`PyErr_Occurred` to check for errors.
 
 
-.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
+.. c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *string, Py_ssize_t size)
 
-   Compare a Unicode object with a UTF-8 or ASCII encoded C string
-   and return true (``1``) if they are equal, or false (``0``) otherwise.
-   If the Unicode object contains null or surrogate characters or
+   Compare a Unicode object with a char buffer which is interpreted as
+   being UTF-8 or ASCII encoded and return true (``1``) if they are equal,
+   or false (``0``) otherwise.
+   If the Unicode object contains surrogate characters or
    the C string is not valid UTF-8, false (``0``) is returned.
 
    This function does not raise exceptions.
@@ -1408,6 +1409,15 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    .. versionadded:: 3.13
 
 
+.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
+
+   Similar to :c:func:`PyUnicode_EqualToUTF8AndSize`, but compute the string
+   length using :c:func:`!strlen`.
+   If the Unicode object contains null characters, false (``0``) is returned.
+
+   .. versionadded:: 3.13
+
+
 .. c:function:: int PyUnicode_CompareWithASCIIString(PyObject *uni, const char *string)
 
    Compare a Unicode object, *uni*, with *string* and return ``-1``, ``0``, ``1`` for less
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 1407659d1ae576..bfb1f97b554fc6 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -756,6 +756,7 @@ function,PyUnicode_EncodeCodePage,3.7,on Windows,
 function,PyUnicode_EncodeFSDefault,3.2,,
 function,PyUnicode_EncodeLocale,3.7,,
 function,PyUnicode_EqualToUTF8,3.13,,
+function,PyUnicode_EqualToUTF8AndSize,3.13,,
 function,PyUnicode_FSConverter,3.2,,
 function,PyUnicode_FSDecoder,3.2,,
 function,PyUnicode_Find,3.2,,
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index ff92bd17065922..2577606373e4ba 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1003,10 +1003,10 @@ New Features
   functions on Python 3.11 and 3.12.
   (Contributed by Victor Stinner in :gh:`107073`.)
 
-* Add :c:func:`PyUnicode_EqualToUTF8` function: compare Unicode object with
-  a :c:expr:`const char*` UTF-8 encoded string and return true (``1``)
-  if they are equal, or false (``0``) otherwise.
-  This function does not raise exceptions.
+* Add :c:func:`PyUnicode_EqualToUTF8AndSize` and :c:func:`PyUnicode_EqualToUTF8`
+  functions: compare a Unicode object with a :c:expr:`const char*` UTF-8 encoded
+  string and return true (``1``) if they are equal, or false (``0``) otherwise.
+  These functions do not raise exceptions.
   (Contributed by Serhiy Storchaka in :gh:`110289`.)
 
 * Add :c:func:`PyThreadState_GetUnchecked()` function: similar to
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 1bce505e9c4d32..dee00715b3c51d 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -963,6 +963,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
    This function does not raise exceptions. */
 
 PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *);
+PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t);
 #endif
 
 /* Rich compare two strings and return one of the following:
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 98cc69741baa0a..0bbab9bfc0ec01 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1320,6 +1320,7 @@ def test_equaltoutf8(self):
             self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)
             self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)
             self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)
+            self.assertEqual(equaltoutf8(s + '\0', b), 0)
             self.assertEqual(equaltoutf8(s2, b + b'x'), 0)
             self.assertEqual(equaltoutf8(s2, b[:-1]), 0)
             self.assertEqual(equaltoutf8(s2, b[:-1] + b'x'), 0)
@@ -1337,8 +1338,54 @@ def test_equaltoutf8(self):
         self.assertEqual(equaltoutf8('\ud801',
                             '\ud801'.encode("utf8", "surrogatepass")), 0)
 
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_equaltoutf8andsize(self):
+        """Test PyUnicode_EqualToUTF8AndSize()"""
+        from _testcapi import unicode_equaltoutf8andsize as equaltoutf8andsize
+        from _testcapi import unicode_asutf8andsize as asutf8andsize
+
+        strings = [
+            'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
+            '\U0001f600\U0001f601\U0001f602',
+            '\U0010ffff',
+        ]
+        for s in strings:
+            # Call PyUnicode_AsUTF8AndSize() which creates the UTF-8
+            # encoded string cached in the Unicode object.
+            asutf8andsize(s, 0)
+            b = s.encode()
+            self.assertEqual(equaltoutf8andsize(s, b), 1)  # Use the UTF-8 cache.
+            s2 = b.decode()  # New Unicode object without the UTF-8 cache.
+            self.assertEqual(equaltoutf8andsize(s2, b), 1)
+            self.assertEqual(equaltoutf8andsize(s + 'x', b + b'x'), 1)
+            self.assertEqual(equaltoutf8andsize(s + 'x', b + b'y'), 0)
+            self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0'), 1)
+            self.assertEqual(equaltoutf8andsize(s + '\0', b), 0)
+            self.assertEqual(equaltoutf8andsize(s2, b + b'x'), 0)
+            self.assertEqual(equaltoutf8andsize(s2, b[:-1]), 0)
+            self.assertEqual(equaltoutf8andsize(s2, b[:-1] + b'x'), 0)
+            # Not null-terminated
+            self.assertEqual(equaltoutf8andsize(s, b + b'x', len(b)), 1)
+            self.assertEqual(equaltoutf8andsize(s2, b + b'x', len(b)), 1)
+            self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0x', len(b) + 1), 1)
+            self.assertEqual(equaltoutf8andsize(s2, b, len(b) - 1), 0)
+
+        # embedded null chars/bytes
+        self.assertEqual(equaltoutf8andsize('abc', b'abc\0def\0'), 0)
+        self.assertEqual(equaltoutf8andsize('a\0bc', b'abc'), 0)
+        self.assertEqual(equaltoutf8andsize('abc', b'a\0bc'), 0)
+
+        # Surrogate characters are always treated as not equal
+        self.assertEqual(equaltoutf8andsize('\udcfe',
+                            '\udcfe'.encode("utf8", "surrogateescape")), 0)
+        self.assertEqual(equaltoutf8andsize('\udcfe',
+                            '\udcfe'.encode("utf8", "surrogatepass")), 0)
+        self.assertEqual(equaltoutf8andsize('\ud801',
+                            '\ud801'.encode("utf8", "surrogatepass")), 0)
+
         def check_not_equal_encoding(text, encoding):
-            self.assertEqual(equaltoutf8(text, text.encode(encoding)), 0)
+            self.assertEqual(equaltoutf8andsize(text, text.encode(encoding)), 0)
             self.assertNotEqual(text.encode(encoding), text.encode("utf8"))
 
         # Strings encoded to other encodings are not equal to expected UTF8-encoding string
@@ -1346,10 +1393,11 @@ def check_not_equal_encoding(text, encoding):
         check_not_equal_encoding('Stéphane', 'utf-16-le')  # embedded null characters
         check_not_equal_encoding('北京市', 'gbk')
 
-        # CRASHES equaltoutf8(b'abc', b'abc')
-        # CRASHES equaltoutf8([], b'abc')
-        # CRASHES equaltoutf8(NULL, b'abc')
-        # CRASHES equaltoutf8('abc', NULL)
+        # CRASHES equaltoutf8andsize('abc', b'abc', -1)
+        # CRASHES equaltoutf8andsize(b'abc', b'abc')
+        # CRASHES equaltoutf8andsize([], b'abc')
+        # CRASHES equaltoutf8andsize(NULL, b'abc')
+        # CRASHES equaltoutf8andsize('abc', NULL)
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index d1ae4e382c9470..2a22f6edbf4761 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -771,6 +771,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_EncodeFSDefault",
     "PyUnicode_EncodeLocale",
     "PyUnicode_EqualToUTF8",
+    "PyUnicode_EqualToUTF8AndSize",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
diff --git a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst
index b1582bc1591590..9028e35130d50c 100644
--- a/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst	
+++ b/Misc/NEWS.d/next/C API/2023-10-03-19-01-20.gh-issue-110289.YBIHEz.rst	
@@ -1 +1 @@
-Add :c:func:`PyUnicode_EqualToUTF8` function.
+Add :c:func:`PyUnicode_EqualToUTF8AndSize` and :c:func:`PyUnicode_EqualToUTF8` functions.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index ae39fea3a66a24..4503a9c45d4ac0 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2462,3 +2462,5 @@
     added = '3.13'
 [function.PyUnicode_EqualToUTF8]
     added = '3.13'
+[function.PyUnicode_EqualToUTF8AndSize]
+    added = '3.13'
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 732d7f48ec49e4..d52d88a65d86fc 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -1448,6 +1448,29 @@ unicode_equaltoutf8(PyObject *self, PyObject *args)
     return PyLong_FromLong(result);
 }
 
+/* Test PyUnicode_EqualToUTF8AndSize() */
+static PyObject *
+unicode_equaltoutf8andsize(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    const char *right = NULL;
+    Py_ssize_t right_len;
+    Py_ssize_t size = -100;
+    int result;
+
+    if (!PyArg_ParseTuple(args, "Oz#|n", &left, &right, &right_len, &size)) {
+        return NULL;
+    }
+
+    NULLABLE(left);
+    if (size == -100) {
+        size = right_len;
+    }
+    result = PyUnicode_EqualToUTF8AndSize(left, right, size);
+    assert(!PyErr_Occurred());
+    return PyLong_FromLong(result);
+}
+
 /* Test PyUnicode_RichCompare() */
 static PyObject *
 unicode_richcompare(PyObject *self, PyObject *args)
@@ -2064,6 +2087,7 @@ static PyMethodDef TestMethods[] = {
     {"unicode_compare",          unicode_compare,                METH_VARARGS},
     {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS},
     {"unicode_equaltoutf8",      unicode_equaltoutf8,            METH_VARARGS},
+    {"unicode_equaltoutf8andsize",unicode_equaltoutf8andsize,    METH_VARARGS},
     {"unicode_richcompare",      unicode_richcompare,            METH_VARARGS},
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18e99a500bf3c6..63b65f35f2936a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10675,39 +10675,47 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 
 int
 PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
+{
+    return PyUnicode_EqualToUTF8AndSize(unicode, str, strlen(str));
+}
+
+int
+PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size)
 {
     assert(_PyUnicode_CHECK(unicode));
     assert(str);
 
     if (PyUnicode_IS_ASCII(unicode)) {
-        size_t len = (size_t)PyUnicode_GET_LENGTH(unicode);
-        return strlen(str) == len &&
+        Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+        return size == len &&
             memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
     }
     if (PyUnicode_UTF8(unicode) != NULL) {
-        size_t len = (size_t)PyUnicode_UTF8_LENGTH(unicode);
-        return strlen(str) == len &&
+        Py_ssize_t len = PyUnicode_UTF8_LENGTH(unicode);
+        return size == len &&
             memcmp(PyUnicode_UTF8(unicode), str, len) == 0;
     }
 
-    const unsigned char *s = (const unsigned char *)str;
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+    if ((size_t)len >= (size_t)size || (size_t)len < (size_t)size / 4) {
+        return 0;
+    }
+    const unsigned char *s = (const unsigned char *)str;
+    const unsigned char *ends = s + (size_t)size;
     int kind = PyUnicode_KIND(unicode);
     const void *data = PyUnicode_DATA(unicode);
     /* Compare Unicode string and UTF-8 string */
     for (Py_ssize_t i = 0; i < len; i++) {
         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        if (ch == 0) {
-            return 0;
-        }
-        else if (ch < 0x80) {
-            if (s[0] != ch) {
+        if (ch < 0x80) {
+            if (ends == s || s[0] != ch) {
                 return 0;
             }
             s += 1;
         }
         else if (ch < 0x800) {
-            if (s[0] != (0xc0 | (ch >> 6)) ||
+            if (ends - s < 2 ||
+                s[0] != (0xc0 | (ch >> 6)) ||
                 s[1] != (0x80 | (ch & 0x3f)))
             {
                 return 0;
@@ -10716,6 +10724,7 @@ PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
         }
         else if (ch < 0x10000) {
             if (Py_UNICODE_IS_SURROGATE(ch) ||
+                ends - s < 3 ||
                 s[0] != (0xe0 | (ch >> 12)) ||
                 s[1] != (0x80 | ((ch >> 6) & 0x3f)) ||
                 s[2] != (0x80 | (ch & 0x3f)))
@@ -10726,7 +10735,8 @@ PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
         }
         else {
             assert(ch <= MAX_UNICODE);
-            if (s[0] != (0xf0 | (ch >> 18)) ||
+            if (ends - s < 4 ||
+                s[0] != (0xf0 | (ch >> 18)) ||
                 s[1] != (0x80 | ((ch >> 12) & 0x3f)) ||
                 s[2] != (0x80 | ((ch >> 6) & 0x3f)) ||
                 s[3] != (0x80 | (ch & 0x3f)))
@@ -10736,7 +10746,7 @@ PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
             s += 4;
         }
     }
-    return *s == 0;
+    return s == ends;
 }
 
 int
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 0beb61f28e0ef8..1fb4c810cf1cfb 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -689,6 +689,7 @@ EXPORT_FUNC(PyUnicode_EncodeCodePage)
 EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
 EXPORT_FUNC(PyUnicode_EqualToUTF8)
+EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)

From b1243770a7bb5d8234708d8070320fd256a8b5df Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 7 Oct 2023 15:43:47 +0300
Subject: [PATCH 12/16] Apply suggestions from code review

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Doc/c-api/unicode.rst              | 2 +-
 Lib/test/test_capi/test_unicode.py | 5 ++---
 Objects/unicodeobject.c            | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 00f4bac65a252a..5ab9f1cab23ef8 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1411,7 +1411,7 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
 
 .. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *string)
 
-   Similar to :c:func:`PyUnicode_EqualToUTF8AndSize`, but compute the string
+   Similar to :c:func:`PyUnicode_EqualToUTF8AndSize`, but compute *string*
    length using :c:func:`!strlen`.
    If the Unicode object contains null characters, false (``0``) is returned.
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 0bbab9bfc0ec01..e10c4ff3a94f7c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1372,9 +1372,8 @@ def test_equaltoutf8andsize(self):
             self.assertEqual(equaltoutf8andsize(s2, b, len(b) - 1), 0)
 
         # embedded null chars/bytes
-        self.assertEqual(equaltoutf8andsize('abc', b'abc\0def\0'), 0)
-        self.assertEqual(equaltoutf8andsize('a\0bc', b'abc'), 0)
-        self.assertEqual(equaltoutf8andsize('abc', b'a\0bc'), 0)
+        self.assertEqual(equaltoutf8andsize('abc\0def', b'abc\0def', 7), 1)
+        self.assertEqual(equaltoutf8andsize('abc\0def\0', b'abc\0def\0', 8), 1)
 
         # Surrogate characters are always treated as not equal
         self.assertEqual(equaltoutf8andsize('\udcfe',
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 63b65f35f2936a..e234277c37513a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10714,7 +10714,7 @@ PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size
             s += 1;
         }
         else if (ch < 0x800) {
-            if (ends - s < 2 ||
+            if ((ends - s) < 2 ||
                 s[0] != (0xc0 | (ch >> 6)) ||
                 s[1] != (0x80 | (ch & 0x3f)))
             {

From 029f1a06efd2e41139d2ce9842e6a5511163c74a Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 7 Oct 2023 15:48:57 +0300
Subject: [PATCH 13/16] Add more parentheses.

---
 Objects/unicodeobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e234277c37513a..33cbc987d43282 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10724,7 +10724,7 @@ PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size
         }
         else if (ch < 0x10000) {
             if (Py_UNICODE_IS_SURROGATE(ch) ||
-                ends - s < 3 ||
+                (ends - s) < 3 ||
                 s[0] != (0xe0 | (ch >> 12)) ||
                 s[1] != (0x80 | ((ch >> 6) & 0x3f)) ||
                 s[2] != (0x80 | (ch & 0x3f)))
@@ -10735,7 +10735,7 @@ PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size
         }
         else {
             assert(ch <= MAX_UNICODE);
-            if (ends - s < 4 ||
+            if ((ends - s) < 4 ||
                 s[0] != (0xf0 | (ch >> 18)) ||
                 s[1] != (0x80 | ((ch >> 12) & 0x3f)) ||
                 s[2] != (0x80 | ((ch >> 6) & 0x3f)) ||

From be2ffe844a5f274168f2f7e554f8e1f745e83cb9 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 7 Oct 2023 15:50:58 +0300
Subject: [PATCH 14/16] Remove redundant arguments.

---
 Lib/test/test_capi/test_unicode.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index e10c4ff3a94f7c..28ab4ddb46009b 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1372,8 +1372,8 @@ def test_equaltoutf8andsize(self):
             self.assertEqual(equaltoutf8andsize(s2, b, len(b) - 1), 0)
 
         # embedded null chars/bytes
-        self.assertEqual(equaltoutf8andsize('abc\0def', b'abc\0def', 7), 1)
-        self.assertEqual(equaltoutf8andsize('abc\0def\0', b'abc\0def\0', 8), 1)
+        self.assertEqual(equaltoutf8andsize('abc\0def', b'abc\0def'), 1)
+        self.assertEqual(equaltoutf8andsize('abc\0def\0', b'abc\0def\0'), 1)
 
         # Surrogate characters are always treated as not equal
         self.assertEqual(equaltoutf8andsize('\udcfe',

From 78de49d5f40466abfa88640eb251d956e6ebb855 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 10 Oct 2023 23:34:19 +0300
Subject: [PATCH 15/16] Turn docstrings into comments.

---
 Lib/test/test_capi/test_unicode.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 28ab4ddb46009b..dd0dc950ca0b90 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1300,7 +1300,7 @@ def test_comparewithasciistring(self):
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_equaltoutf8(self):
-        """Test PyUnicode_EqualToUTF8()"""
+        # Test PyUnicode_EqualToUTF8()
         from _testcapi import unicode_equaltoutf8 as equaltoutf8
         from _testcapi import unicode_asutf8andsize as asutf8andsize
 
@@ -1341,7 +1341,7 @@ def test_equaltoutf8(self):
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_equaltoutf8andsize(self):
-        """Test PyUnicode_EqualToUTF8AndSize()"""
+        # Test PyUnicode_EqualToUTF8AndSize()
         from _testcapi import unicode_equaltoutf8andsize as equaltoutf8andsize
         from _testcapi import unicode_asutf8andsize as asutf8andsize
 

From 19ad12633077bc6122a2a1340d6843f8da241574 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 11 Oct 2023 13:05:25 +0300
Subject: [PATCH 16/16] Add tests for empty strings.

---
 Lib/test/test_capi/test_unicode.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index dd0dc950ca0b90..a73e669dda7ddc 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1319,12 +1319,17 @@ def test_equaltoutf8(self):
             self.assertEqual(equaltoutf8(s2, b), 1)
             self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)
             self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)
+            self.assertEqual(equaltoutf8(s, b + b'\0'), 1)
+            self.assertEqual(equaltoutf8(s2, b + b'\0'), 1)
             self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)
             self.assertEqual(equaltoutf8(s + '\0', b), 0)
             self.assertEqual(equaltoutf8(s2, b + b'x'), 0)
             self.assertEqual(equaltoutf8(s2, b[:-1]), 0)
             self.assertEqual(equaltoutf8(s2, b[:-1] + b'x'), 0)
 
+        self.assertEqual(equaltoutf8('', b''), 1)
+        self.assertEqual(equaltoutf8('', b'\0'), 1)
+
         # embedded null chars/bytes
         self.assertEqual(equaltoutf8('abc', b'abc\0def\0'), 1)
         self.assertEqual(equaltoutf8('a\0bc', b'abc'), 0)
@@ -1360,6 +1365,8 @@ def test_equaltoutf8andsize(self):
             self.assertEqual(equaltoutf8andsize(s2, b), 1)
             self.assertEqual(equaltoutf8andsize(s + 'x', b + b'x'), 1)
             self.assertEqual(equaltoutf8andsize(s + 'x', b + b'y'), 0)
+            self.assertEqual(equaltoutf8andsize(s, b + b'\0'), 0)
+            self.assertEqual(equaltoutf8andsize(s2, b + b'\0'), 0)
             self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0'), 1)
             self.assertEqual(equaltoutf8andsize(s + '\0', b), 0)
             self.assertEqual(equaltoutf8andsize(s2, b + b'x'), 0)
@@ -1371,6 +1378,10 @@ def test_equaltoutf8andsize(self):
             self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0x', len(b) + 1), 1)
             self.assertEqual(equaltoutf8andsize(s2, b, len(b) - 1), 0)
 
+        self.assertEqual(equaltoutf8andsize('', b''), 1)
+        self.assertEqual(equaltoutf8andsize('', b'\0'), 0)
+        self.assertEqual(equaltoutf8andsize('', b'x', 0), 1)
+
         # embedded null chars/bytes
         self.assertEqual(equaltoutf8andsize('abc\0def', b'abc\0def'), 1)
         self.assertEqual(equaltoutf8andsize('abc\0def\0', b'abc\0def\0'), 1)