From 2d6f3e1125a4cf2f3744fb02f9cf959624cceca1 Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Fri, 29 Jul 2022 21:07:48 +0200 Subject: [PATCH 1/6] Add tests for PyUnicode_FromFormat --- Modules/_testcapimodule.c | 392 +++++++++++++++++++++++++++++++++++--- 1 file changed, 370 insertions(+), 22 deletions(-) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index b9f75d154ee5c0..f4ccbdf154d8dd 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3194,44 +3194,392 @@ pending_threadfunc(PyObject *self, PyObject *arg) Py_RETURN_TRUE; } -/* Some tests of PyUnicode_FromFormat(). This needs more tests. */ +/* Some tests of PyUnicode_FromFormat(). */ static PyObject * test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) { PyObject *result; - char *msg; + PyObject *unicode = PyUnicode_FromString("None"); -#define CHECK_1_FORMAT(FORMAT, TYPE) \ - result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \ +#define CHECK_FORMAT_2(FORMAT, EXPECTED, ARG1, ARG2) \ + result = PyUnicode_FromFormat(FORMAT, ARG1, ARG2); \ if (result == NULL) \ return NULL; \ - if (!_PyUnicode_EqualToASCIIString(result, "1")) { \ - msg = FORMAT " failed at 1"; \ + if (!_PyUnicode_EqualToASCIIString(result, EXPECTED)) { \ + PyErr_Format(TestError, \ + "test_string_from_format: failed at \"%s\" " \ + "expected \"%s\" got \"%s\"", \ + FORMAT, EXPECTED, PyUnicode_AsUTF8(result)); \ goto Fail; \ } \ Py_DECREF(result) - CHECK_1_FORMAT("%d", int); - CHECK_1_FORMAT("%ld", long); - /* The z width modifier was added in Python 2.5. */ - CHECK_1_FORMAT("%zd", Py_ssize_t); - - /* The u type code was added in Python 2.5. */ - CHECK_1_FORMAT("%u", unsigned int); - CHECK_1_FORMAT("%lu", unsigned long); - CHECK_1_FORMAT("%zu", size_t); - - /* "%lld" and "%llu" support added in Python 2.7. 
*/ - CHECK_1_FORMAT("%llu", unsigned long long); - CHECK_1_FORMAT("%lld", long long); - +#define CHECK_FORMAT_1(FORMAT, EXPECTED, ARG) \ + CHECK_FORMAT_2(FORMAT, EXPECTED, ARG, 0) + +#define CHECK_FORMAT_0(FORMAT, EXPECTED) \ + CHECK_FORMAT_2(FORMAT, EXPECTED, 0, 0) + + // Unrecognized + CHECK_FORMAT_2("%u %? %u", "1 %? %u", 1, 2); + + // "%%" + CHECK_FORMAT_0( "%%", "%"); + CHECK_FORMAT_0( "%0%", "%"); + CHECK_FORMAT_0("%00%", "%"); + CHECK_FORMAT_0( "%2%", "%"); + CHECK_FORMAT_0("%02%", "%"); + CHECK_FORMAT_0("%.0%", "%.0%"); + CHECK_FORMAT_0("%.2%", "%.2%"); + + // "%c" + CHECK_FORMAT_1( "%c", "c", 'c'); + CHECK_FORMAT_1( "%0c", "c", 'c'); + CHECK_FORMAT_1("%00c", "c", 'c'); + CHECK_FORMAT_1( "%2c", "c", 'c'); + CHECK_FORMAT_1("%02c", "c", 'c'); + CHECK_FORMAT_1("%.0c", "c", 'c'); + CHECK_FORMAT_1("%.2c", "c", 'c'); + + // Integers + CHECK_FORMAT_1("%d", "123", (int)123); + CHECK_FORMAT_1("%i", "123", (int)123); + CHECK_FORMAT_1("%u", "123", (unsigned int)123); + CHECK_FORMAT_1("%ld", "123", (long)123); + CHECK_FORMAT_1("%li", "123", (long)123); + CHECK_FORMAT_1("%lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%lld", "123", (long long)123); + CHECK_FORMAT_1("%lli", "123", (long long)123); + CHECK_FORMAT_1("%llu", "123", (unsigned long long)123); + CHECK_FORMAT_1("%zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%zu", "123", (size_t)123); + CHECK_FORMAT_1("%x", "7b", (int)123); + + CHECK_FORMAT_1("%d", "-123", (int)-123); + CHECK_FORMAT_1("%i", "-123", (int)-123); + CHECK_FORMAT_1("%ld", "-123", (long)-123); + CHECK_FORMAT_1("%li", "-123", (long)-123); + CHECK_FORMAT_1("%lld", "-123", (long long)-123); + CHECK_FORMAT_1("%lli", "-123", (long long)-123); + CHECK_FORMAT_1("%zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%zi", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%x", "ffffff85", (int)-123); + + // Integers: width < length + CHECK_FORMAT_1("%1d", "123", (int)123); + CHECK_FORMAT_1("%1i", "123", 
(int)123); + CHECK_FORMAT_1("%1u", "123", (unsigned int)123); + CHECK_FORMAT_1("%1ld", "123", (long)123); + CHECK_FORMAT_1("%1li", "123", (long)123); + CHECK_FORMAT_1("%1lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%1lld", "123", (long long)123); + CHECK_FORMAT_1("%1lli", "123", (long long)123); + CHECK_FORMAT_1("%1llu", "123", (unsigned long long)123); + CHECK_FORMAT_1("%1zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%1zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%1zu", "123", (size_t)123); + CHECK_FORMAT_1("%1x", "7b", (int)123); + + CHECK_FORMAT_1("%1d", "-123", (int)-123); + CHECK_FORMAT_1("%1i", "-123", (int)-123); + CHECK_FORMAT_1("%1ld", "-123", (long)-123); + CHECK_FORMAT_1("%1li", "-123", (long)-123); + CHECK_FORMAT_1("%1lld", "-123", (long long)-123); + CHECK_FORMAT_1("%1lli", "-123", (long long)-123); + CHECK_FORMAT_1("%1zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%1zi", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%1x", "ffffff85", (int)-123); + + // Integers: width > length + CHECK_FORMAT_1("%5d", " 123", (int)123); + CHECK_FORMAT_1("%5i", " 123", (int)123); + CHECK_FORMAT_1("%5u", " 123", (unsigned int)123); + CHECK_FORMAT_1("%5ld", " 123", (long)123); + CHECK_FORMAT_1("%5li", " 123", (long)123); + CHECK_FORMAT_1("%5lu", " 123", (unsigned long)123); + CHECK_FORMAT_1("%5lld", " 123", (long long)123); + CHECK_FORMAT_1("%5lli", " 123", (long long)123); + CHECK_FORMAT_1("%5llu", " 123", (unsigned long long)123); + CHECK_FORMAT_1("%5zd", " 123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5zi", " 123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5zu", " 123", (size_t)123); + CHECK_FORMAT_1("%5x", " 7b", (int)123); + + CHECK_FORMAT_1("%5d", " -123", (int)-123); + CHECK_FORMAT_1("%5i", " -123", (int)-123); + CHECK_FORMAT_1("%5ld", " -123", (long)-123); + CHECK_FORMAT_1("%5li", " -123", (long)-123); + CHECK_FORMAT_1("%5lld", " -123", (long long)-123); + CHECK_FORMAT_1("%5lli", " -123", (long long)-123); + CHECK_FORMAT_1("%5zd", " -123", (Py_ssize_t)-123); 
+ CHECK_FORMAT_1("%5zi", " -123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%9x", " ffffff85", (int)-123); + + // Integers: width > length, 0-flag + CHECK_FORMAT_1("%05d", "00123", (int)123); + CHECK_FORMAT_1("%05i", "00123", (int)123); + CHECK_FORMAT_1("%05u", "00123", (unsigned int)123); + CHECK_FORMAT_1("%05ld", "00123", (long)123); + CHECK_FORMAT_1("%05li", "00123", (long)123); + CHECK_FORMAT_1("%05lu", "00123", (unsigned long)123); + CHECK_FORMAT_1("%05lld", "00123", (long long)123); + CHECK_FORMAT_1("%05lli", "00123", (long long)123); + CHECK_FORMAT_1("%05llu", "00123", (unsigned long long)123); + CHECK_FORMAT_1("%05zd", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05zi", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05zu", "00123", (size_t)123); + CHECK_FORMAT_1("%05x", "0007b", (int)123); + + CHECK_FORMAT_1("%05d", "0-123", (int)-123); + CHECK_FORMAT_1("%05i", "0-123", (int)-123); + CHECK_FORMAT_1("%05ld", "0-123", (long)-123); + CHECK_FORMAT_1("%05li", "0-123", (long)-123); + CHECK_FORMAT_1("%05lld", "0-123", (long long)-123); + CHECK_FORMAT_1("%05lli", "0-123", (long long)-123); + CHECK_FORMAT_1("%05zd", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05zi", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%09x", "0ffffff85", (int)-123); + + // Integers: precision < length + CHECK_FORMAT_1("%.1d", "123", (int)123); + CHECK_FORMAT_1("%.1i", "123", (int)123); + CHECK_FORMAT_1("%.1u", "123", (unsigned int)123); + CHECK_FORMAT_1("%.1ld", "123", (long)123); + CHECK_FORMAT_1("%.1li", "123", (long)123); + CHECK_FORMAT_1("%.1lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%.1lld", "123", (long long)123); + CHECK_FORMAT_1("%.1lli", "123", (long long)123); + CHECK_FORMAT_1("%.1llu", "123", (unsigned long long)123); + CHECK_FORMAT_1("%.1zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.1zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.1zu", "123", (size_t)123); + CHECK_FORMAT_1("%.1x", "7b", (int)123); + + CHECK_FORMAT_1("%.1d", "-123", (int)-123); + 
CHECK_FORMAT_1("%.1i", "-123", (int)-123); + CHECK_FORMAT_1("%.1ld", "-123", (long)-123); + CHECK_FORMAT_1("%.1li", "-123", (long)-123); + CHECK_FORMAT_1("%.1lld", "-123", (long long)-123); + CHECK_FORMAT_1("%.1lli", "-123", (long long)-123); + CHECK_FORMAT_1("%.1zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.1zi", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.1x", "ffffff85", (int)-123); + + // Integers: precision > length + CHECK_FORMAT_1("%.5d", "00123", (int)123); + CHECK_FORMAT_1("%.5i", "00123", (int)123); + CHECK_FORMAT_1("%.5u", "00123", (unsigned int)123); + CHECK_FORMAT_1("%.5ld", "00123", (long)123); + CHECK_FORMAT_1("%.5li", "00123", (long)123); + CHECK_FORMAT_1("%.5lu", "00123", (unsigned long)123); + CHECK_FORMAT_1("%.5lld", "00123", (long long)123); + CHECK_FORMAT_1("%.5lli", "00123", (long long)123); + CHECK_FORMAT_1("%.5llu", "00123", (unsigned long long)123); + CHECK_FORMAT_1("%.5zd", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.5zi", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.5zu", "00123", (size_t)123); + CHECK_FORMAT_1("%.5x", "0007b", (int)123); + + CHECK_FORMAT_1("%.5d", "0-123", (int)-123); + CHECK_FORMAT_1("%.5i", "0-123", (int)-123); + CHECK_FORMAT_1("%.5ld", "0-123", (long)-123); + CHECK_FORMAT_1("%.5li", "0-123", (long)-123); + CHECK_FORMAT_1("%.5lld", "0-123", (long long)-123); + CHECK_FORMAT_1("%.5lli", "0-123", (long long)-123); + CHECK_FORMAT_1("%.5zd", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.5zi", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.9x", "0ffffff85", (int)-123); + + // Integers: width > precision > length + CHECK_FORMAT_1("%7.5d", " 00123", (int)123); + CHECK_FORMAT_1("%7.5i", " 00123", (int)123); + CHECK_FORMAT_1("%7.5u", " 00123", (unsigned int)123); + CHECK_FORMAT_1("%7.5ld", " 00123", (long)123); + CHECK_FORMAT_1("%7.5li", " 00123", (long)123); + CHECK_FORMAT_1("%7.5lu", " 00123", (unsigned long)123); + CHECK_FORMAT_1("%7.5lld", " 00123", (long long)123); + CHECK_FORMAT_1("%7.5lli", " 00123", 
(long long)123); + CHECK_FORMAT_1("%7.5llu", " 00123", (unsigned long long)123); + CHECK_FORMAT_1("%7.5zd", " 00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%7.5zi", " 00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%7.5zu", " 00123", (size_t)123); + CHECK_FORMAT_1("%7.5x", " 0007b", (int)123); + + CHECK_FORMAT_1("%7.5d", " 0-123", (int)-123); + CHECK_FORMAT_1("%7.5i", " 0-123", (int)-123); + CHECK_FORMAT_1("%7.5ld", " 0-123", (long)-123); + CHECK_FORMAT_1("%7.5li", " 0-123", (long)-123); + CHECK_FORMAT_1("%7.5lld", " 0-123", (long long)-123); + CHECK_FORMAT_1("%7.5lli", " 0-123", (long long)-123); + CHECK_FORMAT_1("%7.5zd", " 0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%7.5zi", " 0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%10.9x", " 0ffffff85", (int)-123); + + // Integers: width > precision > length, 0-flag + CHECK_FORMAT_1("%07.5d", "0000123", (int)123); + CHECK_FORMAT_1("%07.5i", "0000123", (int)123); + CHECK_FORMAT_1("%07.5u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%07.5ld", "0000123", (long)123); + CHECK_FORMAT_1("%07.5li", "0000123", (long)123); + CHECK_FORMAT_1("%07.5lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%07.5lld", "0000123", (long long)123); + CHECK_FORMAT_1("%07.5lli", "0000123", (long long)123); + CHECK_FORMAT_1("%07.5llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%07.5zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%07.5zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%07.5zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%07.5x", "000007b", (int)123); + + CHECK_FORMAT_1("%07.5d", "000-123", (int)-123); + CHECK_FORMAT_1("%07.5i", "000-123", (int)-123); + CHECK_FORMAT_1("%07.5ld", "000-123", (long)-123); + CHECK_FORMAT_1("%07.5li", "000-123", (long)-123); + CHECK_FORMAT_1("%07.5lld", "000-123", (long long)-123); + CHECK_FORMAT_1("%07.5lli", "000-123", (long long)-123); + CHECK_FORMAT_1("%07.5zd", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%07.5zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%010.9x", 
"00ffffff85", (int)-123); + + // Integers: precision > width > length + CHECK_FORMAT_1("%5.7d", "0000123", (int)123); + CHECK_FORMAT_1("%5.7i", "0000123", (int)123); + CHECK_FORMAT_1("%5.7u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%5.7ld", "0000123", (long)123); + CHECK_FORMAT_1("%5.7li", "0000123", (long)123); + CHECK_FORMAT_1("%5.7lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%5.7lld", "0000123", (long long)123); + CHECK_FORMAT_1("%5.7lli", "0000123", (long long)123); + CHECK_FORMAT_1("%5.7llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%5.7zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5.7zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5.7zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%5.7x", "000007b", (int)123); + + CHECK_FORMAT_1("%5.7d", "000-123", (int)-123); + CHECK_FORMAT_1("%5.7i", "000-123", (int)-123); + CHECK_FORMAT_1("%5.7ld", "000-123", (long)-123); + CHECK_FORMAT_1("%5.7li", "000-123", (long)-123); + CHECK_FORMAT_1("%5.7lld", "000-123", (long long)-123); + CHECK_FORMAT_1("%5.7lli", "000-123", (long long)-123); + CHECK_FORMAT_1("%5.7zd", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%5.7zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%9.10x", "00ffffff85", (int)-123); + + // Integers: precision > width > length, 0-flag + CHECK_FORMAT_1("%05.7d", "0000123", (int)123); + CHECK_FORMAT_1("%05.7i", "0000123", (int)123); + CHECK_FORMAT_1("%05.7u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%05.7ld", "0000123", (long)123); + CHECK_FORMAT_1("%05.7li", "0000123", (long)123); + CHECK_FORMAT_1("%05.7lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%05.7lld", "0000123", (long long)123); + CHECK_FORMAT_1("%05.7lli", "0000123", (long long)123); + CHECK_FORMAT_1("%05.7llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%05.7zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05.7zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05.7zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%05.7x", 
"000007b", (int)123); + + CHECK_FORMAT_1("%05.7d", "000-123", (int)-123); + CHECK_FORMAT_1("%05.7i", "000-123", (int)-123); + CHECK_FORMAT_1("%05.7ld", "000-123", (long)-123); + CHECK_FORMAT_1("%05.7li", "000-123", (long)-123); + CHECK_FORMAT_1("%05.7lld", "000-123", (long long)-123); + CHECK_FORMAT_1("%05.7lli", "000-123", (long long)-123); + CHECK_FORMAT_1("%05.7zd", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05.7zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%09.10x", "00ffffff85", (int)-123); + + // Integers: precision = 0, arg = 0 (empty string in C) + CHECK_FORMAT_1("%.0d", "0", (int)0); + CHECK_FORMAT_1("%.0i", "0", (int)0); + CHECK_FORMAT_1("%.0u", "0", (unsigned int)0); + CHECK_FORMAT_1("%.0ld", "0", (long)0); + CHECK_FORMAT_1("%.0li", "0", (long)0); + CHECK_FORMAT_1("%.0lu", "0", (unsigned long)0); + CHECK_FORMAT_1("%.0lld", "0", (long long)0); + CHECK_FORMAT_1("%.0lli", "0", (long long)0); + CHECK_FORMAT_1("%.0llu", "0", (unsigned long long)0); + CHECK_FORMAT_1("%.0zd", "0", (Py_ssize_t)0); + CHECK_FORMAT_1("%.0zi", "0", (Py_ssize_t)0); + CHECK_FORMAT_1("%.0zu", "0", (size_t)0); + CHECK_FORMAT_1("%.0x", "0", (int)0); + + // Strings + CHECK_FORMAT_1("%s", "None", "None"); + CHECK_FORMAT_1("%U", "None", unicode); + CHECK_FORMAT_1("%A", "None", Py_None); + CHECK_FORMAT_1("%S", "None", Py_None); + CHECK_FORMAT_1("%R", "None", Py_None); + CHECK_FORMAT_2("%V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%V", "None", NULL, "None"); + + // Strings: width < length + CHECK_FORMAT_1("%1s", "None", "None"); + CHECK_FORMAT_1("%1U", "None", unicode); + CHECK_FORMAT_1("%1A", "None", Py_None); + CHECK_FORMAT_1("%1S", "None", Py_None); + CHECK_FORMAT_1("%1R", "None", Py_None); + CHECK_FORMAT_2("%1V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%1V", "None", NULL, "None"); + + // Strings: width > length + CHECK_FORMAT_1("%5s", " None", "None"); + CHECK_FORMAT_1("%5U", " None", unicode); + CHECK_FORMAT_1("%5A", " None", Py_None); + CHECK_FORMAT_1("%5S", " 
None", Py_None); + CHECK_FORMAT_1("%5R", " None", Py_None); + CHECK_FORMAT_2("%5V", " None", unicode, "ignored"); + CHECK_FORMAT_2("%5V", " None", NULL, "None"); + + // Strings: precision < length + CHECK_FORMAT_1("%.1s", "N", "None"); + CHECK_FORMAT_1("%.1U", "N", unicode); + CHECK_FORMAT_1("%.1A", "N", Py_None); + CHECK_FORMAT_1("%.1S", "N", Py_None); + CHECK_FORMAT_1("%.1R", "N", Py_None); + CHECK_FORMAT_2("%.1V", "N", unicode, "ignored"); + CHECK_FORMAT_2("%.1V", "N", NULL, "None"); + + // Strings: precision > length + CHECK_FORMAT_1("%.5s", "None", "None"); + CHECK_FORMAT_1("%.5U", "None", unicode); + CHECK_FORMAT_1("%.5A", "None", Py_None); + CHECK_FORMAT_1("%.5S", "None", Py_None); + CHECK_FORMAT_1("%.5R", "None", Py_None); + CHECK_FORMAT_2("%.5V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%.5V", "None", NULL, "None"); + + // Strings: precision < length, width > length + CHECK_FORMAT_1("%5.1s", " N", "None"); + CHECK_FORMAT_1("%5.1U", " N", unicode); + CHECK_FORMAT_1("%5.1A", " N", Py_None); + CHECK_FORMAT_1("%5.1S", " N", Py_None); + CHECK_FORMAT_1("%5.1R", " N", Py_None); + CHECK_FORMAT_2("%5.1V", " N", unicode, "ignored"); + CHECK_FORMAT_2("%5.1V", " N", NULL, "None"); + + // Strings: width < length, precision > length + CHECK_FORMAT_1("%1.5s", "None", "None"); + CHECK_FORMAT_1("%1.5U", "None", unicode); + CHECK_FORMAT_1("%1.5A", "None", Py_None); + CHECK_FORMAT_1("%1.5S", "None", Py_None); + CHECK_FORMAT_1("%1.5R", "None", Py_None); + CHECK_FORMAT_2("%1.5V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%1.5V", "None", NULL, "None"); + + Py_XDECREF(unicode); Py_RETURN_NONE; Fail: Py_XDECREF(result); - return raiseTestError("test_string_from_format", msg); + Py_XDECREF(unicode); + return NULL; -#undef CHECK_1_FORMAT +#undef CHECK_FORMAT_2 +#undef CHECK_FORMAT_1 +#undef CHECK_FORMAT_0 } From bd72e2a6f012bea145bef94471f1247492796b35 Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Fri, 29 Jul 2022 21:28:09 
+0200 Subject: [PATCH 2/6] Fix negative numbers in PyUnicode_FromFormat --- Misc/ACKS | 1 + ...2-07-31-21-58-27.gh-issue-95504.wy7B1F.rst | 2 + Modules/_testcapimodule.c | 96 +++++++++---------- Objects/unicodeobject.c | 25 +++-- 4 files changed, 70 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-07-31-21-58-27.gh-issue-95504.wy7B1F.rst diff --git a/Misc/ACKS b/Misc/ACKS index 32475f874c36db..86db5fc65a60f0 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -609,6 +609,7 @@ Jan-Philip Gehrcke Thomas Gellekum Gabriel Genellina Christos Georgiou +Philip Georgi Elazar (אלעזר) Gershuni Ben Gertzfield Nadim Ghaznavi diff --git a/Misc/NEWS.d/next/C API/2022-07-31-21-58-27.gh-issue-95504.wy7B1F.rst b/Misc/NEWS.d/next/C API/2022-07-31-21-58-27.gh-issue-95504.wy7B1F.rst new file mode 100644 index 00000000000000..dfe7e226c5e8af --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-07-31-21-58-27.gh-issue-95504.wy7B1F.rst @@ -0,0 +1,2 @@ +Fix sign placement when specifying width or precision in +``PyUnicode_FromFormat``. Patch by Philip Georgi. 
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index f4ccbdf154d8dd..b59c942cc38711 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3331,14 +3331,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%05zu", "00123", (size_t)123); CHECK_FORMAT_1("%05x", "0007b", (int)123); - CHECK_FORMAT_1("%05d", "0-123", (int)-123); - CHECK_FORMAT_1("%05i", "0-123", (int)-123); - CHECK_FORMAT_1("%05ld", "0-123", (long)-123); - CHECK_FORMAT_1("%05li", "0-123", (long)-123); - CHECK_FORMAT_1("%05lld", "0-123", (long long)-123); - CHECK_FORMAT_1("%05lli", "0-123", (long long)-123); - CHECK_FORMAT_1("%05zd", "0-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%05zi", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05d", "-0123", (int)-123); + CHECK_FORMAT_1("%05i", "-0123", (int)-123); + CHECK_FORMAT_1("%05ld", "-0123", (long)-123); + CHECK_FORMAT_1("%05li", "-0123", (long)-123); + CHECK_FORMAT_1("%05lld", "-0123", (long long)-123); + CHECK_FORMAT_1("%05lli", "-0123", (long long)-123); + CHECK_FORMAT_1("%05zd", "-0123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05zi", "-0123", (Py_ssize_t)-123); CHECK_FORMAT_1("%09x", "0ffffff85", (int)-123); // Integers: precision < length @@ -3381,14 +3381,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%.5zu", "00123", (size_t)123); CHECK_FORMAT_1("%.5x", "0007b", (int)123); - CHECK_FORMAT_1("%.5d", "0-123", (int)-123); - CHECK_FORMAT_1("%.5i", "0-123", (int)-123); - CHECK_FORMAT_1("%.5ld", "0-123", (long)-123); - CHECK_FORMAT_1("%.5li", "0-123", (long)-123); - CHECK_FORMAT_1("%.5lld", "0-123", (long long)-123); - CHECK_FORMAT_1("%.5lli", "0-123", (long long)-123); - CHECK_FORMAT_1("%.5zd", "0-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%.5zi", "0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.5d", "-00123", (int)-123); + CHECK_FORMAT_1("%.5i", "-00123", (int)-123); + CHECK_FORMAT_1("%.5ld", "-00123", (long)-123); + 
CHECK_FORMAT_1("%.5li", "-00123", (long)-123); + CHECK_FORMAT_1("%.5lld", "-00123", (long long)-123); + CHECK_FORMAT_1("%.5lli", "-00123", (long long)-123); + CHECK_FORMAT_1("%.5zd", "-00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.5zi", "-00123", (Py_ssize_t)-123); CHECK_FORMAT_1("%.9x", "0ffffff85", (int)-123); // Integers: width > precision > length @@ -3406,14 +3406,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%7.5zu", " 00123", (size_t)123); CHECK_FORMAT_1("%7.5x", " 0007b", (int)123); - CHECK_FORMAT_1("%7.5d", " 0-123", (int)-123); - CHECK_FORMAT_1("%7.5i", " 0-123", (int)-123); - CHECK_FORMAT_1("%7.5ld", " 0-123", (long)-123); - CHECK_FORMAT_1("%7.5li", " 0-123", (long)-123); - CHECK_FORMAT_1("%7.5lld", " 0-123", (long long)-123); - CHECK_FORMAT_1("%7.5lli", " 0-123", (long long)-123); - CHECK_FORMAT_1("%7.5zd", " 0-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%7.5zi", " 0-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%7.5d", " -00123", (int)-123); + CHECK_FORMAT_1("%7.5i", " -00123", (int)-123); + CHECK_FORMAT_1("%7.5ld", " -00123", (long)-123); + CHECK_FORMAT_1("%7.5li", " -00123", (long)-123); + CHECK_FORMAT_1("%7.5lld", " -00123", (long long)-123); + CHECK_FORMAT_1("%7.5lli", " -00123", (long long)-123); + CHECK_FORMAT_1("%7.5zd", " -00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%7.5zi", " -00123", (Py_ssize_t)-123); CHECK_FORMAT_1("%10.9x", " 0ffffff85", (int)-123); // Integers: width > precision > length, 0-flag @@ -3431,14 +3431,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%07.5zu", "0000123", (size_t)123); CHECK_FORMAT_1("%07.5x", "000007b", (int)123); - CHECK_FORMAT_1("%07.5d", "000-123", (int)-123); - CHECK_FORMAT_1("%07.5i", "000-123", (int)-123); - CHECK_FORMAT_1("%07.5ld", "000-123", (long)-123); - CHECK_FORMAT_1("%07.5li", "000-123", (long)-123); - CHECK_FORMAT_1("%07.5lld", "000-123", (long long)-123); - CHECK_FORMAT_1("%07.5lli", "000-123", (long 
long)-123); - CHECK_FORMAT_1("%07.5zd", "000-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%07.5zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%07.5d", "-000123", (int)-123); + CHECK_FORMAT_1("%07.5i", "-000123", (int)-123); + CHECK_FORMAT_1("%07.5ld", "-000123", (long)-123); + CHECK_FORMAT_1("%07.5li", "-000123", (long)-123); + CHECK_FORMAT_1("%07.5lld", "-000123", (long long)-123); + CHECK_FORMAT_1("%07.5lli", "-000123", (long long)-123); + CHECK_FORMAT_1("%07.5zd", "-000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%07.5zi", "-000123", (Py_ssize_t)-123); CHECK_FORMAT_1("%010.9x", "00ffffff85", (int)-123); // Integers: precision > width > length @@ -3456,14 +3456,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%5.7zu", "0000123", (size_t)123); CHECK_FORMAT_1("%5.7x", "000007b", (int)123); - CHECK_FORMAT_1("%5.7d", "000-123", (int)-123); - CHECK_FORMAT_1("%5.7i", "000-123", (int)-123); - CHECK_FORMAT_1("%5.7ld", "000-123", (long)-123); - CHECK_FORMAT_1("%5.7li", "000-123", (long)-123); - CHECK_FORMAT_1("%5.7lld", "000-123", (long long)-123); - CHECK_FORMAT_1("%5.7lli", "000-123", (long long)-123); - CHECK_FORMAT_1("%5.7zd", "000-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%5.7zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%5.7d", "-0000123", (int)-123); + CHECK_FORMAT_1("%5.7i", "-0000123", (int)-123); + CHECK_FORMAT_1("%5.7ld", "-0000123", (long)-123); + CHECK_FORMAT_1("%5.7li", "-0000123", (long)-123); + CHECK_FORMAT_1("%5.7lld", "-0000123", (long long)-123); + CHECK_FORMAT_1("%5.7lli", "-0000123", (long long)-123); + CHECK_FORMAT_1("%5.7zd", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%5.7zi", "-0000123", (Py_ssize_t)-123); CHECK_FORMAT_1("%9.10x", "00ffffff85", (int)-123); // Integers: precision > width > length, 0-flag @@ -3481,14 +3481,14 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) CHECK_FORMAT_1("%05.7zu", "0000123", (size_t)123); CHECK_FORMAT_1("%05.7x", "000007b", 
(int)123); - CHECK_FORMAT_1("%05.7d", "000-123", (int)-123); - CHECK_FORMAT_1("%05.7i", "000-123", (int)-123); - CHECK_FORMAT_1("%05.7ld", "000-123", (long)-123); - CHECK_FORMAT_1("%05.7li", "000-123", (long)-123); - CHECK_FORMAT_1("%05.7lld", "000-123", (long long)-123); - CHECK_FORMAT_1("%05.7lli", "000-123", (long long)-123); - CHECK_FORMAT_1("%05.7zd", "000-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%05.7zi", "000-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05.7d", "-0000123", (int)-123); + CHECK_FORMAT_1("%05.7i", "-0000123", (int)-123); + CHECK_FORMAT_1("%05.7ld", "-0000123", (long)-123); + CHECK_FORMAT_1("%05.7li", "-0000123", (long)-123); + CHECK_FORMAT_1("%05.7lld", "-0000123", (long long)-123); + CHECK_FORMAT_1("%05.7lli", "-0000123", (long long)-123); + CHECK_FORMAT_1("%05.7zd", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05.7zi", "-0000123", (Py_ssize_t)-123); CHECK_FORMAT_1("%09.10x", "00ffffff85", (int)-123); // Integers: precision = 0, arg = 0 (empty string in C) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 355d74fe3bbda7..41f57e1b376abf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2482,21 +2482,34 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, } assert(len >= 0); - if (precision < len) - precision = len; + int negative = buffer[0]=='-'?1:0; + len -= negative; + + precision = Py_MAX(precision, len); + width = Py_MAX(width, precision + negative); arglen = Py_MAX(precision, width); if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1) return NULL; if (width > precision) { - Py_UCS4 fillchar; - fill = width - precision; - fillchar = zeropad?'0':' '; + if (negative && zeropad) { + if (_PyUnicodeWriter_WriteChar(writer, '-') == -1) + return NULL; + } + + Py_UCS4 fillchar = zeropad?'0':' '; + fill = width - precision - negative; if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1) return NULL; writer->pos += fill; + + if (negative && !zeropad) { + if 
(_PyUnicodeWriter_WriteChar(writer, '-') == -1) + return NULL; + } } + if (precision > len) { fill = precision - len; if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) @@ -2504,7 +2517,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, writer->pos += fill; } - if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0) + if (_PyUnicodeWriter_WriteASCIIString(writer, &buffer[negative], len) < 0) return NULL; break; } From 99b0ebcc108df0e33f81eef418c4917714371705 Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Tue, 2 Aug 2022 05:19:42 +0200 Subject: [PATCH 3/6] Add whatsnew --- Doc/whatsnew/3.12.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 67396f8e02280b..b391563ab9a49e 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -427,6 +427,9 @@ Porting to Python 3.12 using the existing public C-API instead, or, if necessary, the (internal-only) ``_PyObject_GET_WEAKREFS_LISTPTR()`` macro. +* Fixed wrong sign placement in ``PyUnicode_FromFormat``. + (Contributed by Philip Georgi in :gh:`95505`.) 
+ Deprecated ---------- From 63e9afdab78b6d28223bb1345ac2892ead310873 Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Tue, 2 Aug 2022 05:26:04 +0200 Subject: [PATCH 4/6] gh-93649: Split unicode tests from _testcapimodule.c --- Modules/Setup.stdlib.in | 2 +- Modules/_testcapi/parts.h | 1 + Modules/_testcapi/unicode.c | 696 ++++++++++++++++++++++++++++++ Modules/_testcapimodule.c | 672 +---------------------------- PCbuild/_testcapi.vcxproj | 1 + PCbuild/_testcapi.vcxproj.filters | 3 + 6 files changed, 705 insertions(+), 670 deletions(-) create mode 100644 Modules/_testcapi/unicode.c diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index c5dc1e8eb45377..b1bf1e5e12df0d 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -169,7 +169,7 @@ @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c -@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c +@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/unicode.c # Some testing modules MUST be built as shared libraries. 
*shared* diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h index e6d2ed23cb18e7..79e7908473a700 100644 --- a/Modules/_testcapi/parts.h +++ b/Modules/_testcapi/parts.h @@ -2,3 +2,4 @@ int _PyTestCapi_Init_Vectorcall(PyObject *module); int _PyTestCapi_Init_Heaptype(PyObject *module); +int _PyTestCapi_Init_Unicode(PyObject *module); diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c new file mode 100644 index 00000000000000..542391bd0364e7 --- /dev/null +++ b/Modules/_testcapi/unicode.c @@ -0,0 +1,696 @@ +#include "parts.h" + +// Forward declarations +static struct PyModuleDef *_testcapimodule = NULL; // set at initialization +static PyObject *TestError; /* set to exception object in init */ + +/* Raise TestError with test_name + ": " + msg, and return NULL. */ +static PyObject * +raiseTestError(const char* test_name, const char* msg) +{ + PyErr_Format(TestError, "%s: %s", test_name, msg); + return NULL; +} + +static PyObject * +codec_incrementalencoder(PyObject *self, PyObject *args) +{ + const char *encoding, *errors = NULL; + if (!PyArg_ParseTuple(args, "s|s:test_incrementalencoder", + &encoding, &errors)) + return NULL; + return PyCodec_IncrementalEncoder(encoding, errors); +} + +static PyObject * +codec_incrementaldecoder(PyObject *self, PyObject *args) +{ + const char *encoding, *errors = NULL; + if (!PyArg_ParseTuple(args, "s|s:test_incrementaldecoder", + &encoding, &errors)) + return NULL; + return PyCodec_IncrementalDecoder(encoding, errors); +} + +static PyObject * +test_unicode_compare_with_ascii(PyObject *self, PyObject *Py_UNUSED(ignored)) { + PyObject *py_s = PyUnicode_FromStringAndSize("str\0", 4); + int result; + if (py_s == NULL) + return NULL; + result = PyUnicode_CompareWithASCIIString(py_s, "str"); + Py_DECREF(py_s); + if (!result) { + PyErr_SetString(TestError, "Python string ending in NULL " + "should not compare equal to c string."); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * 
+test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ +#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; + size_t wtextlen = 1; + const wchar_t invalid[1] = {(wchar_t)0x110000u}; +#else + const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; + size_t wtextlen = 2; +#endif + PyObject *wide, *utf8; + + wide = PyUnicode_FromWideChar(wtext, wtextlen); + if (wide == NULL) + return NULL; + + utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d"); + if (utf8 == NULL) { + Py_DECREF(wide); + return NULL; + } + + if (PyUnicode_GET_LENGTH(wide) != PyUnicode_GET_LENGTH(utf8)) { + Py_DECREF(wide); + Py_DECREF(utf8); + return raiseTestError("test_widechar", + "wide string and utf8 string " + "have different length"); + } + if (PyUnicode_Compare(wide, utf8)) { + Py_DECREF(wide); + Py_DECREF(utf8); + if (PyErr_Occurred()) + return NULL; + return raiseTestError("test_widechar", + "wide string and utf8 string " + "are different"); + } + + Py_DECREF(wide); + Py_DECREF(utf8); + +#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + wide = PyUnicode_FromWideChar(invalid, 1); + if (wide == NULL) + PyErr_Clear(); + else + return raiseTestError("test_widechar", + "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); +#endif + Py_RETURN_NONE; +} + +static PyObject * +unicode_aswidechar(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_ssize_t buflen, size; + wchar_t *buffer; + + if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen)) + return NULL; + buffer = PyMem_New(wchar_t, buflen); + if (buffer == NULL) + return PyErr_NoMemory(); + + size = PyUnicode_AsWideChar(unicode, buffer, buflen); + if (size == -1) { + PyMem_Free(buffer); + return NULL; + } + + if (size < buflen) + buflen = size + 1; + else + buflen = size; + result = PyUnicode_FromWideChar(buffer, buflen); + PyMem_Free(buffer); + if (result == NULL) + return NULL; + + return Py_BuildValue("(Nn)", result, size); +} + +static PyObject * 
+unicode_aswidecharstring(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_ssize_t size; + wchar_t *buffer; + + if (!PyArg_ParseTuple(args, "U", &unicode)) + return NULL; + + buffer = PyUnicode_AsWideCharString(unicode, &size); + if (buffer == NULL) + return NULL; + + result = PyUnicode_FromWideChar(buffer, size + 1); + PyMem_Free(buffer); + if (result == NULL) + return NULL; + return Py_BuildValue("(Nn)", result, size); +} + +static PyObject * +unicode_asucs4(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_UCS4 *buffer; + int copy_null; + Py_ssize_t str_len, buf_len; + + if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) { + return NULL; + } + + buf_len = str_len + 1; + buffer = PyMem_NEW(Py_UCS4, buf_len); + if (buffer == NULL) { + return PyErr_NoMemory(); + } + memset(buffer, 0, sizeof(Py_UCS4)*buf_len); + buffer[str_len] = 0xffffU; + + if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { + PyMem_Free(buffer); + return NULL; + } + + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); + PyMem_Free(buffer); + return result; +} + +static PyObject * +unicode_asutf8(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *buffer; + + if (!PyArg_ParseTuple(args, "U", &unicode)) { + return NULL; + } + + buffer = PyUnicode_AsUTF8(unicode); + if (buffer == NULL) { + return NULL; + } + + return PyBytes_FromString(buffer); +} + +static PyObject * +unicode_asutf8andsize(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + const char *buffer; + Py_ssize_t utf8_len; + + if(!PyArg_ParseTuple(args, "U", &unicode)) { + return NULL; + } + + buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len); + if (buffer == NULL) { + return NULL; + } + + result = PyBytes_FromString(buffer); + if (result == NULL) { + return NULL; + } + + return Py_BuildValue("(Nn)", result, utf8_len); +} + +static PyObject * +unicode_findchar(PyObject *self, PyObject *args) +{ +
PyObject *str; + int direction; + unsigned int ch; + Py_ssize_t result; + Py_ssize_t start, end; + + if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch, + &start, &end, &direction)) { + return NULL; + } + + result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction); + if (result == -2) + return NULL; + else + return PyLong_FromSsize_t(result); +} + +static PyObject * +unicode_copycharacters(PyObject *self, PyObject *args) +{ + PyObject *from, *to, *to_copy; + Py_ssize_t from_start, to_start, how_many, copied; + + if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start, + &from, &from_start, &how_many)) { + return NULL; + } + + if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to), + PyUnicode_MAX_CHAR_VALUE(to)))) { + return NULL; + } + if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) { + Py_DECREF(to_copy); + return NULL; + } + + if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from, + from_start, how_many)) < 0) { + Py_DECREF(to_copy); + return NULL; + } + + return Py_BuildValue("(Nn)", to_copy, copied); +} + +static PyObject * +test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromString("None"); + +#define CHECK_FORMAT_2(FORMAT, EXPECTED, ARG1, ARG2) \ + result = PyUnicode_FromFormat(FORMAT, ARG1, ARG2); \ + if (result == NULL) \ + return NULL; \ + if (!_PyUnicode_EqualToASCIIString(result, EXPECTED)) { \ + PyErr_Format(TestError, \ + "test_string_from_format: failed at \"%s\" " \ + "expected \"%s\" got \"%s\"", \ + FORMAT, EXPECTED, PyUnicode_AsUTF8(result)); \ + goto Fail; \ + } \ + Py_DECREF(result) + +#define CHECK_FORMAT_1(FORMAT, EXPECTED, ARG) \ + CHECK_FORMAT_2(FORMAT, EXPECTED, ARG, 0) + +#define CHECK_FORMAT_0(FORMAT, EXPECTED) \ + CHECK_FORMAT_2(FORMAT, EXPECTED, 0, 0) + + // Unrecognized + CHECK_FORMAT_2("%u %? %u", "1 %? 
%u", 1, 2); + + // "%%" + CHECK_FORMAT_0( "%%", "%"); + CHECK_FORMAT_0( "%0%", "%"); + CHECK_FORMAT_0("%00%", "%"); + CHECK_FORMAT_0( "%2%", "%"); + CHECK_FORMAT_0("%02%", "%"); + CHECK_FORMAT_0("%.0%", "%.0%"); + CHECK_FORMAT_0("%.2%", "%.2%"); + + // "%c" + CHECK_FORMAT_1( "%c", "c", 'c'); + CHECK_FORMAT_1( "%0c", "c", 'c'); + CHECK_FORMAT_1("%00c", "c", 'c'); + CHECK_FORMAT_1( "%2c", "c", 'c'); + CHECK_FORMAT_1("%02c", "c", 'c'); + CHECK_FORMAT_1("%.0c", "c", 'c'); + CHECK_FORMAT_1("%.2c", "c", 'c'); + + // Integers + CHECK_FORMAT_1("%d", "123", (int)123); + CHECK_FORMAT_1("%i", "123", (int)123); + CHECK_FORMAT_1("%u", "123", (unsigned int)123); + CHECK_FORMAT_1("%ld", "123", (long)123); + CHECK_FORMAT_1("%li", "123", (long)123); + CHECK_FORMAT_1("%lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%lld", "123", (long long)123); + CHECK_FORMAT_1("%lli", "123", (long long)123); + CHECK_FORMAT_1("%llu", "123", (unsigned long long)123); + CHECK_FORMAT_1("%zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%zu", "123", (size_t)123); + CHECK_FORMAT_1("%x", "7b", (int)123); + + CHECK_FORMAT_1("%d", "-123", (int)-123); + CHECK_FORMAT_1("%i", "-123", (int)-123); + CHECK_FORMAT_1("%ld", "-123", (long)-123); + CHECK_FORMAT_1("%li", "-123", (long)-123); + CHECK_FORMAT_1("%lld", "-123", (long long)-123); + CHECK_FORMAT_1("%lli", "-123", (long long)-123); + CHECK_FORMAT_1("%zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%zi", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%x", "ffffff85", (int)-123); + + // Integers: width < length + CHECK_FORMAT_1("%1d", "123", (int)123); + CHECK_FORMAT_1("%1i", "123", (int)123); + CHECK_FORMAT_1("%1u", "123", (unsigned int)123); + CHECK_FORMAT_1("%1ld", "123", (long)123); + CHECK_FORMAT_1("%1li", "123", (long)123); + CHECK_FORMAT_1("%1lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%1lld", "123", (long long)123); + CHECK_FORMAT_1("%1lli", "123", (long long)123); + CHECK_FORMAT_1("%1llu", 
"123", (unsigned long long)123); + CHECK_FORMAT_1("%1zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%1zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%1zu", "123", (size_t)123); + CHECK_FORMAT_1("%1x", "7b", (int)123); + + CHECK_FORMAT_1("%1d", "-123", (int)-123); + CHECK_FORMAT_1("%1i", "-123", (int)-123); + CHECK_FORMAT_1("%1ld", "-123", (long)-123); + CHECK_FORMAT_1("%1li", "-123", (long)-123); + CHECK_FORMAT_1("%1lld", "-123", (long long)-123); + CHECK_FORMAT_1("%1lli", "-123", (long long)-123); + CHECK_FORMAT_1("%1zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%1zi", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%1x", "ffffff85", (int)-123); + + // Integers: width > length + CHECK_FORMAT_1("%5d", " 123", (int)123); + CHECK_FORMAT_1("%5i", " 123", (int)123); + CHECK_FORMAT_1("%5u", " 123", (unsigned int)123); + CHECK_FORMAT_1("%5ld", " 123", (long)123); + CHECK_FORMAT_1("%5li", " 123", (long)123); + CHECK_FORMAT_1("%5lu", " 123", (unsigned long)123); + CHECK_FORMAT_1("%5lld", " 123", (long long)123); + CHECK_FORMAT_1("%5lli", " 123", (long long)123); + CHECK_FORMAT_1("%5llu", " 123", (unsigned long long)123); + CHECK_FORMAT_1("%5zd", " 123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5zi", " 123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5zu", " 123", (size_t)123); + CHECK_FORMAT_1("%5x", " 7b", (int)123); + + CHECK_FORMAT_1("%5d", " -123", (int)-123); + CHECK_FORMAT_1("%5i", " -123", (int)-123); + CHECK_FORMAT_1("%5ld", " -123", (long)-123); + CHECK_FORMAT_1("%5li", " -123", (long)-123); + CHECK_FORMAT_1("%5lld", " -123", (long long)-123); + CHECK_FORMAT_1("%5lli", " -123", (long long)-123); + CHECK_FORMAT_1("%5zd", " -123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%5zi", " -123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%9x", " ffffff85", (int)-123); + + // Integers: width > length, 0-flag + CHECK_FORMAT_1("%05d", "00123", (int)123); + CHECK_FORMAT_1("%05i", "00123", (int)123); + CHECK_FORMAT_1("%05u", "00123", (unsigned int)123); + CHECK_FORMAT_1("%05ld", "00123", 
(long)123); + CHECK_FORMAT_1("%05li", "00123", (long)123); + CHECK_FORMAT_1("%05lu", "00123", (unsigned long)123); + CHECK_FORMAT_1("%05lld", "00123", (long long)123); + CHECK_FORMAT_1("%05lli", "00123", (long long)123); + CHECK_FORMAT_1("%05llu", "00123", (unsigned long long)123); + CHECK_FORMAT_1("%05zd", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05zi", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05zu", "00123", (size_t)123); + CHECK_FORMAT_1("%05x", "0007b", (int)123); + + CHECK_FORMAT_1("%05d", "-0123", (int)-123); + CHECK_FORMAT_1("%05i", "-0123", (int)-123); + CHECK_FORMAT_1("%05ld", "-0123", (long)-123); + CHECK_FORMAT_1("%05li", "-0123", (long)-123); + CHECK_FORMAT_1("%05lld", "-0123", (long long)-123); + CHECK_FORMAT_1("%05lli", "-0123", (long long)-123); + CHECK_FORMAT_1("%05zd", "-0123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05zi", "-0123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%09x", "0ffffff85", (int)-123); + + // Integers: precision < length + CHECK_FORMAT_1("%.1d", "123", (int)123); + CHECK_FORMAT_1("%.1i", "123", (int)123); + CHECK_FORMAT_1("%.1u", "123", (unsigned int)123); + CHECK_FORMAT_1("%.1ld", "123", (long)123); + CHECK_FORMAT_1("%.1li", "123", (long)123); + CHECK_FORMAT_1("%.1lu", "123", (unsigned long)123); + CHECK_FORMAT_1("%.1lld", "123", (long long)123); + CHECK_FORMAT_1("%.1lli", "123", (long long)123); + CHECK_FORMAT_1("%.1llu", "123", (unsigned long long)123); + CHECK_FORMAT_1("%.1zd", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.1zi", "123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.1zu", "123", (size_t)123); + CHECK_FORMAT_1("%.1x", "7b", (int)123); + + CHECK_FORMAT_1("%.1d", "-123", (int)-123); + CHECK_FORMAT_1("%.1i", "-123", (int)-123); + CHECK_FORMAT_1("%.1ld", "-123", (long)-123); + CHECK_FORMAT_1("%.1li", "-123", (long)-123); + CHECK_FORMAT_1("%.1lld", "-123", (long long)-123); + CHECK_FORMAT_1("%.1lli", "-123", (long long)-123); + CHECK_FORMAT_1("%.1zd", "-123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.1zi", "-123", 
(Py_ssize_t)-123); + CHECK_FORMAT_1("%.1x", "ffffff85", (int)-123); + + // Integers: precision > length + CHECK_FORMAT_1("%.5d", "00123", (int)123); + CHECK_FORMAT_1("%.5i", "00123", (int)123); + CHECK_FORMAT_1("%.5u", "00123", (unsigned int)123); + CHECK_FORMAT_1("%.5ld", "00123", (long)123); + CHECK_FORMAT_1("%.5li", "00123", (long)123); + CHECK_FORMAT_1("%.5lu", "00123", (unsigned long)123); + CHECK_FORMAT_1("%.5lld", "00123", (long long)123); + CHECK_FORMAT_1("%.5lli", "00123", (long long)123); + CHECK_FORMAT_1("%.5llu", "00123", (unsigned long long)123); + CHECK_FORMAT_1("%.5zd", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.5zi", "00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%.5zu", "00123", (size_t)123); + CHECK_FORMAT_1("%.5x", "0007b", (int)123); + + CHECK_FORMAT_1("%.5d", "-00123", (int)-123); + CHECK_FORMAT_1("%.5i", "-00123", (int)-123); + CHECK_FORMAT_1("%.5ld", "-00123", (long)-123); + CHECK_FORMAT_1("%.5li", "-00123", (long)-123); + CHECK_FORMAT_1("%.5lld", "-00123", (long long)-123); + CHECK_FORMAT_1("%.5lli", "-00123", (long long)-123); + CHECK_FORMAT_1("%.5zd", "-00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.5zi", "-00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%.9x", "0ffffff85", (int)-123); + + // Integers: width > precision > length + CHECK_FORMAT_1("%7.5d", " 00123", (int)123); + CHECK_FORMAT_1("%7.5i", " 00123", (int)123); + CHECK_FORMAT_1("%7.5u", " 00123", (unsigned int)123); + CHECK_FORMAT_1("%7.5ld", " 00123", (long)123); + CHECK_FORMAT_1("%7.5li", " 00123", (long)123); + CHECK_FORMAT_1("%7.5lu", " 00123", (unsigned long)123); + CHECK_FORMAT_1("%7.5lld", " 00123", (long long)123); + CHECK_FORMAT_1("%7.5lli", " 00123", (long long)123); + CHECK_FORMAT_1("%7.5llu", " 00123", (unsigned long long)123); + CHECK_FORMAT_1("%7.5zd", " 00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%7.5zi", " 00123", (Py_ssize_t)123); + CHECK_FORMAT_1("%7.5zu", " 00123", (size_t)123); + CHECK_FORMAT_1("%7.5x", " 0007b", (int)123); + + CHECK_FORMAT_1("%7.5d", " 
-00123", (int)-123); + CHECK_FORMAT_1("%7.5i", " -00123", (int)-123); + CHECK_FORMAT_1("%7.5ld", " -00123", (long)-123); + CHECK_FORMAT_1("%7.5li", " -00123", (long)-123); + CHECK_FORMAT_1("%7.5lld", " -00123", (long long)-123); + CHECK_FORMAT_1("%7.5lli", " -00123", (long long)-123); + CHECK_FORMAT_1("%7.5zd", " -00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%7.5zi", " -00123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%10.9x", " 0ffffff85", (int)-123); + + // Integers: width > precision > length, 0-flag + CHECK_FORMAT_1("%07.5d", "0000123", (int)123); + CHECK_FORMAT_1("%07.5i", "0000123", (int)123); + CHECK_FORMAT_1("%07.5u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%07.5ld", "0000123", (long)123); + CHECK_FORMAT_1("%07.5li", "0000123", (long)123); + CHECK_FORMAT_1("%07.5lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%07.5lld", "0000123", (long long)123); + CHECK_FORMAT_1("%07.5lli", "0000123", (long long)123); + CHECK_FORMAT_1("%07.5llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%07.5zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%07.5zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%07.5zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%07.5x", "000007b", (int)123); + + CHECK_FORMAT_1("%07.5d", "-000123", (int)-123); + CHECK_FORMAT_1("%07.5i", "-000123", (int)-123); + CHECK_FORMAT_1("%07.5ld", "-000123", (long)-123); + CHECK_FORMAT_1("%07.5li", "-000123", (long)-123); + CHECK_FORMAT_1("%07.5lld", "-000123", (long long)-123); + CHECK_FORMAT_1("%07.5lli", "-000123", (long long)-123); + CHECK_FORMAT_1("%07.5zd", "-000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%07.5zi", "-000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%010.9x", "00ffffff85", (int)-123); + + // Integers: precision > width > length + CHECK_FORMAT_1("%5.7d", "0000123", (int)123); + CHECK_FORMAT_1("%5.7i", "0000123", (int)123); + CHECK_FORMAT_1("%5.7u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%5.7ld", "0000123", (long)123); + CHECK_FORMAT_1("%5.7li", "0000123", 
(long)123); + CHECK_FORMAT_1("%5.7lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%5.7lld", "0000123", (long long)123); + CHECK_FORMAT_1("%5.7lli", "0000123", (long long)123); + CHECK_FORMAT_1("%5.7llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%5.7zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5.7zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%5.7zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%5.7x", "000007b", (int)123); + + CHECK_FORMAT_1("%5.7d", "-0000123", (int)-123); + CHECK_FORMAT_1("%5.7i", "-0000123", (int)-123); + CHECK_FORMAT_1("%5.7ld", "-0000123", (long)-123); + CHECK_FORMAT_1("%5.7li", "-0000123", (long)-123); + CHECK_FORMAT_1("%5.7lld", "-0000123", (long long)-123); + CHECK_FORMAT_1("%5.7lli", "-0000123", (long long)-123); + CHECK_FORMAT_1("%5.7zd", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%5.7zi", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%9.10x", "00ffffff85", (int)-123); + + // Integers: precision > width > length, 0-flag + CHECK_FORMAT_1("%05.7d", "0000123", (int)123); + CHECK_FORMAT_1("%05.7i", "0000123", (int)123); + CHECK_FORMAT_1("%05.7u", "0000123", (unsigned int)123); + CHECK_FORMAT_1("%05.7ld", "0000123", (long)123); + CHECK_FORMAT_1("%05.7li", "0000123", (long)123); + CHECK_FORMAT_1("%05.7lu", "0000123", (unsigned long)123); + CHECK_FORMAT_1("%05.7lld", "0000123", (long long)123); + CHECK_FORMAT_1("%05.7lli", "0000123", (long long)123); + CHECK_FORMAT_1("%05.7llu", "0000123", (unsigned long long)123); + CHECK_FORMAT_1("%05.7zd", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05.7zi", "0000123", (Py_ssize_t)123); + CHECK_FORMAT_1("%05.7zu", "0000123", (size_t)123); + CHECK_FORMAT_1("%05.7x", "000007b", (int)123); + + CHECK_FORMAT_1("%05.7d", "-0000123", (int)-123); + CHECK_FORMAT_1("%05.7i", "-0000123", (int)-123); + CHECK_FORMAT_1("%05.7ld", "-0000123", (long)-123); + CHECK_FORMAT_1("%05.7li", "-0000123", (long)-123); + CHECK_FORMAT_1("%05.7lld", "-0000123", (long long)-123); + 
CHECK_FORMAT_1("%05.7lli", "-0000123", (long long)-123); + CHECK_FORMAT_1("%05.7zd", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%05.7zi", "-0000123", (Py_ssize_t)-123); + CHECK_FORMAT_1("%09.10x", "00ffffff85", (int)-123); + + // Integers: precision = 0, arg = 0 (empty string in C) + CHECK_FORMAT_1("%.0d", "0", (int)0); + CHECK_FORMAT_1("%.0i", "0", (int)0); + CHECK_FORMAT_1("%.0u", "0", (unsigned int)0); + CHECK_FORMAT_1("%.0ld", "0", (long)0); + CHECK_FORMAT_1("%.0li", "0", (long)0); + CHECK_FORMAT_1("%.0lu", "0", (unsigned long)0); + CHECK_FORMAT_1("%.0lld", "0", (long long)0); + CHECK_FORMAT_1("%.0lli", "0", (long long)0); + CHECK_FORMAT_1("%.0llu", "0", (unsigned long long)0); + CHECK_FORMAT_1("%.0zd", "0", (Py_ssize_t)0); + CHECK_FORMAT_1("%.0zi", "0", (Py_ssize_t)0); + CHECK_FORMAT_1("%.0zu", "0", (size_t)0); + CHECK_FORMAT_1("%.0x", "0", (int)0); + + // Strings + CHECK_FORMAT_1("%s", "None", "None"); + CHECK_FORMAT_1("%U", "None", unicode); + CHECK_FORMAT_1("%A", "None", Py_None); + CHECK_FORMAT_1("%S", "None", Py_None); + CHECK_FORMAT_1("%R", "None", Py_None); + CHECK_FORMAT_2("%V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%V", "None", NULL, "None"); + + // Strings: width < length + CHECK_FORMAT_1("%1s", "None", "None"); + CHECK_FORMAT_1("%1U", "None", unicode); + CHECK_FORMAT_1("%1A", "None", Py_None); + CHECK_FORMAT_1("%1S", "None", Py_None); + CHECK_FORMAT_1("%1R", "None", Py_None); + CHECK_FORMAT_2("%1V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%1V", "None", NULL, "None"); + + // Strings: width > length + CHECK_FORMAT_1("%5s", " None", "None"); + CHECK_FORMAT_1("%5U", " None", unicode); + CHECK_FORMAT_1("%5A", " None", Py_None); + CHECK_FORMAT_1("%5S", " None", Py_None); + CHECK_FORMAT_1("%5R", " None", Py_None); + CHECK_FORMAT_2("%5V", " None", unicode, "ignored"); + CHECK_FORMAT_2("%5V", " None", NULL, "None"); + + // Strings: precision < length + CHECK_FORMAT_1("%.1s", "N", "None"); + CHECK_FORMAT_1("%.1U", "N", unicode); + 
CHECK_FORMAT_1("%.1A", "N", Py_None); + CHECK_FORMAT_1("%.1S", "N", Py_None); + CHECK_FORMAT_1("%.1R", "N", Py_None); + CHECK_FORMAT_2("%.1V", "N", unicode, "ignored"); + CHECK_FORMAT_2("%.1V", "N", NULL, "None"); + + // Strings: precision > length + CHECK_FORMAT_1("%.5s", "None", "None"); + CHECK_FORMAT_1("%.5U", "None", unicode); + CHECK_FORMAT_1("%.5A", "None", Py_None); + CHECK_FORMAT_1("%.5S", "None", Py_None); + CHECK_FORMAT_1("%.5R", "None", Py_None); + CHECK_FORMAT_2("%.5V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%.5V", "None", NULL, "None"); + + // Strings: precision < length, width > length + CHECK_FORMAT_1("%5.1s", " N", "None"); + CHECK_FORMAT_1("%5.1U", " N", unicode); + CHECK_FORMAT_1("%5.1A", " N", Py_None); + CHECK_FORMAT_1("%5.1S", " N", Py_None); + CHECK_FORMAT_1("%5.1R", " N", Py_None); + CHECK_FORMAT_2("%5.1V", " N", unicode, "ignored"); + CHECK_FORMAT_2("%5.1V", " N", NULL, "None"); + + // Strings: width < length, precision > length + CHECK_FORMAT_1("%1.5s", "None", "None"); + CHECK_FORMAT_1("%1.5U", "None", unicode); + CHECK_FORMAT_1("%1.5A", "None", Py_None); + CHECK_FORMAT_1("%1.5S", "None", Py_None); + CHECK_FORMAT_1("%1.5R", "None", Py_None); + CHECK_FORMAT_2("%1.5V", "None", unicode, "ignored"); + CHECK_FORMAT_2("%1.5V", "None", NULL, "None"); + + Py_XDECREF(unicode); + Py_RETURN_NONE; + + Fail: + Py_XDECREF(result); + Py_XDECREF(unicode); + return NULL; + +#undef CHECK_FORMAT_2 +#undef CHECK_FORMAT_1 +#undef CHECK_FORMAT_0 +} + +static PyMethodDef TestMethods[] = { + {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, + {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, + {"test_unicode_compare_with_ascii", + test_unicode_compare_with_ascii, METH_NOARGS}, + {"test_string_from_format", test_string_from_format, METH_NOARGS}, + {"test_widechar", test_widechar, METH_NOARGS}, + {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, + {"unicode_aswidecharstring", unicode_aswidecharstring, 
METH_VARARGS}, + {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, + {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, + {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_findchar", unicode_findchar, METH_VARARGS}, + {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, + {NULL}, +}; + +int +_PyTestCapi_Init_Unicode(PyObject *m) { + _testcapimodule = PyModule_GetDef(m); + + TestError = PyErr_NewException("_testcapi.unicode_error", NULL, NULL); + Py_INCREF(TestError); + PyModule_AddObject(m, "unicode_error", TestError); + + if (PyModule_AddFunctions(m, TestMethods) < 0) { + return -1; + } + + return 0; +} diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 9c00eec83c98bd..f36ad0c3c768c8 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1886,234 +1886,6 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) return return_value; } -static PyObject * -test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) - const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; - size_t wtextlen = 1; - const wchar_t invalid[1] = {(wchar_t)0x110000u}; -#else - const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; - size_t wtextlen = 2; -#endif - PyObject *wide, *utf8; - - wide = PyUnicode_FromWideChar(wtext, wtextlen); - if (wide == NULL) - return NULL; - - utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d"); - if (utf8 == NULL) { - Py_DECREF(wide); - return NULL; - } - - if (PyUnicode_GET_LENGTH(wide) != PyUnicode_GET_LENGTH(utf8)) { - Py_DECREF(wide); - Py_DECREF(utf8); - return raiseTestError("test_widechar", - "wide string and utf8 string " - "have different length"); - } - if (PyUnicode_Compare(wide, utf8)) { - Py_DECREF(wide); - Py_DECREF(utf8); - if (PyErr_Occurred()) - return NULL; - return raiseTestError("test_widechar", - "wide string and utf8 string " - "are different"); - } - - Py_DECREF(wide); - Py_DECREF(utf8); - -#if 
defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) - wide = PyUnicode_FromWideChar(invalid, 1); - if (wide == NULL) - PyErr_Clear(); - else - return raiseTestError("test_widechar", - "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); -#endif - Py_RETURN_NONE; -} - -static PyObject * -unicode_aswidechar(PyObject *self, PyObject *args) -{ - PyObject *unicode, *result; - Py_ssize_t buflen, size; - wchar_t *buffer; - - if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen)) - return NULL; - buffer = PyMem_New(wchar_t, buflen); - if (buffer == NULL) - return PyErr_NoMemory(); - - size = PyUnicode_AsWideChar(unicode, buffer, buflen); - if (size == -1) { - PyMem_Free(buffer); - return NULL; - } - - if (size < buflen) - buflen = size + 1; - else - buflen = size; - result = PyUnicode_FromWideChar(buffer, buflen); - PyMem_Free(buffer); - if (result == NULL) - return NULL; - - return Py_BuildValue("(Nn)", result, size); -} - -static PyObject * -unicode_aswidecharstring(PyObject *self, PyObject *args) -{ - PyObject *unicode, *result; - Py_ssize_t size; - wchar_t *buffer; - - if (!PyArg_ParseTuple(args, "U", &unicode)) - return NULL; - - buffer = PyUnicode_AsWideCharString(unicode, &size); - if (buffer == NULL) - return NULL; - - result = PyUnicode_FromWideChar(buffer, size + 1); - PyMem_Free(buffer); - if (result == NULL) - return NULL; - return Py_BuildValue("(Nn)", result, size); -} - -static PyObject * -unicode_asucs4(PyObject *self, PyObject *args) -{ - PyObject *unicode, *result; - Py_UCS4 *buffer; - int copy_null; - Py_ssize_t str_len, buf_len; - - if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) { - return NULL; - } - - buf_len = str_len + 1; - buffer = PyMem_NEW(Py_UCS4, buf_len); - if (buffer == NULL) { - return PyErr_NoMemory(); - } - memset(buffer, 0, sizeof(Py_UCS4)*buf_len); - buffer[str_len] = 0xffffU; - - if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { - PyMem_Free(buffer); - return NULL; - } - - result =
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); - PyMem_Free(buffer); - return result; -} - -static PyObject * -unicode_asutf8(PyObject *self, PyObject *args) -{ - PyObject *unicode; - const char *buffer; - - if (!PyArg_ParseTuple(args, "U", &unicode)) { - return NULL; - } - - buffer = PyUnicode_AsUTF8(unicode); - if (buffer == NULL) { - return NULL; - } - - return PyBytes_FromString(buffer); -} - -static PyObject * -unicode_asutf8andsize(PyObject *self, PyObject *args) -{ - PyObject *unicode, *result; - const char *buffer; - Py_ssize_t utf8_len; - - if(!PyArg_ParseTuple(args, "U", &unicode)) { - return NULL; - } - - buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len); - if (buffer == NULL) { - return NULL; - } - - result = PyBytes_FromString(buffer); - if (result == NULL) { - return NULL; - } - - return Py_BuildValue("(Nn)", result, utf8_len); -} - -static PyObject * -unicode_findchar(PyObject *self, PyObject *args) -{ - PyObject *str; - int direction; - unsigned int ch; - Py_ssize_t result; - Py_ssize_t start, end; - - if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch, - &start, &end, &direction)) { - return NULL; - } - - result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction); - if (result == -2) - return NULL; - else - return PyLong_FromSsize_t(result); -} - -static PyObject * -unicode_copycharacters(PyObject *self, PyObject *args) -{ - PyObject *from, *to, *to_copy; - Py_ssize_t from_start, to_start, how_many, copied; - - if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start, - &from, &from_start, &how_many)) { - return NULL; - } - - if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to), - PyUnicode_MAX_CHAR_VALUE(to)))) { - return NULL; - } - if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) { - Py_DECREF(to_copy); - return NULL; - } - - if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from, - from_start, how_many)) < 0) { - Py_DECREF(to_copy); - return NULL; - } 
- - return Py_BuildValue("(Nn)", to_copy, copied); -} - static PyObject * getargs_w_star(PyObject *self, PyObject *args) { @@ -2164,27 +1936,6 @@ test_empty_argparse(PyObject *self, PyObject *Py_UNUSED(ignored)) } } -static PyObject * -codec_incrementalencoder(PyObject *self, PyObject *args) -{ - const char *encoding, *errors = NULL; - if (!PyArg_ParseTuple(args, "s|s:test_incrementalencoder", - &encoding, &errors)) - return NULL; - return PyCodec_IncrementalEncoder(encoding, errors); -} - -static PyObject * -codec_incrementaldecoder(PyObject *self, PyObject *args) -{ - const char *encoding, *errors = NULL; - if (!PyArg_ParseTuple(args, "s|s:test_incrementaldecoder", - &encoding, &errors)) - return NULL; - return PyCodec_IncrementalDecoder(encoding, errors); -} - - /* Simple test of _PyLong_NumBits and _PyLong_Sign. */ static PyObject * test_long_numbits(PyObject *self, PyObject *Py_UNUSED(ignored)) @@ -2847,411 +2598,6 @@ pending_threadfunc(PyObject *self, PyObject *arg) Py_RETURN_TRUE; } -/* Some tests of PyUnicode_FromFormat(). */ -static PyObject * -test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromString("None"); - -#define CHECK_FORMAT_2(FORMAT, EXPECTED, ARG1, ARG2) \ - result = PyUnicode_FromFormat(FORMAT, ARG1, ARG2); \ - if (result == NULL) \ - return NULL; \ - if (!_PyUnicode_EqualToASCIIString(result, EXPECTED)) { \ - PyErr_Format(TestError, \ - "test_string_from_format: failed at \"%s\" " \ - "expected \"%s\" got \"%s\"", \ - FORMAT, EXPECTED, PyUnicode_AsUTF8(result)); \ - goto Fail; \ - } \ - Py_DECREF(result) - -#define CHECK_FORMAT_1(FORMAT, EXPECTED, ARG) \ - CHECK_FORMAT_2(FORMAT, EXPECTED, ARG, 0) - -#define CHECK_FORMAT_0(FORMAT, EXPECTED) \ - CHECK_FORMAT_2(FORMAT, EXPECTED, 0, 0) - - // Unrecognized - CHECK_FORMAT_2("%u %? %u", "1 %? 
%u", 1, 2); - - // "%%" - CHECK_FORMAT_0( "%%", "%"); - CHECK_FORMAT_0( "%0%", "%"); - CHECK_FORMAT_0("%00%", "%"); - CHECK_FORMAT_0( "%2%", "%"); - CHECK_FORMAT_0("%02%", "%"); - CHECK_FORMAT_0("%.0%", "%.0%"); - CHECK_FORMAT_0("%.2%", "%.2%"); - - // "%c" - CHECK_FORMAT_1( "%c", "c", 'c'); - CHECK_FORMAT_1( "%0c", "c", 'c'); - CHECK_FORMAT_1("%00c", "c", 'c'); - CHECK_FORMAT_1( "%2c", "c", 'c'); - CHECK_FORMAT_1("%02c", "c", 'c'); - CHECK_FORMAT_1("%.0c", "c", 'c'); - CHECK_FORMAT_1("%.2c", "c", 'c'); - - // Integers - CHECK_FORMAT_1("%d", "123", (int)123); - CHECK_FORMAT_1("%i", "123", (int)123); - CHECK_FORMAT_1("%u", "123", (unsigned int)123); - CHECK_FORMAT_1("%ld", "123", (long)123); - CHECK_FORMAT_1("%li", "123", (long)123); - CHECK_FORMAT_1("%lu", "123", (unsigned long)123); - CHECK_FORMAT_1("%lld", "123", (long long)123); - CHECK_FORMAT_1("%lli", "123", (long long)123); - CHECK_FORMAT_1("%llu", "123", (unsigned long long)123); - CHECK_FORMAT_1("%zd", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%zi", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%zu", "123", (size_t)123); - CHECK_FORMAT_1("%x", "7b", (int)123); - - CHECK_FORMAT_1("%d", "-123", (int)-123); - CHECK_FORMAT_1("%i", "-123", (int)-123); - CHECK_FORMAT_1("%ld", "-123", (long)-123); - CHECK_FORMAT_1("%li", "-123", (long)-123); - CHECK_FORMAT_1("%lld", "-123", (long long)-123); - CHECK_FORMAT_1("%lli", "-123", (long long)-123); - CHECK_FORMAT_1("%zd", "-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%zi", "-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%x", "ffffff85", (int)-123); - - // Integers: width < length - CHECK_FORMAT_1("%1d", "123", (int)123); - CHECK_FORMAT_1("%1i", "123", (int)123); - CHECK_FORMAT_1("%1u", "123", (unsigned int)123); - CHECK_FORMAT_1("%1ld", "123", (long)123); - CHECK_FORMAT_1("%1li", "123", (long)123); - CHECK_FORMAT_1("%1lu", "123", (unsigned long)123); - CHECK_FORMAT_1("%1lld", "123", (long long)123); - CHECK_FORMAT_1("%1lli", "123", (long long)123); - CHECK_FORMAT_1("%1llu", 
"123", (unsigned long long)123); - CHECK_FORMAT_1("%1zd", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%1zi", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%1zu", "123", (size_t)123); - CHECK_FORMAT_1("%1x", "7b", (int)123); - - CHECK_FORMAT_1("%1d", "-123", (int)-123); - CHECK_FORMAT_1("%1i", "-123", (int)-123); - CHECK_FORMAT_1("%1ld", "-123", (long)-123); - CHECK_FORMAT_1("%1li", "-123", (long)-123); - CHECK_FORMAT_1("%1lld", "-123", (long long)-123); - CHECK_FORMAT_1("%1lli", "-123", (long long)-123); - CHECK_FORMAT_1("%1zd", "-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%1zi", "-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%1x", "ffffff85", (int)-123); - - // Integers: width > length - CHECK_FORMAT_1("%5d", " 123", (int)123); - CHECK_FORMAT_1("%5i", " 123", (int)123); - CHECK_FORMAT_1("%5u", " 123", (unsigned int)123); - CHECK_FORMAT_1("%5ld", " 123", (long)123); - CHECK_FORMAT_1("%5li", " 123", (long)123); - CHECK_FORMAT_1("%5lu", " 123", (unsigned long)123); - CHECK_FORMAT_1("%5lld", " 123", (long long)123); - CHECK_FORMAT_1("%5lli", " 123", (long long)123); - CHECK_FORMAT_1("%5llu", " 123", (unsigned long long)123); - CHECK_FORMAT_1("%5zd", " 123", (Py_ssize_t)123); - CHECK_FORMAT_1("%5zi", " 123", (Py_ssize_t)123); - CHECK_FORMAT_1("%5zu", " 123", (size_t)123); - CHECK_FORMAT_1("%5x", " 7b", (int)123); - - CHECK_FORMAT_1("%5d", " -123", (int)-123); - CHECK_FORMAT_1("%5i", " -123", (int)-123); - CHECK_FORMAT_1("%5ld", " -123", (long)-123); - CHECK_FORMAT_1("%5li", " -123", (long)-123); - CHECK_FORMAT_1("%5lld", " -123", (long long)-123); - CHECK_FORMAT_1("%5lli", " -123", (long long)-123); - CHECK_FORMAT_1("%5zd", " -123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%5zi", " -123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%9x", " ffffff85", (int)-123); - - // Integers: width > length, 0-flag - CHECK_FORMAT_1("%05d", "00123", (int)123); - CHECK_FORMAT_1("%05i", "00123", (int)123); - CHECK_FORMAT_1("%05u", "00123", (unsigned int)123); - CHECK_FORMAT_1("%05ld", "00123", 
(long)123); - CHECK_FORMAT_1("%05li", "00123", (long)123); - CHECK_FORMAT_1("%05lu", "00123", (unsigned long)123); - CHECK_FORMAT_1("%05lld", "00123", (long long)123); - CHECK_FORMAT_1("%05lli", "00123", (long long)123); - CHECK_FORMAT_1("%05llu", "00123", (unsigned long long)123); - CHECK_FORMAT_1("%05zd", "00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%05zi", "00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%05zu", "00123", (size_t)123); - CHECK_FORMAT_1("%05x", "0007b", (int)123); - - CHECK_FORMAT_1("%05d", "-0123", (int)-123); - CHECK_FORMAT_1("%05i", "-0123", (int)-123); - CHECK_FORMAT_1("%05ld", "-0123", (long)-123); - CHECK_FORMAT_1("%05li", "-0123", (long)-123); - CHECK_FORMAT_1("%05lld", "-0123", (long long)-123); - CHECK_FORMAT_1("%05lli", "-0123", (long long)-123); - CHECK_FORMAT_1("%05zd", "-0123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%05zi", "-0123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%09x", "0ffffff85", (int)-123); - - // Integers: precision < length - CHECK_FORMAT_1("%.1d", "123", (int)123); - CHECK_FORMAT_1("%.1i", "123", (int)123); - CHECK_FORMAT_1("%.1u", "123", (unsigned int)123); - CHECK_FORMAT_1("%.1ld", "123", (long)123); - CHECK_FORMAT_1("%.1li", "123", (long)123); - CHECK_FORMAT_1("%.1lu", "123", (unsigned long)123); - CHECK_FORMAT_1("%.1lld", "123", (long long)123); - CHECK_FORMAT_1("%.1lli", "123", (long long)123); - CHECK_FORMAT_1("%.1llu", "123", (unsigned long long)123); - CHECK_FORMAT_1("%.1zd", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%.1zi", "123", (Py_ssize_t)123); - CHECK_FORMAT_1("%.1zu", "123", (size_t)123); - CHECK_FORMAT_1("%.1x", "7b", (int)123); - - CHECK_FORMAT_1("%.1d", "-123", (int)-123); - CHECK_FORMAT_1("%.1i", "-123", (int)-123); - CHECK_FORMAT_1("%.1ld", "-123", (long)-123); - CHECK_FORMAT_1("%.1li", "-123", (long)-123); - CHECK_FORMAT_1("%.1lld", "-123", (long long)-123); - CHECK_FORMAT_1("%.1lli", "-123", (long long)-123); - CHECK_FORMAT_1("%.1zd", "-123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%.1zi", "-123", 
(Py_ssize_t)-123); - CHECK_FORMAT_1("%.1x", "ffffff85", (int)-123); - - // Integers: precision > length - CHECK_FORMAT_1("%.5d", "00123", (int)123); - CHECK_FORMAT_1("%.5i", "00123", (int)123); - CHECK_FORMAT_1("%.5u", "00123", (unsigned int)123); - CHECK_FORMAT_1("%.5ld", "00123", (long)123); - CHECK_FORMAT_1("%.5li", "00123", (long)123); - CHECK_FORMAT_1("%.5lu", "00123", (unsigned long)123); - CHECK_FORMAT_1("%.5lld", "00123", (long long)123); - CHECK_FORMAT_1("%.5lli", "00123", (long long)123); - CHECK_FORMAT_1("%.5llu", "00123", (unsigned long long)123); - CHECK_FORMAT_1("%.5zd", "00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%.5zi", "00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%.5zu", "00123", (size_t)123); - CHECK_FORMAT_1("%.5x", "0007b", (int)123); - - CHECK_FORMAT_1("%.5d", "-00123", (int)-123); - CHECK_FORMAT_1("%.5i", "-00123", (int)-123); - CHECK_FORMAT_1("%.5ld", "-00123", (long)-123); - CHECK_FORMAT_1("%.5li", "-00123", (long)-123); - CHECK_FORMAT_1("%.5lld", "-00123", (long long)-123); - CHECK_FORMAT_1("%.5lli", "-00123", (long long)-123); - CHECK_FORMAT_1("%.5zd", "-00123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%.5zi", "-00123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%.9x", "0ffffff85", (int)-123); - - // Integers: width > precision > length - CHECK_FORMAT_1("%7.5d", " 00123", (int)123); - CHECK_FORMAT_1("%7.5i", " 00123", (int)123); - CHECK_FORMAT_1("%7.5u", " 00123", (unsigned int)123); - CHECK_FORMAT_1("%7.5ld", " 00123", (long)123); - CHECK_FORMAT_1("%7.5li", " 00123", (long)123); - CHECK_FORMAT_1("%7.5lu", " 00123", (unsigned long)123); - CHECK_FORMAT_1("%7.5lld", " 00123", (long long)123); - CHECK_FORMAT_1("%7.5lli", " 00123", (long long)123); - CHECK_FORMAT_1("%7.5llu", " 00123", (unsigned long long)123); - CHECK_FORMAT_1("%7.5zd", " 00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%7.5zi", " 00123", (Py_ssize_t)123); - CHECK_FORMAT_1("%7.5zu", " 00123", (size_t)123); - CHECK_FORMAT_1("%7.5x", " 0007b", (int)123); - - CHECK_FORMAT_1("%7.5d", " 
-00123", (int)-123); - CHECK_FORMAT_1("%7.5i", " -00123", (int)-123); - CHECK_FORMAT_1("%7.5ld", " -00123", (long)-123); - CHECK_FORMAT_1("%7.5li", " -00123", (long)-123); - CHECK_FORMAT_1("%7.5lld", " -00123", (long long)-123); - CHECK_FORMAT_1("%7.5lli", " -00123", (long long)-123); - CHECK_FORMAT_1("%7.5zd", " -00123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%7.5zi", " -00123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%10.9x", " 0ffffff85", (int)-123); - - // Integers: width > precision > length, 0-flag - CHECK_FORMAT_1("%07.5d", "0000123", (int)123); - CHECK_FORMAT_1("%07.5i", "0000123", (int)123); - CHECK_FORMAT_1("%07.5u", "0000123", (unsigned int)123); - CHECK_FORMAT_1("%07.5ld", "0000123", (long)123); - CHECK_FORMAT_1("%07.5li", "0000123", (long)123); - CHECK_FORMAT_1("%07.5lu", "0000123", (unsigned long)123); - CHECK_FORMAT_1("%07.5lld", "0000123", (long long)123); - CHECK_FORMAT_1("%07.5lli", "0000123", (long long)123); - CHECK_FORMAT_1("%07.5llu", "0000123", (unsigned long long)123); - CHECK_FORMAT_1("%07.5zd", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%07.5zi", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%07.5zu", "0000123", (size_t)123); - CHECK_FORMAT_1("%07.5x", "000007b", (int)123); - - CHECK_FORMAT_1("%07.5d", "-000123", (int)-123); - CHECK_FORMAT_1("%07.5i", "-000123", (int)-123); - CHECK_FORMAT_1("%07.5ld", "-000123", (long)-123); - CHECK_FORMAT_1("%07.5li", "-000123", (long)-123); - CHECK_FORMAT_1("%07.5lld", "-000123", (long long)-123); - CHECK_FORMAT_1("%07.5lli", "-000123", (long long)-123); - CHECK_FORMAT_1("%07.5zd", "-000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%07.5zi", "-000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%010.9x", "00ffffff85", (int)-123); - - // Integers: precision > width > length - CHECK_FORMAT_1("%5.7d", "0000123", (int)123); - CHECK_FORMAT_1("%5.7i", "0000123", (int)123); - CHECK_FORMAT_1("%5.7u", "0000123", (unsigned int)123); - CHECK_FORMAT_1("%5.7ld", "0000123", (long)123); - CHECK_FORMAT_1("%5.7li", "0000123", 
(long)123); - CHECK_FORMAT_1("%5.7lu", "0000123", (unsigned long)123); - CHECK_FORMAT_1("%5.7lld", "0000123", (long long)123); - CHECK_FORMAT_1("%5.7lli", "0000123", (long long)123); - CHECK_FORMAT_1("%5.7llu", "0000123", (unsigned long long)123); - CHECK_FORMAT_1("%5.7zd", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%5.7zi", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%5.7zu", "0000123", (size_t)123); - CHECK_FORMAT_1("%5.7x", "000007b", (int)123); - - CHECK_FORMAT_1("%5.7d", "-0000123", (int)-123); - CHECK_FORMAT_1("%5.7i", "-0000123", (int)-123); - CHECK_FORMAT_1("%5.7ld", "-0000123", (long)-123); - CHECK_FORMAT_1("%5.7li", "-0000123", (long)-123); - CHECK_FORMAT_1("%5.7lld", "-0000123", (long long)-123); - CHECK_FORMAT_1("%5.7lli", "-0000123", (long long)-123); - CHECK_FORMAT_1("%5.7zd", "-0000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%5.7zi", "-0000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%9.10x", "00ffffff85", (int)-123); - - // Integers: precision > width > length, 0-flag - CHECK_FORMAT_1("%05.7d", "0000123", (int)123); - CHECK_FORMAT_1("%05.7i", "0000123", (int)123); - CHECK_FORMAT_1("%05.7u", "0000123", (unsigned int)123); - CHECK_FORMAT_1("%05.7ld", "0000123", (long)123); - CHECK_FORMAT_1("%05.7li", "0000123", (long)123); - CHECK_FORMAT_1("%05.7lu", "0000123", (unsigned long)123); - CHECK_FORMAT_1("%05.7lld", "0000123", (long long)123); - CHECK_FORMAT_1("%05.7lli", "0000123", (long long)123); - CHECK_FORMAT_1("%05.7llu", "0000123", (unsigned long long)123); - CHECK_FORMAT_1("%05.7zd", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%05.7zi", "0000123", (Py_ssize_t)123); - CHECK_FORMAT_1("%05.7zu", "0000123", (size_t)123); - CHECK_FORMAT_1("%05.7x", "000007b", (int)123); - - CHECK_FORMAT_1("%05.7d", "-0000123", (int)-123); - CHECK_FORMAT_1("%05.7i", "-0000123", (int)-123); - CHECK_FORMAT_1("%05.7ld", "-0000123", (long)-123); - CHECK_FORMAT_1("%05.7li", "-0000123", (long)-123); - CHECK_FORMAT_1("%05.7lld", "-0000123", (long long)-123); - 
CHECK_FORMAT_1("%05.7lli", "-0000123", (long long)-123); - CHECK_FORMAT_1("%05.7zd", "-0000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%05.7zi", "-0000123", (Py_ssize_t)-123); - CHECK_FORMAT_1("%09.10x", "00ffffff85", (int)-123); - - // Integers: precision = 0, arg = 0 (empty string in C) - CHECK_FORMAT_1("%.0d", "0", (int)0); - CHECK_FORMAT_1("%.0i", "0", (int)0); - CHECK_FORMAT_1("%.0u", "0", (unsigned int)0); - CHECK_FORMAT_1("%.0ld", "0", (long)0); - CHECK_FORMAT_1("%.0li", "0", (long)0); - CHECK_FORMAT_1("%.0lu", "0", (unsigned long)0); - CHECK_FORMAT_1("%.0lld", "0", (long long)0); - CHECK_FORMAT_1("%.0lli", "0", (long long)0); - CHECK_FORMAT_1("%.0llu", "0", (unsigned long long)0); - CHECK_FORMAT_1("%.0zd", "0", (Py_ssize_t)0); - CHECK_FORMAT_1("%.0zi", "0", (Py_ssize_t)0); - CHECK_FORMAT_1("%.0zu", "0", (size_t)0); - CHECK_FORMAT_1("%.0x", "0", (int)0); - - // Strings - CHECK_FORMAT_1("%s", "None", "None"); - CHECK_FORMAT_1("%U", "None", unicode); - CHECK_FORMAT_1("%A", "None", Py_None); - CHECK_FORMAT_1("%S", "None", Py_None); - CHECK_FORMAT_1("%R", "None", Py_None); - CHECK_FORMAT_2("%V", "None", unicode, "ignored"); - CHECK_FORMAT_2("%V", "None", NULL, "None"); - - // Strings: width < length - CHECK_FORMAT_1("%1s", "None", "None"); - CHECK_FORMAT_1("%1U", "None", unicode); - CHECK_FORMAT_1("%1A", "None", Py_None); - CHECK_FORMAT_1("%1S", "None", Py_None); - CHECK_FORMAT_1("%1R", "None", Py_None); - CHECK_FORMAT_2("%1V", "None", unicode, "ignored"); - CHECK_FORMAT_2("%1V", "None", NULL, "None"); - - // Strings: width > length - CHECK_FORMAT_1("%5s", " None", "None"); - CHECK_FORMAT_1("%5U", " None", unicode); - CHECK_FORMAT_1("%5A", " None", Py_None); - CHECK_FORMAT_1("%5S", " None", Py_None); - CHECK_FORMAT_1("%5R", " None", Py_None); - CHECK_FORMAT_2("%5V", " None", unicode, "ignored"); - CHECK_FORMAT_2("%5V", " None", NULL, "None"); - - // Strings: precision < length - CHECK_FORMAT_1("%.1s", "N", "None"); - CHECK_FORMAT_1("%.1U", "N", unicode); - 
CHECK_FORMAT_1("%.1A", "N", Py_None); - CHECK_FORMAT_1("%.1S", "N", Py_None); - CHECK_FORMAT_1("%.1R", "N", Py_None); - CHECK_FORMAT_2("%.1V", "N", unicode, "ignored"); - CHECK_FORMAT_2("%.1V", "N", NULL, "None"); - - // Strings: precision > length - CHECK_FORMAT_1("%.5s", "None", "None"); - CHECK_FORMAT_1("%.5U", "None", unicode); - CHECK_FORMAT_1("%.5A", "None", Py_None); - CHECK_FORMAT_1("%.5S", "None", Py_None); - CHECK_FORMAT_1("%.5R", "None", Py_None); - CHECK_FORMAT_2("%.5V", "None", unicode, "ignored"); - CHECK_FORMAT_2("%.5V", "None", NULL, "None"); - - // Strings: precision < length, width > length - CHECK_FORMAT_1("%5.1s", " N", "None"); - CHECK_FORMAT_1("%5.1U", " N", unicode); - CHECK_FORMAT_1("%5.1A", " N", Py_None); - CHECK_FORMAT_1("%5.1S", " N", Py_None); - CHECK_FORMAT_1("%5.1R", " N", Py_None); - CHECK_FORMAT_2("%5.1V", " N", unicode, "ignored"); - CHECK_FORMAT_2("%5.1V", " N", NULL, "None"); - - // Strings: width < length, precision > length - CHECK_FORMAT_1("%1.5s", "None", "None"); - CHECK_FORMAT_1("%1.5U", "None", unicode); - CHECK_FORMAT_1("%1.5A", "None", Py_None); - CHECK_FORMAT_1("%1.5S", "None", Py_None); - CHECK_FORMAT_1("%1.5R", "None", Py_None); - CHECK_FORMAT_2("%1.5V", "None", unicode, "ignored"); - CHECK_FORMAT_2("%1.5V", "None", NULL, "None"); - - Py_XDECREF(unicode); - Py_RETURN_NONE; - - Fail: - Py_XDECREF(result); - Py_XDECREF(unicode); - return NULL; - -#undef CHECK_FORMAT_2 -#undef CHECK_FORMAT_1 -#undef CHECK_FORMAT_0 -} - - -static PyObject * -test_unicode_compare_with_ascii(PyObject *self, PyObject *Py_UNUSED(ignored)) { - PyObject *py_s = PyUnicode_FromStringAndSize("str\0", 4); - int result; - if (py_s == NULL) - return NULL; - result = PyUnicode_CompareWithASCIIString(py_s, "str"); - Py_DECREF(py_s); - if (!result) { - PyErr_SetString(TestError, "Python string ending in NULL " - "should not compare equal to c string."); - return NULL; - } - Py_RETURN_NONE; -} - /* This is here to provide a docstring for test_descr. 
*/ static PyObject * test_with_docstring(PyObject *self, PyObject *Py_UNUSED(ignored)) @@ -6113,12 +5459,9 @@ static PyMethodDef TestMethods[] = { {"pyobject_repr_from_null", pyobject_repr_from_null, METH_NOARGS}, {"pyobject_str_from_null", pyobject_str_from_null, METH_NOARGS}, {"pyobject_bytes_from_null", pyobject_bytes_from_null, METH_NOARGS}, - {"test_string_from_format", (PyCFunction)test_string_from_format, METH_NOARGS}, {"test_with_docstring", test_with_docstring, METH_NOARGS, PyDoc_STR("This is a pretty normal docstring.")}, {"test_string_to_double", test_string_to_double, METH_NOARGS}, - {"test_unicode_compare_with_ascii", test_unicode_compare_with_ascii, - METH_NOARGS}, {"test_capsule", (PyCFunction)test_capsule, METH_NOARGS}, {"test_from_contiguous", (PyCFunction)test_from_contiguous, METH_NOARGS}, #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) @@ -6187,19 +5530,7 @@ static PyMethodDef TestMethods[] = { {"getargs_et", getargs_et, METH_VARARGS}, {"getargs_es_hash", getargs_es_hash, METH_VARARGS}, {"getargs_et_hash", getargs_et_hash, METH_VARARGS}, - {"codec_incrementalencoder", - (PyCFunction)codec_incrementalencoder, METH_VARARGS}, - {"codec_incrementaldecoder", - (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, {"test_s_code", test_s_code, METH_NOARGS}, - {"test_widechar", test_widechar, METH_NOARGS}, - {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, - {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, - {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, - {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, - {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, - {"unicode_findchar", unicode_findchar, METH_VARARGS}, - {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, #ifdef HAVE_GETTIMEOFDAY @@ -7158,6 +6489,9 @@ PyInit__testcapi(void) if 
(_PyTestCapi_Init_Heaptype(m) < 0) { return NULL; } + if (_PyTestCapi_Init_Unicode(m) < 0) { + return NULL; + } PyState_AddModule(m, &_testcapimodule); return m; diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj index a88540cab19f9a..47e088ec64753d 100644 --- a/PCbuild/_testcapi.vcxproj +++ b/PCbuild/_testcapi.vcxproj @@ -96,6 +96,7 @@ + diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters index a43ab5ea0ff941..bb7366780836a6 100644 --- a/PCbuild/_testcapi.vcxproj.filters +++ b/PCbuild/_testcapi.vcxproj.filters @@ -18,6 +18,9 @@ Source Files + + Source Files + From d70fa1f0376ad2c5150becf23ab0622a8b5c7ada Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Sat, 6 Aug 2022 13:00:16 +0200 Subject: [PATCH 5/6] Use built-in AssertionError --- Modules/_testcapi/unicode.c | 43 ++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index 542391bd0364e7..3b7ab803f35cdb 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -1,16 +1,6 @@ #include "parts.h" -// Forward declarations static struct PyModuleDef *_testcapimodule = NULL; // set at initialization -static PyObject *TestError; /* set to exception object in init */ - -/* Raise TestError with test_name + ": " + msg, and return NULL. 
*/ -static PyObject * -raiseTestError(const char* test_name, const char* msg) -{ - PyErr_Format(TestError, "%s: %s", test_name, msg); - return NULL; -} static PyObject * codec_incrementalencoder(PyObject *self, PyObject *args) @@ -41,7 +31,7 @@ test_unicode_compare_with_ascii(PyObject *self, PyObject *Py_UNUSED(ignored)) { result = PyUnicode_CompareWithASCIIString(py_s, "str"); Py_DECREF(py_s); if (!result) { - PyErr_SetString(TestError, "Python string ending in NULL " + PyErr_SetString(PyExc_AssertionError, "Python string ending in NULL " "should not compare equal to c string."); return NULL; } @@ -74,18 +64,22 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) if (PyUnicode_GET_LENGTH(wide) != PyUnicode_GET_LENGTH(utf8)) { Py_DECREF(wide); Py_DECREF(utf8); - return raiseTestError("test_widechar", - "wide string and utf8 string " - "have different length"); + PyErr_SetString(PyExc_AssertionError, + "test_widechar: " + "wide string and utf8 string " + "have different length"); + return NULL; } if (PyUnicode_Compare(wide, utf8)) { Py_DECREF(wide); Py_DECREF(utf8); if (PyErr_Occurred()) return NULL; - return raiseTestError("test_widechar", - "wide string and utf8 string " - "are different"); + PyErr_SetString(PyExc_AssertionError, + "test_widechar: " + "wide string and utf8 string " + "are different"); + return NULL; } Py_DECREF(wide); @@ -95,9 +89,12 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) wide = PyUnicode_FromWideChar(invalid, 1); if (wide == NULL) PyErr_Clear(); - else - return raiseTestError("test_widechar", - "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); + else { + PyErr_SetString(PyExc_AssertionError, + "test_widechar: " + "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); + return NULL; + } #endif Py_RETURN_NONE; } @@ -287,7 +284,7 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) if (result == NULL) \ return NULL; \ if (!_PyUnicode_EqualToASCIIString(result, EXPECTED)) { \ - 
PyErr_Format(TestError, \ + PyErr_Format(PyExc_AssertionError, \ "test_string_from_format: failed at \"%s\" " \ "expected \"%s\" got \"%s\"", \ FORMAT, EXPECTED, PyUnicode_AsUTF8(result)); \ @@ -684,10 +681,6 @@ int _PyTestCapi_Init_Unicode(PyObject *m) { _testcapimodule = PyModule_GetDef(m); - TestError = PyErr_NewException("_testcapi.unicode_error", NULL, NULL); - Py_INCREF(TestError); - PyModule_AddObject(m, "unicode_error", TestError); - if (PyModule_AddFunctions(m, TestMethods) < 0) { return -1; } From e8d0edfc16dbd365e59dc3d7013e9691b6d35390 Mon Sep 17 00:00:00 2001 From: philg314 <110174000+philg314@users.noreply.github.com> Date: Sun, 7 Aug 2022 11:58:42 +0200 Subject: [PATCH 6/6] Implement requested changes --- Misc/ACKS | 2 +- Objects/unicodeobject.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/ACKS b/Misc/ACKS index 7065267379deb1..28b4ce42e907c6 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -608,8 +608,8 @@ Marius Gedminas Jan-Philip Gehrcke Thomas Gellekum Gabriel Genellina -Christos Georgiou Philip Georgi +Christos Georgiou Elazar (אלעזר) Gershuni Ben Gertzfield Nadim Ghaznavi diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7f27def1963323..eee7d157efb5f4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2482,7 +2482,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, } assert(len >= 0); - int negative = buffer[0]=='-'?1:0; + int negative = (buffer[0] == '-'); len -= negative; precision = Py_MAX(precision, len);