From 044dc496e06843cd3eb30a32f34d9d080635875a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 15:55:37 +0200 Subject: [PATCH] gh-117709: Add vectorcall support for str() with positional-only arguments (#117746) Fall back to tp_call() for cases when arguments are passed by name. Co-authored-by: Donghee Na Co-authored-by: Victor Stinner --- Lib/test/test_str.py | 18 +++++++ ...-04-10-22-16-18.gh-issue-117709.-_1YL0.rst | 3 ++ Objects/unicodeobject.c | 51 +++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index b4927113db44e3..ea37eb5d96457d 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -2651,6 +2651,24 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + def test_str_invalid_call(self): + check = lambda *a, **kw: self.assertRaises(TypeError, str, *a, **kw) + + # too many args + check(1, "", "", 1) + + # no such kw arg + check(test=1) + + # 'encoding' must be str + check(1, encoding=1) + check(1, 1) + + # 'errors' must be str + check(1, errors=1) + check(1, "", errors=1) + check(1, 1, 1) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst new file mode 100644 index 00000000000000..2216b53688c378 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst @@ -0,0 +1,3 @@ +Speed up calls to :func:`str` with positional-only arguments, +by using the :pep:`590` ``vectorcall`` calling convention. +Patch by Erlend Aasland. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f15071d7d54ef..2c259b7e869efe 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14617,6 +14617,82 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
     return unicode;
 }
 
+/* Return the UTF-8 encoded C string for the str() argument 'obj'.
+   'name' is the parameter name reported in the error message.
+
+   Set TypeError and return NULL if 'obj' is not a str.  Otherwise return
+   the result of _PyUnicode_AsUTF8NoNUL(), which presumably is NULL with an
+   exception set on failure; callers must check for NULL either way. */
+static const char *
+arg_as_utf8(PyObject *obj, const char *name)
+{
+    if (!PyUnicode_Check(obj)) {
+        PyErr_Format(PyExc_TypeError,
+                     "str() argument '%s' must be str, not %T",
+                     name, obj);
+        return NULL;
+    }
+    return _PyUnicode_AsUTF8NoNUL(obj);
+}
+
+/* Vectorcall (PEP 590) implementation for calling the 'str' type itself.
+
+   Calls using only positional arguments are handled here directly.  When
+   any argument is passed by name, the arguments are repacked into a tuple
+   and a dict and the call is forwarded to unicode_new().
+
+   Return the resulting object, or NULL on error. */
+static PyObject *
+unicode_vectorcall(PyObject *type, PyObject *const *args,
+                   size_t nargsf, PyObject *kwnames)
+{
+    assert(Py_Is(_PyType_CAST(type), &PyUnicode_Type));
+
+    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+    if (kwnames != NULL && PyTuple_GET_SIZE(kwnames) != 0) {
+        // Fallback to unicode_new()
+        PyObject *tuple = _PyTuple_FromArray(args, nargs);
+        if (tuple == NULL) {
+            return NULL;
+        }
+        PyObject *dict = _PyStack_AsDict(args + nargs, kwnames);
+        if (dict == NULL) {
+            Py_DECREF(tuple);
+            return NULL;
+        }
+        PyObject *ret = unicode_new(_PyType_CAST(type), tuple, dict);
+        Py_DECREF(tuple);
+        Py_DECREF(dict);
+        return ret;
+    }
+    if (!_PyArg_CheckPositional("str", nargs, 0, 3)) {
+        return NULL;
+    }
+    if (nargs == 0) {
+        return unicode_get_empty();
+    }
+    PyObject *object = args[0];
+    if (nargs == 1) {
+        return PyObject_Str(object);
+    }
+    // arg_as_utf8() returns NULL with an exception set on failure.  Bail out
+    // right away: PyUnicode_FromEncodedObject() treats a NULL encoding or
+    // errors as "use the default" and could return a result while the
+    // exception is still set.
+    const char *encoding = arg_as_utf8(args[1], "encoding");
+    if (encoding == NULL) {
+        return NULL;
+    }
+    const char *errors = NULL;
+    if (nargs == 3) {
+        errors = arg_as_utf8(args[2], "errors");
+        if (errors == NULL) {
+            return NULL;
+        }
+    }
+    return PyUnicode_FromEncodedObject(object, encoding, errors);
+}
+
 static PyObject *
 unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
 {
@@ -14758,6 +14834,7 @@ PyTypeObject PyUnicode_Type = {
     0, /* tp_alloc */
     unicode_new, /* tp_new */
     PyObject_Del, /* tp_free */
+    .tp_vectorcall = unicode_vectorcall,
 };
 
 /* Initialize the Unicode implementation */