From f9fa69060dffea5cf613dce2068f2b7701dfc45e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 13:27:40 -0700 Subject: [PATCH 01/15] Refactor helpers for specialization --- Python/specialize.c | 136 ++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 48 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index a3fb3d27ffe3aa..3e8e546939380d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -24,25 +24,6 @@ extern const char *_PyUOpName(int index); * ./adaptive.md */ -#ifdef Py_GIL_DISABLED -#define SET_OPCODE_OR_RETURN(instr, opcode) \ - do { \ - uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \ - if (old_op >= MIN_INSTRUMENTED_OPCODE) { \ - /* Lost race with instrumentation */ \ - return; \ - } \ - if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \ - (opcode))) { \ - /* Lost race with instrumentation */ \ - assert(old_op >= MIN_INSTRUMENTED_OPCODE); \ - return; \ - } \ - } while (0) -#else -#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode) -#endif - #ifdef Py_STATS GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 }; static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats }; @@ -682,6 +663,77 @@ _PyCode_DisableSpecialization(_Py_CODEUNIT *instructions, Py_ssize_t size) #define SPEC_FAIL_CONTAINS_OP_LIST 11 #define SPEC_FAIL_CONTAINS_OP_USER_CLASS 12 +static int +set_opcode(_Py_CODEUNIT *instr, uint8_t opcode) +{ +#ifdef Py_GIL_DISABLED + uint8_t old_op = _Py_atomic_load_uint8_relaxed(&instr->op.code); + if (old_op >= MIN_INSTRUMENTED_OPCODE) { + /* Lost race with instrumentation */ + return 0; + } + if (!_Py_atomic_compare_exchange_uint8(&instr->op.code, &old_op, opcode)) { + /* Lost race with instrumentation */ + assert(old_op >= MIN_INSTRUMENTED_OPCODE); + return 0; + } + return 1; +#else + instr->op.code = opcode; + return 1; +#endif +} + +static inline void +set_counter(_Py_BackoffCounter *counter, _Py_BackoffCounter value) +{ + FT_ATOMIC_STORE_UINT16_RELAXED(counter->value_and_backoff, + value.value_and_backoff); +} + +static inline _Py_BackoffCounter +load_counter(_Py_BackoffCounter *counter) +{ + _Py_BackoffCounter result = { + .value_and_backoff = + FT_ATOMIC_LOAD_UINT16_RELAXED(counter->value_and_backoff)}; + return result; +} + +static void +specialize(_Py_CODEUNIT *instr, uint8_t specialized_opcode, void *cache) +{ + assert(!PyErr_Occurred()); + uint8_t generic_opcode = _PyOpcode_Deopt[specialized_opcode]; + if (!set_opcode(instr, specialized_opcode)) { + STAT_INC(generic_opcode, failure); + SPECIALIZATION_FAIL(generic_opcode, SPEC_FAIL_OTHER); + return; + } + if (cache != NULL) { + uint8_t num_caches = _PyOpcode_Caches[generic_opcode]; + memcpy(instr + 1, cache, sizeof(_Py_CODEUNIT) * num_caches); + } + set_counter((_Py_BackoffCounter *)instr + 1, adaptive_counter_cooldown()); +} + +static void +unspecialize(_Py_CODEUNIT *instr, int reason) +{ + assert(!PyErr_Occurred()); + uint8_t opcode = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.code); + uint8_t generic_opcode = _PyOpcode_Deopt[opcode]; + STAT_INC(generic_opcode, failure); + if (!set_opcode(instr, generic_opcode)) { + SPECIALIZATION_FAIL(generic_opcode, SPEC_FAIL_OTHER); + return; + } + SPECIALIZATION_FAIL(generic_opcode, reason); + _Py_BackoffCounter *counter = (_Py_BackoffCounter *)instr + 1; + _Py_BackoffCounter cur = load_counter(counter); + set_counter(counter, adaptive_counter_backoff(cur)); +} + static int function_kind(PyCodeObject *code); static bool function_check_args(PyObject *o, int 
expected_argcount, int opcode); static uint32_t function_get_version(PyObject *o, int opcode); @@ -2190,7 +2242,6 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } -#ifdef Py_STATS static int binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) { @@ -2258,7 +2309,6 @@ binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) } Py_UNREACHABLE(); } -#endif // Py_STATS void _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *instr, @@ -2268,8 +2318,6 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st); assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP); - _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1); - uint8_t specialized_op; switch (oparg) { case NB_ADD: case NB_INPLACE_ADD: @@ -2280,19 +2328,19 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in _Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1]; bool to_store = (next.op.code == STORE_FAST); if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) { - specialized_op = BINARY_OP_INPLACE_ADD_UNICODE; - goto success; + specialize(instr, BINARY_OP_INPLACE_ADD_UNICODE, NULL); + return; } - specialized_op = BINARY_OP_ADD_UNICODE; - goto success; + specialize(instr, BINARY_OP_ADD_UNICODE, NULL); + return; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_ADD_INT; - goto success; + specialize(instr, BINARY_OP_ADD_INT, NULL); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_ADD_FLOAT; - goto success; + specialize(instr, BINARY_OP_ADD_FLOAT, NULL); + return; } break; case NB_MULTIPLY: @@ -2301,12 +2349,12 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_MULTIPLY_INT; - goto success; + specialize(instr, BINARY_OP_MULTIPLY_INT, NULL); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_MULTIPLY_FLOAT; - goto success; + specialize(instr, BINARY_OP_MULTIPLY_FLOAT, NULL); + return; } break; case NB_SUBTRACT: @@ -2315,24 +2363,16 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_SUBTRACT_INT; - goto success; + specialize(instr, BINARY_OP_SUBTRACT_INT, NULL); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_SUBTRACT_FLOAT; - goto success; + specialize(instr, BINARY_OP_SUBTRACT_FLOAT, NULL); + return; } break; } - SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs)); - STAT_INC(BINARY_OP, failure); - SET_OPCODE_OR_RETURN(instr, BINARY_OP); - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(BINARY_OP, success); - SET_OPCODE_OR_RETURN(instr, specialized_op); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr, binary_op_fail_kind(oparg, lhs, rhs)); } From e431f82c3da5ac01dd9485913a1db06a5cf40156 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 14:55:05 -0700 Subject: [PATCH 02/15] Refactor specialize_c_call to use helpers --- Python/specialize.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 3e8e546939380d..d11df856166ac5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2107,49 +2107,49 @@ 
specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return 0; } -static int +static void specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { if (PyCFunction_GET_FUNCTION(callable) == NULL) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OTHER); - return 1; + unspecialize(instr, SPEC_FAIL_OTHER); + return; } switch (PyCFunction_GET_FLAGS(callable) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_O: { if (nargs != 1) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return 1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } /* len(o) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.len) { - instr->op.code = CALL_LEN; - return 0; + specialize(instr, CALL_LEN, NULL); + return; } - instr->op.code = CALL_BUILTIN_O; - return 0; + specialize(instr, CALL_BUILTIN_O, NULL); + return; } case METH_FASTCALL: { if (nargs == 2) { /* isinstance(o1, o2) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.isinstance) { - instr->op.code = CALL_ISINSTANCE; - return 0; + specialize(instr, CALL_ISINSTANCE, NULL); + return; } } - instr->op.code = CALL_BUILTIN_FAST; - return 0; + specialize(instr, CALL_BUILTIN_FAST, NULL); + return; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS; - return 0; + specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS, NULL); + return; } default: - instr->op.code = CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL, NULL); + return; } } @@ -2164,7 +2164,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) _PyCallCache *cache = (_PyCallCache *)(instr + 1); int fail; if (PyCFunction_CheckExact(callable)) { - fail = specialize_c_call(callable, instr, nargs); + specialize_c_call(callable, instr, nargs); + return; } else if (PyFunction_Check(callable)) { fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); From 42249ad6fd442eb6a9814d389a3240c6ed6174dd Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:04:06 -0700 Subject: [PATCH 03/15] Refactor specialize_py_call to use helpers --- Python/specialize.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index d11df856166ac5..056eda896d1e26 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2045,42 +2045,43 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, return 0; } -static int +static void specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) { - _PyCallCache *cache = (_PyCallCache *)(instr + 1); PyCodeObject *code = (PyCodeObject *)func->func_code; int kind = function_kind(code); /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); - return -1; + unspecialize(instr, SPEC_FAIL_CALL_PEP_523); + return; } int argcount = -1; if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED); - return -1; + unspecialize(instr, SPEC_FAIL_CODE_NOT_OPTIMIZED); + return; } if (kind == SIMPLE_FUNCTION) { argcount = code->co_argcount; } int version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); - return -1; + 
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + return; } - write_u32(cache->func_version, version); + _PyCallCache cache; + write_u32(cache.func_version, version); + uint8_t opcode; if (argcount == nargs + bound_method) { - instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; + opcode = + bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; } else { - instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; + opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; } - return 0; + specialize(instr, opcode, &cache); } - static int specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) @@ -2168,7 +2169,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; } else if (PyFunction_Check(callable)) { - fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); + specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); + return; } else if (PyType_Check(callable)) { fail = specialize_class_call(callable, instr, nargs); @@ -2179,7 +2181,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; if (PyFunction_Check(func)) { - fail = specialize_py_call((PyFunctionObject *)func, instr, nargs, true); + specialize_py_call((PyFunctionObject *)func, instr, nargs, true); + return; } else { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD); From 2bfbdea4904f82aa3199fbecfbcd6c518558f158 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:16:21 -0700 Subject: [PATCH 04/15] Refactor specialize_class_call to use helpers --- Python/specialize.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 056eda896d1e26..61c859fefaa533 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -603,6 +603,7 @@ _PyCode_DisableSpecialization(_Py_CODEUNIT *instructions, Py_ssize_t size) #define SPEC_FAIL_CALL_INIT_NOT_SIMPLE 30 #define SPEC_FAIL_CALL_METACLASS 31 #define SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES 32 +#define SPEC_FAIL_CALL_NO_TYPE_VERSION 33 /* COMPARE_OP */ #define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12 @@ -1953,7 +1954,7 @@ get_init_for_simple_managed_python_class(PyTypeObject *tp) return (PyFunctionObject *)init; } -static int +static void specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { assert(PyType_Check(callable)); @@ -1962,21 +1963,21 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) int oparg = instr->op.arg; if (nargs == 1 && oparg == 1) { if (tp == &PyUnicode_Type) { - instr->op.code = CALL_STR_1; - return 0; + specialize(instr, CALL_STR_1, NULL); + return; } else if (tp == &PyType_Type) { - instr->op.code = CALL_TYPE_1; - return 0; + specialize(instr, CALL_TYPE_1, NULL); + return; } else if (tp == &PyTuple_Type) { - instr->op.code = CALL_TUPLE_1; - return 0; + specialize(instr, CALL_TUPLE_1, NULL); + return; } } if (tp->tp_vectorcall != NULL) { - instr->op.code = CALL_BUILTIN_CLASS; - return 0; + specialize(instr, CALL_BUILTIN_CLASS, NULL); + return; } goto generic; } @@ -1986,18 +1987,18 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (tp->tp_new == PyBaseObject_Type.tp_new) { PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); if (type_get_version(tp, CALL) == 0) { - return 
-1; + unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION); + return; } if (init != NULL) { - _PyCallCache *cache = (_PyCallCache *)(instr + 1); - write_u32(cache->func_version, tp->tp_version_tag); - _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT); - return 0; + _PyCallCache cache; + write_u32(cache.func_version, tp->tp_version_tag); + specialize(instr, CALL_ALLOC_AND_ENTER_INIT, &cache); + return; } } generic: - instr->op.code = CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL, NULL); } static int @@ -2173,7 +2174,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; } else if (PyType_Check(callable)) { - fail = specialize_class_call(callable, instr, nargs); + specialize_class_call(callable, instr, nargs); + return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { fail = specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); From 7fbdd99f383354cede85c256ed640d9ae255c234 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:45:47 -0700 Subject: [PATCH 05/15] Refactor specialize_method_descriptor to use helpers --- Python/specialize.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 61c859fefaa533..f56c09fbcc42e5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2001,7 +2001,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) specialize(instr, CALL_NON_PY_GENERAL, NULL); } -static int +static void specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, int nargs) { @@ -2010,16 +2010,16 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, METH_KEYWORDS | METH_METHOD)) { case METH_NOARGS: { if (nargs != 1) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return -1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } - instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS, NULL); + return; } case METH_O: { if (nargs != 2) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return -1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *list_append = interp->callable_cache.list_append; @@ -2027,23 +2027,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, bool pop = (next.op.code == POP_TOP); int oparg = instr->op.arg; if ((PyObject *)descr == list_append && oparg == 1 && pop) { - instr->op.code = CALL_LIST_APPEND; - return 0; + specialize(instr, CALL_LIST_APPEND, NULL); + return; } - instr->op.code = CALL_METHOD_DESCRIPTOR_O; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_O, NULL); + return; } case METH_FASTCALL: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST, NULL); + return; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, NULL); + return; } } - instr->op.code = CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL, NULL); } static void @@ -2178,7 +2177,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { - fail = specialize_method_descriptor((PyMethodDescrObject *)callable, 
instr, nargs); + specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); + return; } else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; From 0d60fce1be5d8564b5d9903e849d75c363abb7d5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:50:55 -0700 Subject: [PATCH 06/15] Remove unneeded code --- Python/specialize.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index f56c09fbcc42e5..d8872b488bb206 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2162,49 +2162,29 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) assert(ENABLE_SPECIALIZATION); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); - _PyCallCache *cache = (_PyCallCache *)(instr + 1); - int fail; if (PyCFunction_CheckExact(callable)) { specialize_c_call(callable, instr, nargs); - return; } else if (PyFunction_Check(callable)) { specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); - return; } else if (PyType_Check(callable)) { specialize_class_call(callable, instr, nargs); - return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); - return; } else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; if (PyFunction_Check(func)) { specialize_py_call((PyFunctionObject *)func, instr, nargs, true); - return; } else { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD); - fail = -1; + unspecialize(instr, SPEC_FAIL_CALL_BOUND_METHOD); } } else { - instr->op.code = CALL_NON_PY_GENERAL; - fail = 0; - } - if (fail) { - STAT_INC(CALL, failure); - assert(!PyErr_Occurred()); - instr->op.code = CALL; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(CALL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, CALL_NON_PY_GENERAL, NULL); } } From 09f80b19aae085941b596129f9ab5046238edccd Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 19:09:19 -0700 Subject: [PATCH 07/15] Enable almost all specializations of CALL _CALL_ALLOC_AND_ENTER_INIT will be addressed in a separate PR --- Python/bytecodes.c | 4 ++-- Python/specialize.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6b10a7acfe4a91..0790e5e61152f7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3301,7 +3301,7 @@ dummy_func( }; specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0])); @@ -3309,7 +3309,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) { diff --git a/Python/specialize.c b/Python/specialize.c index d8872b488bb206..36677da2103f52 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1984,6 +1984,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) 
if (Py_TYPE(tp) != &PyType_Type) { goto generic; } + #ifndef Py_GIL_DISABLED if (tp->tp_new == PyBaseObject_Type.tp_new) { PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); if (type_get_version(tp, CALL) == 0) { @@ -1997,6 +1998,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) return; } } + #endif generic: specialize(instr, CALL_NON_PY_GENERAL, NULL); } @@ -2159,7 +2161,7 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) { PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); if (PyCFunction_CheckExact(callable)) { From b9c6f4e414c8b2f4dde1d4b2b646e08c6fb58bb4 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 19:10:31 -0700 Subject: [PATCH 08/15] Fix usage of PyStackRef_FromPyObjectSteal in _CALL_STR_1 This was missed in gh-124894 --- Python/bytecodes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0790e5e61152f7..044eec1530b765 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3647,11 +3647,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_STR_1) = From f56c2cff633cecfc4012ee285206faacf3e8af77 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 10:51:55 -0700 Subject: [PATCH 09/15] Fix usage of PyStackRef_FromPyObjectSteal in CALL_TUPLE_1 This was missed in gh-124894 --- Python/bytecodes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 044eec1530b765..30e135594e524f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3669,11 +3669,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_TUPLE_1) = From 438cb65fe57550eafb8ca9d71cb532a3f6d84c9f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 10:52:09 -0700 Subject: [PATCH 10/15] Regen files --- Python/executor_cases.c.h | 10 ++++++---- Python/generated_cases.c.h | 14 ++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5882c472405668..c99d30b130da51 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4306,10 +4306,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; 
assert(WITHIN_STACK_BOUNDS()); @@ -4338,10 +4339,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a64c4428b70290..5ced19e92495d8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -872,7 +872,7 @@ callable = &stack_pointer[-2 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -882,7 +882,7 @@ } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD @@ -2976,10 +2976,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { @@ -3026,10 +3027,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { From 97e2aaa35b6ee8b21c3cd215d9bb749aaa33357d Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 14:23:24 -0700 Subject: [PATCH 11/15] Fix implementation of CALL_LIST_APPEND in free-threaded builds This needs to acquire a critical section on the list. --- Include/internal/pycore_list.h | 3 +++ Objects/listobject.c | 10 ++++++++++ Python/bytecodes.c | 5 +++++ 3 files changed, 18 insertions(+) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..6830fa26c28303 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -37,6 +37,9 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) return _PyList_AppendTakeRefListResize(self, newitem); } +// Like _PyList_AppendTakeRef, but locks self in free-threaded builds. 
+extern int _PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem); + // Repeat the bytes of a buffer in place static inline void _Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src) diff --git a/Objects/listobject.c b/Objects/listobject.c index 930aefde325a7c..6140388f2ba858 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -490,6 +490,16 @@ PyList_Append(PyObject *op, PyObject *newitem) return -1; } +int +_PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem) +{ + int ret; + Py_BEGIN_CRITICAL_SECTION(self); + ret = _PyList_AppendTakeRef((PyListObject *)self, newitem); + Py_END_CRITICAL_SECTION(); + return ret; +} + /* Methods */ static void diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 30e135594e524f..e56d3c70c202b9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3967,7 +3967,12 @@ dummy_func( assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o)); STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); ERROR_IF(err, error); From 819f30a03fac660b2f4361f2812411576a87f9e6 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 14:31:20 -0700 Subject: [PATCH 12/15] Regenerate interpreter and friends --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/executor_cases.c.h | 7 +++++++ Python/generated_cases.c.h | 7 +++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 8fec45b1e8d5c3..6b88dd3e688a2f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1054,7 +1054,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 890aa7815886bf..abda0fc960196b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -235,7 +235,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | 
HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG, + [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c99d30b130da51..84e6506272e96e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4782,7 +4782,14 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + _PyFrame_SetStackPointer(frame, stack_pointer); + err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5ced19e92495d8..f1e4d3c78de4b0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2376,7 +2376,14 @@ assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + _PyFrame_SetStackPointer(frame, stack_pointer); + err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error; From 58c4f7bad41151ac5474216e127f882bb22352a7 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 22 Oct 2024 16:57:13 -0700 Subject: [PATCH 13/15] Refactor PyType_LookupRef to return version --- Include/internal/pycore_object.h | 2 ++ Objects/typeobject.c | 42 ++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8832692d03c29e..6bf81158197d09 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -775,6 +775,8 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value); extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); +extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, + unsigned int *); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c9cc4d552fc02c..747deaa36d4560 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5512,9 +5512,12 @@ _PyTypes_AfterFork(void) } /* Internal API to look for a name through the MRO. - This returns a borrowed reference, and doesn't set an exception! */ + This returns a strong reference, and doesn't set an exception! + If nonzero, version is set to the value of type->tp_version at the time of + the lookup. 
+*/ PyObject * -_PyType_LookupRef(PyTypeObject *type, PyObject *name) +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) { PyObject *res; int error; @@ -5537,6 +5540,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // If the sequence is still valid then we're done if (value == NULL || _Py_TryIncref(value)) { if (_PySeqLock_EndRead(&entry->sequence, sequence)) { + if (version != NULL) { + *version = entry_version; + } return value; } Py_XDECREF(value); @@ -5558,6 +5564,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); Py_XINCREF(entry->value); + if (version != NULL) { + *version = entry->version; + } return entry->value; } #endif @@ -5571,12 +5580,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // anyone else can modify our mro or mutate the type. int has_version = 0; - int version = 0; + unsigned int assigned_version = 0; BEGIN_TYPE_LOCK(); res = find_name_in_mro(type, name, &error); if (MCACHE_CACHEABLE_NAME(name)) { has_version = assign_version_tag(interp, type); - version = type->tp_version_tag; + assigned_version = type->tp_version_tag; } END_TYPE_LOCK(); @@ -5593,24 +5602,43 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) if (error == -1) { PyErr_Clear(); } + if (version != NULL) { + // 0 is not a valid version + *version = 0; + } return NULL; } if (has_version) { #if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, version, res); + update_cache_gil_disabled(entry, name, assigned_version, res); #else - PyObject *old_value = update_cache(entry, name, version, res); + PyObject *old_value = update_cache(entry, name, assigned_version, res); Py_DECREF(old_value); #endif } + if (version != NULL) { + // 0 is not a valid version + *version = has_version ? assigned_version : 0; + } return res; } +/* Internal API to look for a name through the MRO. + This returns a strong reference, and doesn't set an exception! +*/ +PyObject * +_PyType_LookupRef(PyTypeObject *type, PyObject *name) +{ + return _PyType_LookupRefAndVersion(type, name, NULL); +} + +/* Internal API to look for a name through the MRO. + This returns a borrowed reference, and doesn't set an exception! */ PyObject * _PyType_Lookup(PyTypeObject *type, PyObject *name) { - PyObject *res = _PyType_LookupRef(type, name); + PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL); Py_XDECREF(res); return res; } From b33986abaaf1094c8d0b90e51e2bdad64eb997c5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 24 Oct 2024 15:38:23 -0700 Subject: [PATCH 14/15] Make CALL_ALLOC_AND_ENTER_INIT thread-safe - Modify `get_init_for_simple_managed_python_class` to return both init as well as the type version at the time of lookup. - Modify caching logic to verify that the current version of the type matches the version at the time of lookup. This prevents potentially caching a stale value if we race with an update to __init__. - Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the borrowed reference to __init__ that is stored in the cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes: 1. The type version is cleared before the reference in the MRO to __init__ is destroyed. 2. If the reference in (1) was the last reference then the __init__ method will be queued for deletion the next time GC runs. 3. 
GC requires stopping the world, which forces a synchronizes-with operation between all threads. 4. If the GC collects the cached __init__, then type's version will have been updated *and* the update will be visible to all threads, so the guard cannot pass. - There are no escaping calls in between loading from the specialization cache and pushing the frame. This is a requirement for the default build. --- Include/internal/pycore_object.h | 2 ++ Objects/typeobject.c | 21 +++++++++++++++ Python/bytecodes.c | 4 +-- Python/specialize.c | 46 ++++++++++++++++++++------------ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 6bf81158197d09..8bd1edac33d545 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -777,6 +777,8 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, unsigned int *); +extern int _PyType_CacheInitForSpecialization(PyTypeObject *, PyObject *, + unsigned int); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 747deaa36d4560..135d5b87e2f8f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5643,6 +5643,27 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return res; } + +int +_PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init, + unsigned int tp_version) +{ + if (!init || !tp_version) { + return 0; + } + int can_cache = type->tp_version_tag == tp_version; + BEGIN_TYPE_LOCK(); + #ifdef Py_GIL_DISABLED + can_cache = can_cache && _PyObject_HasDeferredRefcount(init); + #endif + if (can_cache) { + PyHeapTypeObject *ht = (PyHeapTypeObject*) type; + FT_ATOMIC_STORE_PTR_RELAXED(ht->_spec_cache.init, init); + } + END_TYPE_LOCK(); + return can_cache; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e56d3c70c202b9..a8fd621cb4c7bc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3688,10 +3688,10 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null[0])); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); diff --git a/Python/specialize.c b/Python/specialize.c index 36677da2103f52..3acd38a951c3f1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1920,38 +1920,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD cache->counter = adaptive_counter_cooldown(); } -/* Returns a borrowed reference. - * The reference is only valid if guarded by a type version check. - */ -static PyFunctionObject * -get_init_for_simple_managed_python_class(PyTypeObject *tp) +/* Returns a strong reference. 
*/ +static PyObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version) { assert(tp->tp_new == PyBaseObject_Type.tp_new); if (tp->tp_alloc != PyType_GenericAlloc) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN); return NULL; } - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { + unsigned long tp_flags = PyType_GetFlags(tp); + if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES); return NULL; } - if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) { /* Is this possible? */ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR); return NULL; } - PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__)); + PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version); if (init == NULL || !PyFunction_Check(init)) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON); + Py_XDECREF(init); return NULL; } int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE); + Py_DECREF(init); return NULL; } - ((PyHeapTypeObject *)tp)->_spec_cache.init = init; - return (PyFunctionObject *)init; + return init; } static void @@ -1984,21 +1984,23 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (Py_TYPE(tp) != &PyType_Type) { goto generic; } - #ifndef Py_GIL_DISABLED if (tp->tp_new == PyBaseObject_Type.tp_new) { - PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); - if (type_get_version(tp, CALL) == 0) { - unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION); + unsigned int tp_version = 0; + PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); + if (!tp_version) { + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + Py_XDECREF(init); return; } - if (init != NULL) { + if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) { _PyCallCache cache; - write_u32(cache.func_version, tp->tp_version_tag); + write_u32(cache.func_version, tp_version); specialize(instr, CALL_ALLOC_AND_ENTER_INIT, &cache); + Py_DECREF(init); return; } + Py_XDECREF(init); } - #endif generic: specialize(instr, CALL_NON_PY_GENERAL, NULL); } @@ -2808,6 +2810,13 @@ static const PyBytesObject no_location = { .ob_sval = { NO_LOC_4 } }; +#ifdef Py_GIL_DISABLED +static _PyCodeArray init_cleanup_tlbc = { + .size = 1, + .entries = {(char*) &_Py_InitCleanup.co_code_adaptive}, +}; +#endif + const struct _PyCode8 _Py_InitCleanup = { _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), @@ -2823,6 +2832,9 @@ const struct _PyCode8 _Py_InitCleanup = { ._co_firsttraceable = 4, .co_stacksize = 2, .co_framesize = 2 + FRAME_SPECIALS_SIZE, +#ifdef Py_GIL_DISABLED + .co_tlbc = &init_cleanup_tlbc, +#endif .co_code_adaptive = { EXIT_INIT_CHECK, 0, RETURN_VALUE, 0, From 68e81fd17fe13449e8eb1d0a9932e4edbd2f5d7a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 24 Oct 2024 15:38:40 -0700 Subject: [PATCH 15/15] Regenerate files --- Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 84e6506272e96e..fa290156e010cf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4373,13 +4373,13 @@ JUMP_TO_JUMP_TARGET(); } PyTypeObject *tp = (PyTypeObject *)callable_o; - if (tp->tp_version_tag != type_version) { + if 
(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f1e4d3c78de4b0..d26e0e9d227cdc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1040,10 +1040,10 @@ DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL); DEOPT_IF(!PyType_Check(callable_o), CALL); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version, CALL); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit);
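For reference, the call-site shape that the refactor in patches 01-06 converges on is roughly the following minimal sketch. specialize_foo, FOO_SPECIALIZED, and the use of SPEC_FAIL_OTHER here are hypothetical and for illustration only; the real call sites are specialize_c_call, specialize_py_call, specialize_class_call, and specialize_method_descriptor in the hunks above.

static void
specialize_foo(PyObject *arg, _Py_CODEUNIT *instr)
{
    if (!PyLong_CheckExact(arg)) {
        /* unspecialize(): bumps the failure stat, records the reason,
         * restores the generic opcode (atomically under Py_GIL_DISABLED,
         * giving up if it lost a race with instrumentation), and applies
         * exponential backoff to the adaptive counter. */
        unspecialize(instr, SPEC_FAIL_OTHER);
        return;
    }
    /* specialize(): installs the specialized opcode (same race handling),
     * then optionally copies an inline-cache payload built on the C stack
     * into the cache entries that follow the instruction (pass NULL when
     * there is no payload, as most CALL variants do), and finally resets
     * the adaptive counter to cooldown. */
    specialize(instr, FOO_SPECIALIZED, NULL);
}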