From f978fb4f8d6eac0585057e463bb1701dc04a9900 Mon Sep 17 00:00:00 2001 From: mpage Date: Wed, 9 Oct 2024 08:18:25 -0700 Subject: [PATCH] gh-115999: Refactor `LOAD_GLOBAL` specializations to avoid reloading {globals, builtins} keys (gh-124953) Each of the `LOAD_GLOBAL` specializations is implemented roughly as: 1. Load keys version. 2. Load cached keys version. 3. Deopt if (1) and (2) don't match. 4. Load keys. 5. Load cached index into keys. 6. Load object from (4) at offset from (5). This is not thread-safe in free-threaded builds; the keys object may be replaced in between steps (3) and (4). This change refactors the specializations to avoid reloading the keys object and instead pass the keys object from guards to be consumed by downstream uops. --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 213 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 28 ++- Python/bytecodes.c | 57 ++++-- Python/executor_cases.c.h | 91 ++++++++- Python/generated_cases.c.h | 22 ++- Python/optimizer_analysis.c | 42 ++++- Python/optimizer_bytecodes.c | 10 + Python/optimizer_cases.c.h | 60 +++++- 9 files changed, 372 insertions(+), 155 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index a0d3072d2cd5f6..8fec45b1e8d5c3 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1362,8 +1362,8 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, - [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } }, - [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _LOAD_GLOBAL_MODULE, 1, 3 } } }, + [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, + [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION_PUSH_KEYS, 1, 1 }, { _LOAD_GLOBAL_MODULE_FROM_KEYS, 1, 3 } } }, [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } }, [LOAD_NAME] = { .nuops = 1, .uops = { { _LOAD_NAME, 0, 0 } } }, [LOAD_SPECIAL] = { .nuops = 1, .uops = { { _LOAD_SPECIAL, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 48bafacabcfd1d..1951c65a2871cf 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -120,32 +120,33 @@ extern "C" { #define _GUARD_BOTH_FLOAT 367 #define _GUARD_BOTH_INT 368 #define _GUARD_BOTH_UNICODE 369 -#define _GUARD_BUILTINS_VERSION 370 +#define _GUARD_BUILTINS_VERSION_PUSH_KEYS 370 #define _GUARD_DORV_NO_DICT 371 #define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 372 #define _GUARD_GLOBALS_VERSION 373 -#define _GUARD_IS_FALSE_POP 374 -#define _GUARD_IS_NONE_POP 375 -#define _GUARD_IS_NOT_NONE_POP 376 -#define _GUARD_IS_TRUE_POP 377 -#define _GUARD_KEYS_VERSION 378 -#define _GUARD_NOS_FLOAT 379 -#define _GUARD_NOS_INT 380 -#define _GUARD_NOT_EXHAUSTED_LIST 381 -#define _GUARD_NOT_EXHAUSTED_RANGE 382 -#define _GUARD_NOT_EXHAUSTED_TUPLE 383 -#define _GUARD_TOS_FLOAT 384 -#define _GUARD_TOS_INT 385 -#define _GUARD_TYPE_VERSION 386 +#define _GUARD_GLOBALS_VERSION_PUSH_KEYS 374 +#define _GUARD_IS_FALSE_POP 375 +#define _GUARD_IS_NONE_POP 376 +#define _GUARD_IS_NOT_NONE_POP 377 +#define _GUARD_IS_TRUE_POP 378 +#define _GUARD_KEYS_VERSION 379 +#define _GUARD_NOS_FLOAT 380 +#define _GUARD_NOS_INT 381 +#define _GUARD_NOT_EXHAUSTED_LIST 382 +#define _GUARD_NOT_EXHAUSTED_RANGE 383 +#define _GUARD_NOT_EXHAUSTED_TUPLE 384 +#define _GUARD_TOS_FLOAT 385 +#define _GUARD_TOS_INT 386 +#define _GUARD_TYPE_VERSION 387 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 387 -#define _INIT_CALL_PY_EXACT_ARGS 388 -#define _INIT_CALL_PY_EXACT_ARGS_0 389 -#define _INIT_CALL_PY_EXACT_ARGS_1 390 -#define _INIT_CALL_PY_EXACT_ARGS_2 391 -#define _INIT_CALL_PY_EXACT_ARGS_3 392 -#define _INIT_CALL_PY_EXACT_ARGS_4 393 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 388 +#define _INIT_CALL_PY_EXACT_ARGS 389 +#define _INIT_CALL_PY_EXACT_ARGS_0 390 +#define _INIT_CALL_PY_EXACT_ARGS_1 391 +#define _INIT_CALL_PY_EXACT_ARGS_2 392 +#define _INIT_CALL_PY_EXACT_ARGS_3 393 +#define _INIT_CALL_PY_EXACT_ARGS_4 394 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -157,133 +158,135 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 394 -#define _IS_NONE 395 +#define _INTERNAL_INCREMENT_OPT_COUNTER 395 +#define _IS_NONE 396 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 396 -#define _ITER_CHECK_RANGE 397 -#define _ITER_CHECK_TUPLE 398 -#define _ITER_JUMP_LIST 399 -#define _ITER_JUMP_RANGE 400 -#define _ITER_JUMP_TUPLE 401 -#define _ITER_NEXT_LIST 402 -#define _ITER_NEXT_RANGE 403 -#define _ITER_NEXT_TUPLE 404 -#define _JUMP_TO_TOP 405 +#define _ITER_CHECK_LIST 397 +#define _ITER_CHECK_RANGE 398 +#define _ITER_CHECK_TUPLE 399 +#define _ITER_JUMP_LIST 400 +#define _ITER_JUMP_RANGE 401 +#define _ITER_JUMP_TUPLE 402 +#define _ITER_NEXT_LIST 403 +#define _ITER_NEXT_RANGE 404 +#define _ITER_NEXT_TUPLE 405 +#define _JUMP_TO_TOP 406 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 406 -#define _LOAD_ATTR_CLASS 407 -#define _LOAD_ATTR_CLASS_0 408 -#define _LOAD_ATTR_CLASS_1 409 +#define _LOAD_ATTR 407 +#define _LOAD_ATTR_CLASS 408 +#define _LOAD_ATTR_CLASS_0 409 +#define _LOAD_ATTR_CLASS_1 410 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 410 -#define _LOAD_ATTR_INSTANCE_VALUE_0 411 -#define _LOAD_ATTR_INSTANCE_VALUE_1 412 -#define _LOAD_ATTR_METHOD_LAZY_DICT 413 -#define _LOAD_ATTR_METHOD_NO_DICT 414 -#define _LOAD_ATTR_METHOD_WITH_VALUES 415 -#define _LOAD_ATTR_MODULE 416 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 417 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 418 -#define _LOAD_ATTR_PROPERTY_FRAME 419 -#define _LOAD_ATTR_SLOT 420 -#define _LOAD_ATTR_SLOT_0 421 -#define _LOAD_ATTR_SLOT_1 422 -#define _LOAD_ATTR_WITH_HINT 423 +#define _LOAD_ATTR_INSTANCE_VALUE 411 +#define _LOAD_ATTR_INSTANCE_VALUE_0 412 +#define _LOAD_ATTR_INSTANCE_VALUE_1 413 +#define _LOAD_ATTR_METHOD_LAZY_DICT 414 +#define _LOAD_ATTR_METHOD_NO_DICT 415 +#define _LOAD_ATTR_METHOD_WITH_VALUES 416 +#define _LOAD_ATTR_MODULE 417 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 418 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 419 +#define _LOAD_ATTR_PROPERTY_FRAME 420 +#define _LOAD_ATTR_SLOT 421 +#define _LOAD_ATTR_SLOT_0 422 +#define _LOAD_ATTR_SLOT_1 423 +#define _LOAD_ATTR_WITH_HINT 424 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 424 -#define _LOAD_CONST_INLINE_BORROW 425 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 426 -#define _LOAD_CONST_INLINE_WITH_NULL 427 +#define _LOAD_CONST_INLINE 425 +#define _LOAD_CONST_INLINE_BORROW 426 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 427 +#define _LOAD_CONST_INLINE_WITH_NULL 428 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 428 -#define _LOAD_FAST_0 429 -#define _LOAD_FAST_1 430 -#define _LOAD_FAST_2 431 -#define _LOAD_FAST_3 432 -#define _LOAD_FAST_4 433 -#define _LOAD_FAST_5 434 -#define _LOAD_FAST_6 435 -#define _LOAD_FAST_7 436 +#define _LOAD_FAST 429 +#define _LOAD_FAST_0 430 +#define _LOAD_FAST_1 431 +#define _LOAD_FAST_2 432 +#define _LOAD_FAST_3 433 +#define _LOAD_FAST_4 434 +#define _LOAD_FAST_5 435 +#define _LOAD_FAST_6 436 +#define _LOAD_FAST_7 437 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 437 -#define _LOAD_GLOBAL_BUILTINS 438 -#define _LOAD_GLOBAL_MODULE 439 +#define _LOAD_GLOBAL 438 +#define _LOAD_GLOBAL_BUILTINS 439 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 440 +#define _LOAD_GLOBAL_MODULE 441 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 442 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 440 +#define _MAKE_CALLARGS_A_TUPLE 443 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 441 +#define _MAKE_WARM 444 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 442 -#define _MAYBE_EXPAND_METHOD_KW 443 -#define _MONITOR_CALL 444 -#define _MONITOR_JUMP_BACKWARD 445 -#define _MONITOR_RESUME 446 +#define _MAYBE_EXPAND_METHOD 445 +#define _MAYBE_EXPAND_METHOD_KW 446 +#define _MONITOR_CALL 447 +#define _MONITOR_JUMP_BACKWARD 448 +#define _MONITOR_RESUME 449 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 447 -#define _POP_JUMP_IF_TRUE 448 +#define _POP_JUMP_IF_FALSE 450 +#define _POP_JUMP_IF_TRUE 451 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 449 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 452 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 450 +#define _PUSH_FRAME 453 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 451 -#define _PY_FRAME_KW 452 -#define _QUICKEN_RESUME 453 -#define _REPLACE_WITH_TRUE 454 +#define _PY_FRAME_GENERAL 454 +#define _PY_FRAME_KW 455 +#define _QUICKEN_RESUME 456 +#define _REPLACE_WITH_TRUE 457 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 455 -#define _SEND 456 -#define _SEND_GEN_FRAME 457 +#define _SAVE_RETURN_OFFSET 458 +#define _SEND 459 +#define _SEND_GEN_FRAME 460 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 458 -#define _STORE_ATTR 459 -#define _STORE_ATTR_INSTANCE_VALUE 460 -#define _STORE_ATTR_SLOT 461 -#define _STORE_ATTR_WITH_HINT 462 +#define _START_EXECUTOR 461 +#define _STORE_ATTR 462 +#define _STORE_ATTR_INSTANCE_VALUE 463 +#define _STORE_ATTR_SLOT 464 +#define _STORE_ATTR_WITH_HINT 465 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 463 -#define _STORE_FAST_0 464 -#define _STORE_FAST_1 465 -#define _STORE_FAST_2 466 -#define _STORE_FAST_3 467 -#define _STORE_FAST_4 468 -#define _STORE_FAST_5 469 -#define _STORE_FAST_6 470 -#define _STORE_FAST_7 471 +#define _STORE_FAST 466 +#define _STORE_FAST_0 467 +#define _STORE_FAST_1 468 +#define _STORE_FAST_2 469 +#define _STORE_FAST_3 470 +#define _STORE_FAST_4 471 +#define _STORE_FAST_5 472 +#define _STORE_FAST_6 473 +#define _STORE_FAST_7 474 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 472 -#define _STORE_SUBSCR 473 +#define _STORE_SLICE 475 +#define _STORE_SUBSCR 476 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 474 -#define _TO_BOOL 475 +#define _TIER2_RESUME_CHECK 477 +#define _TO_BOOL 478 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -293,13 +296,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 476 +#define _UNPACK_SEQUENCE 479 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 476 +#define MAX_UOP_ID 479 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index c68ee594947283..fd41e9a5fe862b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -116,9 +116,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, - [_GUARD_BUILTINS_VERSION] = HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_DELETE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -273,6 +274,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, @@ -397,10 +400,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_FLOAT] = "_GUARD_BOTH_FLOAT", [_GUARD_BOTH_INT] = "_GUARD_BOTH_INT", [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", - [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", + [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = "_GUARD_BUILTINS_VERSION_PUSH_KEYS", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", + [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = "_GUARD_GLOBALS_VERSION_PUSH_KEYS", [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", @@ -476,7 +480,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = "_LOAD_GLOBAL_BUILTINS_FROM_KEYS", [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = "_LOAD_GLOBAL_MODULE_FROM_KEYS", [_LOAD_LOCALS] = "_LOAD_LOCALS", [_LOAD_NAME] = "_LOAD_NAME", [_LOAD_SPECIAL] = "_LOAD_SPECIAL", @@ -752,12 +758,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_GLOBALS_VERSION: return 0; - case _GUARD_BUILTINS_VERSION: + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: return 0; - case _LOAD_GLOBAL_MODULE: - return 0; - case _LOAD_GLOBAL_BUILTINS: + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: return 0; + case _LOAD_GLOBAL_MODULE_FROM_KEYS: + return 1; + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: + return 1; case _DELETE_FAST: return 0; case _MAKE_CELL: @@ -1066,6 +1074,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _CHECK_FUNCTION: return 0; + case _LOAD_GLOBAL_MODULE: + return 0; + case _LOAD_GLOBAL_BUILTINS: + return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: return 1; case _DYNAMIC_EXIT: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 228d82173e6126..87cca3fc1d373c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1569,17 +1569,29 @@ dummy_func( assert(DK_IS_UNICODE(dict->ma_keys)); } - op(_GUARD_BUILTINS_VERSION, (version/1 --)) { + op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) + { + PyDictObject *dict = (PyDictObject *)GLOBALS(); + DEOPT_IF(!PyDict_CheckExact(dict)); + DEOPT_IF(dict->ma_keys->dk_version != version); + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); + } + + op(_GUARD_BUILTINS_VERSION_PUSH_KEYS, (version / 1 -- builtins_keys: PyDictKeysObject *)) + { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); } - op(_LOAD_GLOBAL_MODULE, (index/1 -- res, null if (oparg & 1))) { - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; + DEAD(globals_keys); + SYNC_SP(); DEOPT_IF(res_o == NULL); Py_INCREF(res_o); STAT_INC(LOAD_GLOBAL, hit); @@ -1587,10 +1599,11 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_LOAD_GLOBAL_BUILTINS, (index/1 -- res, null if (oparg & 1))) { - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; + DEAD(builtins_keys); + SYNC_SP(); DEOPT_IF(res_o == NULL); Py_INCREF(res_o); STAT_INC(LOAD_GLOBAL, hit); @@ -1600,15 +1613,15 @@ dummy_func( macro(LOAD_GLOBAL_MODULE) = unused/1 + // Skip over the counter - _GUARD_GLOBALS_VERSION + + _GUARD_GLOBALS_VERSION_PUSH_KEYS + unused/1 + // Skip over the builtins version - _LOAD_GLOBAL_MODULE; + _LOAD_GLOBAL_MODULE_FROM_KEYS; macro(LOAD_GLOBAL_BUILTIN) = unused/1 + // Skip over the counter _GUARD_GLOBALS_VERSION + - _GUARD_BUILTINS_VERSION + - _LOAD_GLOBAL_BUILTINS; + _GUARD_BUILTINS_VERSION_PUSH_KEYS + + _LOAD_GLOBAL_BUILTINS_FROM_KEYS; inst(DELETE_FAST, (--)) { _PyStackRef v = GETLOCAL(oparg); @@ -4871,6 +4884,26 @@ dummy_func( DEOPT_IF(func->func_version != func_version); } + tier2 op(_LOAD_GLOBAL_MODULE, (index/1 -- res, null if (oparg & 1))) { + PyDictObject *dict = (PyDictObject *)GLOBALS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + DEOPT_IF(res_o == NULL); + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + } + + tier2 op(_LOAD_GLOBAL_BUILTINS, (index/1 -- res, null if (oparg & 1))) { + PyDictObject *dict = (PyDictObject *)BUILTINS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + DEOPT_IF(res_o == NULL); + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + } + /* Internal -- for testing executors */ op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) { _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)PyStackRef_AsPyObjectBorrow(opt); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4574e183921006..57e15f33ca7703 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1791,7 +1791,28 @@ break; } - case _GUARD_BUILTINS_VERSION: { + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: { + PyDictKeysObject *globals_keys; + uint16_t version = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)GLOBALS(); + if (!PyDict_CheckExact(dict)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (dict->ma_keys->dk_version != version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); + stack_pointer[0].bits = (uintptr_t)globals_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: { + PyDictKeysObject *builtins_keys; uint16_t version = (uint16_t)CURRENT_OPERAND(); PyDictObject *dict = (PyDictObject *)BUILTINS(); if (!PyDict_CheckExact(dict)) { @@ -1802,18 +1823,25 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); + stack_pointer[0].bits = (uintptr_t)builtins_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_MODULE: { + case _LOAD_GLOBAL_MODULE_FROM_KEYS: { + PyDictKeysObject *globals_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); + globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1829,14 +1857,17 @@ break; } - case _LOAD_GLOBAL_BUILTINS: { + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: { + PyDictKeysObject *builtins_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); + builtins_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5698,6 +5729,50 @@ break; } + case _LOAD_GLOBAL_MODULE: { + _PyStackRef res; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + uint16_t index = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)GLOBALS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _PyStackRef res; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + uint16_t index = (uint16_t)CURRENT_OPERAND(); + PyDictObject *dict = (PyDictObject *)BUILTINS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyObject *res_o = entries[index].me_value; + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + null = PyStackRef_NULL; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { _PyStackRef opt; opt = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c4de7bdeb4ce80..7656ce6bb7e313 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6146,6 +6146,7 @@ next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_BUILTIN); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); + PyDictKeysObject *builtins_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ @@ -6157,19 +6158,19 @@ DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); assert(DK_IS_UNICODE(dict->ma_keys)); } - // _GUARD_BUILTINS_VERSION + // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + builtins_keys = dict->ma_keys; + assert(DK_IS_UNICODE(builtins_keys)); } - // _LOAD_GLOBAL_BUILTINS + // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); - PyDictObject *bdict = (PyDictObject *)BUILTINS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL, LOAD_GLOBAL); Py_INCREF(res_o); @@ -6189,23 +6190,24 @@ next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); + PyDictKeysObject *globals_keys; _PyStackRef res; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ - // _GUARD_GLOBALS_VERSION + // _GUARD_GLOBALS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + globals_keys = dict->ma_keys; + assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ - // _LOAD_GLOBAL_MODULE + // _LOAD_GLOBAL_MODULE_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); - PyDictObject *dict = (PyDictObject *)GLOBALS(); - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL, LOAD_GLOBAL); Py_INCREF(res_o); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 06826ff942a761..25166bc2dc5c02 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -131,6 +131,26 @@ incorrect_keys(_PyUOpInstruction *inst, PyObject *obj) return 0; } +static int +check_next_uop(_PyUOpInstruction *buffer, int size, int pc, uint16_t expected) +{ + if (pc + 1 >= size) { + DPRINTF(1, "Cannot rewrite %s at pc %d: buffer too small\n", + _PyOpcode_uop_name[buffer[pc].opcode], pc); + return 0; + } + uint16_t next_opcode = buffer[pc + 1].opcode; + if (next_opcode != expected) { + DPRINTF(1, + "Cannot rewrite %s at pc %d: unexpected next opcode %s, " + "expected %s\n", + _PyOpcode_uop_name[buffer[pc].opcode], pc, + _PyOpcode_uop_name[next_opcode], _PyOpcode_uop_name[expected]); + return 0; + } + return 1; +} + /* Returns 1 if successfully optimized * 0 if the trace is not suitable for optimization (yet) * -1 if there was an error. */ @@ -174,7 +194,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, _PyUOpInstruction *inst = &buffer[pc]; int opcode = inst->opcode; switch(opcode) { - case _GUARD_BUILTINS_VERSION: + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: if (incorrect_keys(inst, builtins)) { OPT_STAT_INC(remove_globals_incorrect_keys); return 0; @@ -182,6 +202,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { continue; } + if (!check_next_uop(buffer, buffer_size, pc, + _LOAD_GLOBAL_BUILTINS_FROM_KEYS)) { + continue; + } if ((builtins_watched & 1) == 0) { PyDict_Watch(BUILTINS_WATCHER_ID, builtins); builtins_watched |= 1; @@ -194,8 +218,13 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, buffer[pc].operand = function_version; function_checked |= 1; } + // We're no longer pushing the builtins keys; rewrite the + // instruction that consumed the keys to load them from the + // frame. + buffer[pc + 1].opcode = _LOAD_GLOBAL_BUILTINS; break; case _GUARD_GLOBALS_VERSION: + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: if (incorrect_keys(inst, globals)) { OPT_STAT_INC(remove_globals_incorrect_keys); return 0; @@ -204,6 +233,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { continue; } + if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS && + !check_next_uop(buffer, buffer_size, pc, + _LOAD_GLOBAL_MODULE_FROM_KEYS)) { + continue; + } if ((globals_watched & 1) == 0) { PyDict_Watch(GLOBALS_WATCHER_ID, globals); _Py_BloomFilter_Add(dependencies, globals); @@ -217,6 +251,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, buffer[pc].operand = function_version; function_checked |= 1; } + if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS) { + // We're no longer pushing the globals keys; rewrite the + // instruction that consumed the keys to load them from the + // frame. + buffer[pc + 1].opcode = _LOAD_GLOBAL_MODULE; + } break; case _LOAD_GLOBAL_BUILTINS: if (function_checked & globals_watched & builtins_watched & 1) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c73b632d1afdd7..d71b55cbe1c68d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -836,6 +836,16 @@ dummy_func(void) { ctx->done = true; } + op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version/1 -- globals_keys)) { + globals_keys = sym_new_unknown(ctx); + (void)version; + } + + op(_GUARD_BUILTINS_VERSION_PUSH_KEYS, (version/1 -- builtins_keys)) { + builtins_keys = sym_new_unknown(ctx); + (void)version; + } + // END BYTECODES // } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index ae532fd27769ab..6ec9e69d1dbc44 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -844,30 +844,48 @@ break; } - case _GUARD_BUILTINS_VERSION: { + case _GUARD_GLOBALS_VERSION_PUSH_KEYS: { + _Py_UopsSymbol *globals_keys; + uint16_t version = (uint16_t)this_instr->operand; + globals_keys = sym_new_unknown(ctx); + (void)version; + stack_pointer[0] = globals_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_MODULE: { + case _GUARD_BUILTINS_VERSION_PUSH_KEYS: { + _Py_UopsSymbol *builtins_keys; + uint16_t version = (uint16_t)this_instr->operand; + builtins_keys = sym_new_unknown(ctx); + (void)version; + stack_pointer[0] = builtins_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_MODULE_FROM_KEYS: { _Py_UopsSymbol *res; _Py_UopsSymbol *null = NULL; res = sym_new_not_null(ctx); null = sym_new_null(ctx); - stack_pointer[0] = res; - if (oparg & 1) stack_pointer[1] = null; - stack_pointer += 1 + (oparg & 1); + stack_pointer[-1] = res; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_GLOBAL_BUILTINS: { + case _LOAD_GLOBAL_BUILTINS_FROM_KEYS: { _Py_UopsSymbol *res; _Py_UopsSymbol *null = NULL; res = sym_new_not_null(ctx); null = sym_new_null(ctx); - stack_pointer[0] = res; - if (oparg & 1) stack_pointer[1] = null; - stack_pointer += 1 + (oparg & 1); + stack_pointer[-1] = res; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } @@ -2419,6 +2437,30 @@ break; } + case _LOAD_GLOBAL_MODULE: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS());