From 320d1ab2fb7a99f98ca2555a742a6bc5a3ba6d93 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 8 Oct 2024 13:18:39 -0400 Subject: [PATCH] gh-125063: Emit slices as constants in the bytecode compiler (#125064) * Make slices marshallable * Emit slices as constants * Update Python/marshal.c Co-authored-by: Peter Bierma * Refactor codegen_slice into two functions so it always has the same net effect * Fix for free-threaded builds * Simplify marshal loading of slices * Only return SUCCESS/ERROR from codegen_slice --------- Co-authored-by: Mark Shannon Co-authored-by: Peter Bierma --- Include/internal/pycore_magic_number.h | 3 +- Lib/test/test_compile.py | 27 ++++++++- Objects/codeobject.c | 1 + Objects/sliceobject.c | 2 +- Python/codegen.c | 78 +++++++++++++++++++------- Python/marshal.c | 34 +++++++++++ 6 files changed, 122 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 2414d25d41bfbf..a88ff2deeba941 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -259,6 +259,7 @@ Known values: Python 3.14a1 3605 (Move ENTER_EXECUTOR to opcode 255) Python 3.14a1 3606 (Specialize CALL_KW) Python 3.14a1 3607 (Add pseudo instructions JUMP_IF_TRUE/FALSE) + Python 3.14a1 3608 (Add support for slices) Python 3.15 will start with 3650 @@ -271,7 +272,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3607 +#define PYC_MAGIC_NUMBER 3608 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index e6dc7a53189e12..6f838da6018741 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1386,6 +1386,14 @@ def check_op_count(func, op, expected): actual += 1 self.assertEqual(actual, expected) + def check_consts(func, typ, expected): + slice_consts = 0 + consts = func.__code__.co_consts + for instr in dis.Bytecode(func): + if instr.opname == "LOAD_CONST" and isinstance(consts[instr.oparg], typ): + slice_consts += 1 + self.assertEqual(slice_consts, expected) + def load(): return x[a:b] + x [a:] + x[:b] + x[:] @@ -1401,15 +1409,30 @@ def long_slice(): def aug(): x[a:b] += y - check_op_count(load, "BINARY_SLICE", 4) + def aug_const(): + x[1:2] += y + + def compound_const_slice(): + x[1:2:3, 4:5:6] = y + + check_op_count(load, "BINARY_SLICE", 3) check_op_count(load, "BUILD_SLICE", 0) - check_op_count(store, "STORE_SLICE", 4) + check_consts(load, slice, 1) + check_op_count(store, "STORE_SLICE", 3) check_op_count(store, "BUILD_SLICE", 0) + check_consts(store, slice, 1) check_op_count(long_slice, "BUILD_SLICE", 1) check_op_count(long_slice, "BINARY_SLICE", 0) check_op_count(aug, "BINARY_SLICE", 1) check_op_count(aug, "STORE_SLICE", 1) check_op_count(aug, "BUILD_SLICE", 0) + check_op_count(aug_const, "BINARY_SLICE", 0) + check_op_count(aug_const, "STORE_SLICE", 0) + check_consts(aug_const, slice, 1) + check_op_count(compound_const_slice, "BINARY_SLICE", 0) + check_op_count(compound_const_slice, "BUILD_SLICE", 0) + check_consts(compound_const_slice, slice, 0) + check_consts(compound_const_slice, tuple, 1) def test_compare_positions(self): for opname_prefix, op in [ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 6f0b3f8b9a3262..8a2f4d32b911d9 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2336,6 +2336,7 @@ _PyCode_ConstantKey(PyObject *op) if (op == Py_None || op == Py_Ellipsis || PyLong_CheckExact(op) || PyUnicode_CheckExact(op) + || PySlice_Check(op) /* code_richcompare() uses _PyCode_ConstantKey() internally */ || PyCode_Check(op)) { diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 1b6d35998c2b69..4fef0af93fe095 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -343,7 +343,7 @@ Create a slice object. This is used for extended slicing (e.g. a[0:10:2])."); static void slice_dealloc(PySliceObject *r) { - _PyObject_GC_UNTRACK(r); + PyObject_GC_UnTrack(r); Py_DECREF(r->step); Py_DECREF(r->start); Py_DECREF(r->stop); diff --git a/Python/codegen.c b/Python/codegen.c index 896c30cc14952a..689d2b5124e9d3 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -194,6 +194,7 @@ static int codegen_visit_expr(compiler *, expr_ty); static int codegen_augassign(compiler *, stmt_ty); static int codegen_annassign(compiler *, stmt_ty); static int codegen_subscript(compiler *, expr_ty); +static int codegen_slice_two_parts(compiler *, expr_ty); static int codegen_slice(compiler *, expr_ty); static bool are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t); @@ -5005,12 +5006,8 @@ codegen_visit_expr(compiler *c, expr_ty e) } break; case Slice_kind: - { - int n = codegen_slice(c, e); - RETURN_IF_ERROR(n); - ADDOP_I(c, loc, BUILD_SLICE, n); + RETURN_IF_ERROR(codegen_slice(c, e)); break; - } case Name_kind: return codegen_nameop(c, loc, e->v.Name.id, e->v.Name.ctx); /* child nodes of List and Tuple will have expr_context set */ @@ -5023,9 +5020,22 @@ codegen_visit_expr(compiler *c, expr_ty e) } static bool -is_two_element_slice(expr_ty s) +is_constant_slice(expr_ty s) { return s->kind == Slice_kind && + (s->v.Slice.lower == NULL || + s->v.Slice.lower->kind == Constant_kind) && + (s->v.Slice.upper == NULL || + s->v.Slice.upper->kind == Constant_kind) && + (s->v.Slice.step == NULL || + s->v.Slice.step->kind == Constant_kind); +} + +static bool +should_apply_two_element_slice_optimization(expr_ty s) +{ + return !is_constant_slice(s) && + s->kind == Slice_kind && s->v.Slice.step == NULL; } @@ -5046,8 +5056,8 @@ codegen_augassign(compiler *c, stmt_ty s) break; case Subscript_kind: VISIT(c, expr, e->v.Subscript.value); - if (is_two_element_slice(e->v.Subscript.slice)) { - RETURN_IF_ERROR(codegen_slice(c, e->v.Subscript.slice)); + if (should_apply_two_element_slice_optimization(e->v.Subscript.slice)) { + RETURN_IF_ERROR(codegen_slice_two_parts(c, e->v.Subscript.slice)); ADDOP_I(c, loc, COPY, 3); ADDOP_I(c, loc, COPY, 3); ADDOP_I(c, loc, COPY, 3); @@ -5084,7 +5094,7 @@ codegen_augassign(compiler *c, stmt_ty s) ADDOP_NAME(c, loc, STORE_ATTR, e->v.Attribute.attr, names); break; case Subscript_kind: - if (is_two_element_slice(e->v.Subscript.slice)) { + if (should_apply_two_element_slice_optimization(e->v.Subscript.slice)) { ADDOP_I(c, loc, SWAP, 4); ADDOP_I(c, loc, SWAP, 3); ADDOP_I(c, loc, SWAP, 2); @@ -5231,8 +5241,10 @@ codegen_subscript(compiler *c, expr_ty e) } VISIT(c, expr, e->v.Subscript.value); - if (is_two_element_slice(e->v.Subscript.slice) && ctx != Del) { - RETURN_IF_ERROR(codegen_slice(c, e->v.Subscript.slice)); + if (should_apply_two_element_slice_optimization(e->v.Subscript.slice) && + ctx != Del + ) { + RETURN_IF_ERROR(codegen_slice_two_parts(c, e->v.Subscript.slice)); if (ctx == Load) { ADDOP(c, loc, BINARY_SLICE); } @@ -5254,15 +5266,9 @@ codegen_subscript(compiler *c, expr_ty e) return SUCCESS; } -/* Returns the number of the values emitted, - * thus are needed to build the slice, or -1 if there is an error. */ static int -codegen_slice(compiler *c, expr_ty s) +codegen_slice_two_parts(compiler *c, expr_ty s) { - int n = 2; - assert(s->kind == Slice_kind); - - /* only handles the cases where BUILD_SLICE is emitted */ if (s->v.Slice.lower) { VISIT(c, expr, s->v.Slice.lower); } @@ -5277,11 +5283,45 @@ codegen_slice(compiler *c, expr_ty s) ADDOP_LOAD_CONST(c, LOC(s), Py_None); } + return 0; +} + +static int +codegen_slice(compiler *c, expr_ty s) +{ + int n = 2; + assert(s->kind == Slice_kind); + + if (is_constant_slice(s)) { + PyObject *start = NULL; + if (s->v.Slice.lower) { + start = s->v.Slice.lower->v.Constant.value; + } + PyObject *stop = NULL; + if (s->v.Slice.upper) { + stop = s->v.Slice.upper->v.Constant.value; + } + PyObject *step = NULL; + if (s->v.Slice.step) { + step = s->v.Slice.step->v.Constant.value; + } + PyObject *slice = PySlice_New(start, stop, step); + if (slice == NULL) { + return ERROR; + } + ADDOP_LOAD_CONST_NEW(c, LOC(s), slice); + return SUCCESS; + } + + RETURN_IF_ERROR(codegen_slice_two_parts(c, s)); + if (s->v.Slice.step) { n++; VISIT(c, expr, s->v.Slice.step); } - return n; + + ADDOP_I(c, LOC(s), BUILD_SLICE, n); + return SUCCESS; } diff --git a/Python/marshal.c b/Python/marshal.c index b1708a7306f9e7..3d127b4e331d0d 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -76,6 +76,7 @@ module marshal #define TYPE_UNKNOWN '?' #define TYPE_SET '<' #define TYPE_FROZENSET '>' +#define TYPE_SLICE ':' #define FLAG_REF '\x80' /* with a type, add obj to index */ #define TYPE_ASCII 'a' @@ -613,6 +614,13 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_pstring(view.buf, view.len, p); PyBuffer_Release(&view); } + else if (PySlice_Check(v)) { + PySliceObject *slice = (PySliceObject *)v; + W_TYPE(TYPE_SLICE, p); + w_object(slice->start, p); + w_object(slice->stop, p); + w_object(slice->step, p); + } else { W_TYPE(TYPE_UNKNOWN, p); p->error = WFERR_UNMARSHALLABLE; @@ -1534,6 +1542,32 @@ r_object(RFILE *p) retval = Py_NewRef(v); break; + case TYPE_SLICE: + { + Py_ssize_t idx = r_ref_reserve(flag, p); + PyObject *stop = NULL; + PyObject *step = NULL; + PyObject *start = r_object(p); + if (start == NULL) { + goto cleanup; + } + stop = r_object(p); + if (stop == NULL) { + goto cleanup; + } + step = r_object(p); + if (step == NULL) { + goto cleanup; + } + retval = PySlice_New(start, stop, step); + r_ref_insert(retval, idx, flag, p); + cleanup: + Py_XDECREF(start); + Py_XDECREF(stop); + Py_XDECREF(step); + break; + } + default: /* Bogus data got written, which isn't ideal. This will let you keep working and recover. */