Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-106529: Make FOR_ITER a viable uop #112134

Merged
merged 17 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 48 additions & 40 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Include/internal/pycore_uops.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ extern "C" {

#include "pycore_frame.h" // _PyInterpreterFrame

#define _Py_UOP_MAX_TRACE_LENGTH 128
#define _Py_UOP_MAX_TRACE_LENGTH 256

typedef struct {
uint16_t opcode;
Expand Down
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,7 @@ Python/ceval.o: \
$(srcdir)/Python/ceval_macros.h \
$(srcdir)/Python/condvar.h \
$(srcdir)/Python/generated_cases.c.h \
$(srcdir)/Python/executor_cases.c.h \
$(srcdir)/Python/opcode_targets.h

Python/flowgraph.o: \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Enable translating unspecialized ``FOR_ITER`` to Tier 2.
6 changes: 6 additions & 0 deletions Python/abstract_interp_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 26 additions & 2 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,7 @@ dummy_func(
};

specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
TIER_ONE_ONLY
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
next_instr = this_instr;
Expand Down Expand Up @@ -2368,7 +2369,7 @@ dummy_func(
goto enter_tier_one;
}

replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) {
replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) {
assert(PyBool_Check(cond));
int flag = Py_IsFalse(cond);
#if ENABLE_SPECIALIZATION
Expand Down Expand Up @@ -2512,7 +2513,7 @@ dummy_func(
#endif /* ENABLE_SPECIALIZATION */
}

op(_FOR_ITER, (iter -- iter, next)) {
replaced op(_FOR_ITER, (iter -- iter, next)) {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) {
Expand All @@ -2535,6 +2536,28 @@ dummy_func(
// Common case: no jump, leave it to the code generator
}

op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
}
_PyErr_Clear(tstate);
}
/* iterator ended normally */
Py_DECREF(iter);
STACK_SHRINK(1);
/* HACK: Emulate DEOPT_IF to jump over END_FOR */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No hacks, please 🙂

The code should look like this:

if (next == NULL) {
    if (_PyErr_Occurred(tstate)) {
        if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
            GOTO_ERROR(error);
        }
        _PyErr_Clear(tstate);
    }
    /* iterator ended normally */
    Py_DECREF(iter);
    STACK_SHRINK(1);
    DEOPT_IF(true);
}

The trace generator can adjust the target so that it points after the END_FOR.

frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1;
assert(frame->instr_ptr[-1].op.code == END_FOR ||
frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR);
goto exit_trace;
}
// Common case: no jump, leave it to the code generator
}

macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER;

inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) {
Expand Down Expand Up @@ -4008,6 +4031,7 @@ dummy_func(

op(_EXIT_TRACE, (--)) {
TIER_TWO_ONLY
frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame));
gvanrossum marked this conversation as resolved.
Show resolved Hide resolved
GOTO_TIER_ONE();
}

Expand Down
3 changes: 1 addition & 2 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1067,7 +1067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
UOP_STAT_INC(opcode, miss);
frame->return_offset = 0; // Dispatch to frame->instr_ptr
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
Py_DECREF(current_executor);
// Fall through
// Jump here from ENTER_EXECUTOR
Expand All @@ -1078,7 +1078,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
// Jump here from _EXIT_TRACE
exit_trace:
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
Py_DECREF(current_executor);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
goto enter_tier_one;
Expand Down
2 changes: 2 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,3 +397,5 @@ stack_pointer = _PyFrame_GetStackPointer(frame);
#define GOTO_TIER_TWO() goto enter_tier_two;

#define GOTO_TIER_ONE() goto exit_trace;

#define CURRENT_TARGET() (next_uop[-1].target)
29 changes: 29 additions & 0 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,8 @@ uop_dealloc(_PyUOpExecutorObject *self) {
}

static const char *
uop_name(int index) {
uop_name(int index)
{
if (index <= MAX_REAL_OPCODE) {
return _PyOpcode_OpName[index];
}
Expand Down Expand Up @@ -391,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = {
[_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE,
[_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
[_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
[_FOR_ITER] = _FOR_ITER_TIER_TWO,
};

static const uint16_t
Expand Down Expand Up @@ -832,6 +834,24 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
assert(dest == -1);
executor->base.execute = _PyUopExecute;
_Py_ExecutorInit((_PyExecutorObject *)executor, dependencies);
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
if (lltrace >= 2) {
printf("Optimized executor (length %d):\n", length);
for (int i = 0; i < length; i++) {
printf("%4d %s(%d, %d, %" PRIu64 ")\n",
i,
uop_name(executor->trace[i].opcode),
executor->trace[i].oparg,
executor->trace[i].target,
executor->trace[i].operand);
}
}
#endif
return (_PyExecutorObject *)executor;
}

Expand Down
9 changes: 4 additions & 5 deletions Tools/cases_generator/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

def makes_escaping_api_call(instr: parsing.InstDef) -> bool:
if "CALL_INTRINSIC" in instr.name:
return True;
return True
tkns = iter(instr.tokens)
for tkn in tkns:
if tkn.kind != lx.IDENTIFIER:
Expand All @@ -79,6 +79,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool:
return True
return False


@dataclasses.dataclass
class InstructionFlags:
"""Construct and manipulate instruction flags"""
Expand Down Expand Up @@ -124,9 +125,7 @@ def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags":
or variable_used(instr, "exception_unwind")
or variable_used(instr, "resume_with_error")
),
HAS_ESCAPES_FLAG=(
makes_escaping_api_call(instr)
),
HAS_ESCAPES_FLAG=makes_escaping_api_call(instr),
)

@staticmethod
Expand Down Expand Up @@ -176,7 +175,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool:
tokens: list[lx.Token] = []
skipping = False
for i, token in enumerate(node.tokens):
if token.kind == "MACRO":
if token.kind == "CMACRO":
Copy link
Member Author

@gvanrossum gvanrossum Nov 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: This fix resulted in _SPECIALIZE_UNPACK_SEQUENCE becoming a viable uop. It was missing a TIER_ONE_ONLY marker; I've added it back. (The fix is needed to restore the feature that this function doesn't look inside #if TIER_ONE.)

text = "".join(token.text.split())
# TODO: Handle nested #if
if text == "#if":
Expand Down
2 changes: 1 addition & 1 deletion Tools/cases_generator/generate_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ def write_macro_expansions(
if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations:
# This note just reminds us about macros that cannot
# be expanded to Tier 2 uops. It is not an error.
# It is sometimes emitted for macros that have a
# Suppress it using 'replaced op(...)' for macros having
# manual translation in translate_bytecode_to_trace()
# in Python/optimizer.c.
if len(parts) > 1 or part.instr.name != name:
Expand Down
Loading
Loading