Skip to content

Commit

Permalink
pythonGH-109329: Add tier 2 stats (pythonGH-109913)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdboom authored Oct 4, 2023
1 parent f786029 commit e561e98
Show file tree
Hide file tree
Showing 9 changed files with 483 additions and 128 deletions.
28 changes: 24 additions & 4 deletions Include/cpython/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,6 @@ typedef struct _object_stats {
uint64_t type_cache_dunder_hits;
uint64_t type_cache_dunder_misses;
uint64_t type_cache_collisions;
uint64_t optimization_attempts;
uint64_t optimization_traces_created;
uint64_t optimization_traces_executed;
uint64_t optimization_uops_executed;
/* Temporary value used during GC */
uint64_t object_visits;
} ObjectStats;
Expand All @@ -100,10 +96,34 @@ typedef struct _gc_stats {
uint64_t objects_collected;
} GCStats;

typedef struct _uop_stats {
uint64_t execution_count;
} UOpStats;

#define _Py_UOP_HIST_SIZE 32

typedef struct _optimization_stats {
uint64_t attempts;
uint64_t traces_created;
uint64_t traces_executed;
uint64_t uops_executed;
uint64_t trace_stack_overflow;
uint64_t trace_stack_underflow;
uint64_t trace_too_long;
uint64_t inner_loop;
uint64_t recursive_call;
UOpStats opcode[512];
uint64_t unsupported_opcode[256];
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
} OptimizationStats;

typedef struct _stats {
OpcodeStats opcode_stats[256];
CallStats call_stats;
ObjectStats object_stats;
OptimizationStats optimization_stats;
GCStats *gc_stats;
} PyStats;

Expand Down
15 changes: 15 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
#define OPT_HIST(length, name) \
do { \
if (_Py_stats) { \
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
_Py_stats->optimization_stats.name[bucket]++; \
} \
} while (0)

// Export for '_opcode' shared extension
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
Expand All @@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#define EVAL_CALL_STAT_INC(name) ((void)0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#define GC_STAT_ADD(gen, name, n) ((void)0)
#define OPT_STAT_INC(name) ((void)0)
#define UOP_EXE_INC(opname) ((void)0)
#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
#define OPT_HIST(length, name) ((void)0)
#endif // !Py_STATS

// Utility functions for reading/writing 32/64-bit values in the inline caches.
Expand Down
3 changes: 2 additions & 1 deletion Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2278,7 +2278,7 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
here->op.code == JUMP_BACKWARD)
{
OBJECT_STAT_INC(optimization_attempts);
OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
Expand Down Expand Up @@ -3965,6 +3965,7 @@ dummy_func(
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
return frame;
}

Expand Down
25 changes: 23 additions & 2 deletions Python/executor.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "opcode.h"

#include "pycore_bitutils.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
Expand Down Expand Up @@ -30,6 +31,16 @@
goto deoptimize; \
}

#ifdef Py_STATS
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
#undef STAT_INC
#define STAT_INC(opname, name) ((void)0)
#undef STAT_DEC
#define STAT_DEC(opname, name) ((void)0)
#undef CALL_STAT_INC
#define CALL_STAT_INC(name) ((void)0)
#endif

#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0

Expand Down Expand Up @@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject

CHECK_EVAL_BREAKER();

OBJECT_STAT_INC(optimization_traces_executed);
OPT_STAT_INC(traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
int pc = 0;
int opcode;
int oparg;
uint64_t operand;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif

for (;;) {
opcode = self->trace[pc].opcode;
Expand All @@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
pc++;
OBJECT_STAT_INC(optimization_uops_executed);
OPT_STAT_INC(uops_executed);
UOP_EXE_INC(opcode);
#ifdef Py_STATS
trace_uop_execution_counter++;
#endif

switch (opcode) {

#include "executor_cases.c.h"
Expand Down Expand Up @@ -114,6 +133,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
// On ERROR_IF we return NULL as the frame.
// The caller recovers the frame from tstate->current_frame.
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return NULL;
Expand All @@ -122,6 +142,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
Expand Down
1 change: 1 addition & 0 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ translate_bytecode_to_trace(
if (trace_length + (n) > max_length) { \
DPRINTF(2, "No room for %s (need %d, got %d)\n", \
(opname), (n), max_length - trace_length); \
OPT_STAT_INC(trace_too_long); \
goto done; \
} \
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
Expand All @@ -472,6 +473,7 @@ translate_bytecode_to_trace(
#define TRACE_STACK_PUSH() \
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
OPT_STAT_INC(trace_stack_overflow); \
ADD_TO_TRACE(_SET_IP, 0, 0); \
goto done; \
} \
Expand Down Expand Up @@ -572,6 +574,7 @@ translate_bytecode_to_trace(
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
}
else {
OPT_STAT_INC(inner_loop);
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
}
goto done;
Expand Down Expand Up @@ -638,7 +641,9 @@ translate_bytecode_to_trace(
// LOAD_CONST + _POP_FRAME.
if (trace_stack_depth == 0) {
DPRINTF(2, "Trace stack underflow\n");
goto done;}
OPT_STAT_INC(trace_stack_underflow);
goto done;
}
}
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
Expand Down Expand Up @@ -713,6 +718,7 @@ translate_bytecode_to_trace(
PyUnicode_AsUTF8(new_code->co_qualname),
PyUnicode_AsUTF8(new_code->co_filename),
new_code->co_firstlineno);
OPT_STAT_INC(recursive_call);
ADD_TO_TRACE(_SET_IP, 0, 0);
goto done;
}
Expand Down Expand Up @@ -744,6 +750,7 @@ translate_bytecode_to_trace(
break;
}
DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
OPT_UNSUPPORTED_OPCODE(opcode);
goto done; // Break out of loop
} // End default

Expand Down Expand Up @@ -891,7 +898,8 @@ uop_optimize(
// Error or nothing translated
return trace_length;
}
OBJECT_STAT_INC(optimization_traces_created);
OPT_HIST(trace_length, trace_length_hist);
OPT_STAT_INC(traces_created);
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
if (uop_optimize != NULL && *uop_optimize > '0') {
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
Expand All @@ -901,6 +909,7 @@ uop_optimize(
if (executor == NULL) {
return -1;
}
OPT_HIST(trace_length, optimized_trace_length_hist);
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
*exec_ptr = (_PyExecutorObject *)executor;
Expand Down
54 changes: 50 additions & 4 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
}

static void
Expand All @@ -215,13 +211,63 @@ print_gc_stats(FILE *out, GCStats *stats)
}
}

static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
}
}

static void
print_optimization_stats(FILE *out, OptimizationStats *stats)
{
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);

print_histogram(out, "Trace length", stats->trace_length_hist);
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);

const char* const* names;
for (int i = 0; i < 512; i++) {
if (i < 256) {
names = _PyOpcode_OpName;
} else {
names = _PyOpcode_uop_name;
}
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
}
}

for (int i = 0; i < 256; i++) {
if (stats->unsupported_opcode[i]) {
fprintf(
out,
"unsupported_opcode[%s].count : %" PRIu64 "\n",
_PyOpcode_OpName[i],
stats->unsupported_opcode[i]
);
}
}
}

static void
print_stats(FILE *out, PyStats *stats)
{
print_spec_stats(out, stats->opcode_stats);
print_call_stats(out, &stats->call_stats);
print_object_stats(out, &stats->object_stats);
print_gc_stats(out, stats->gc_stats);
print_optimization_stats(out, &stats->optimization_stats);
}

void
Expand Down
Loading

0 comments on commit e561e98

Please sign in to comment.