Skip to content

Commit

Permalink
pythongh-117139: Convert the evaluation stack to stack refs (python#1…
Browse files Browse the repository at this point in the history
…18450)

This PR sets up tagged pointers for CPython.

The general idea is to create a separate struct _PyStackRef for everything on the evaluation stack to store the bits. This forces the C compiler to warn us if we try to cast things or pull things out of the struct directly.

Only for free threading: We tag the low bit if something is deferred - that means we skip incref and decref operations on it. This behavior may change in the future if Mark's plans to defer all objects in the interpreter loop pans out.

This implies a strict stack reference discipline is required. ALL incref and decref operations on stackrefs must use the stackref variants. It is unsafe to untag something then do normal incref/decref ops on it.

The new incref and decref variants are called dup and close. They mimic a "handle" API operating on these stackrefs.

Please read Include/internal/pycore_stackref.h for more information!

---------

Co-authored-by: Mark Shannon <[email protected]>
  • Loading branch information
Fidget-Spinner and markshannon authored Jun 26, 2024
1 parent d611c4c commit 22b0de2
Show file tree
Hide file tree
Showing 35 changed files with 5,215 additions and 3,745 deletions.
5 changes: 4 additions & 1 deletion Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,11 @@ PyAPI_FUNC(void) _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *c
PyAPI_FUNC(void) _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs);
PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
PyAPI_FUNC(int) _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
PyAPI_FUNC(int) _PyEval_UnpackIterableStackRef(PyThreadState *tstate, _PyStackRef v, int argcnt, int argcntafter, _PyStackRef *sp);
PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_ssize_t nargs, PyObject **scratch);

PyAPI_FUNC(void) _PyObjectArray_Free(PyObject **array, PyObject **scratch);


/* Bits that can be set in PyThreadState.eval_breaker */
Expand Down
29 changes: 15 additions & 14 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_stackref.h" // _PyStackRef
#include "pycore_lock.h" // PyMutex
#include "pycore_backoff.h" // _Py_BackoffCounter

Expand Down Expand Up @@ -317,30 +318,30 @@ extern void _PyCode_Clear_Executors(PyCodeObject *code);

/* Specialization functions */

extern void _Py_Specialize_LoadSuperAttr(PyObject *global_super, PyObject *cls,
extern void _Py_Specialize_LoadSuperAttr(_PyStackRef global_super, _PyStackRef cls,
_Py_CODEUNIT *instr, int load_method);
extern void _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
extern void _Py_Specialize_LoadAttr(_PyStackRef owner, _Py_CODEUNIT *instr,
PyObject *name);
extern void _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
extern void _Py_Specialize_StoreAttr(_PyStackRef owner, _Py_CODEUNIT *instr,
PyObject *name);
extern void _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins,
_Py_CODEUNIT *instr, PyObject *name);
extern void _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container,
extern void _Py_Specialize_BinarySubscr(_PyStackRef sub, _PyStackRef container,
_Py_CODEUNIT *instr);
extern void _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub,
extern void _Py_Specialize_StoreSubscr(_PyStackRef container, _PyStackRef sub,
_Py_CODEUNIT *instr);
extern void _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
extern void _Py_Specialize_Call(_PyStackRef callable, _Py_CODEUNIT *instr,
int nargs);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg, PyObject **locals);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
extern void _Py_Specialize_BinaryOp(_PyStackRef lhs, _PyStackRef rhs, _Py_CODEUNIT *instr,
int oparg, _PyStackRef *locals);
extern void _Py_Specialize_CompareOp(_PyStackRef lhs, _PyStackRef rhs,
_Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
extern void _Py_Specialize_UnpackSequence(_PyStackRef seq, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ToBool(PyObject *value, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ContainsOp(PyObject *value, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_Send(_PyStackRef receiver, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ToBool(_PyStackRef value, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr);

#ifdef Py_STATS

Expand Down
27 changes: 14 additions & 13 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ extern "C" {
#include <stdbool.h>
#include <stddef.h> // offsetof()
#include "pycore_code.h" // STATS
#include "pycore_stackref.h" // _PyStackRef

/* See Objects/frame_layout.md for an explanation of the frame stack
* including explanation of the PyFrameObject and _PyInterpreterFrame
Expand Down Expand Up @@ -67,7 +68,7 @@ typedef struct _PyInterpreterFrame {
uint16_t return_offset; /* Only relevant during a function call */
char owner;
/* Locals and stack */
PyObject *localsplus[1];
_PyStackRef localsplus[1];
} _PyInterpreterFrame;

#define _PyInterpreterFrame_LASTI(IF) \
Expand All @@ -78,23 +79,23 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
return (PyCodeObject *)f->f_executable;
}

static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) {
return f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus;
static inline _PyStackRef *_PyFrame_Stackbase(_PyInterpreterFrame *f) {
return (f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus);
}

static inline PyObject *_PyFrame_StackPeek(_PyInterpreterFrame *f) {
static inline _PyStackRef _PyFrame_StackPeek(_PyInterpreterFrame *f) {
assert(f->stacktop > _PyFrame_GetCode(f)->co_nlocalsplus);
assert(f->localsplus[f->stacktop-1] != NULL);
assert(!PyStackRef_IsNull(f->localsplus[f->stacktop-1]));
return f->localsplus[f->stacktop-1];
}

static inline PyObject *_PyFrame_StackPop(_PyInterpreterFrame *f) {
static inline _PyStackRef _PyFrame_StackPop(_PyInterpreterFrame *f) {
assert(f->stacktop > _PyFrame_GetCode(f)->co_nlocalsplus);
f->stacktop--;
return f->localsplus[f->stacktop];
}

static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) {
static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, _PyStackRef value) {
f->localsplus[f->stacktop] = value;
f->stacktop++;
}
Expand Down Expand Up @@ -143,14 +144,14 @@ _PyFrame_Initialize(
frame->owner = FRAME_OWNED_BY_THREAD;

for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
frame->localsplus[i] = NULL;
frame->localsplus[i] = PyStackRef_NULL;
}
}

/* Gets the pointer to the locals array
* that precedes this frame.
*/
static inline PyObject**
static inline _PyStackRef*
_PyFrame_GetLocalsArray(_PyInterpreterFrame *frame)
{
return frame->localsplus;
Expand All @@ -160,16 +161,16 @@ _PyFrame_GetLocalsArray(_PyInterpreterFrame *frame)
Having stacktop <= 0 ensures that invalid
values are not visible to the cycle GC.
We choose -1 rather than 0 to assist debugging. */
static inline PyObject**
static inline _PyStackRef*
_PyFrame_GetStackPointer(_PyInterpreterFrame *frame)
{
PyObject **sp = frame->localsplus + frame->stacktop;
_PyStackRef *sp = frame->localsplus + frame->stacktop;
frame->stacktop = -1;
return sp;
}

static inline void
_PyFrame_SetStackPointer(_PyInterpreterFrame *frame, PyObject **stack_pointer)
_PyFrame_SetStackPointer(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer)
{
frame->stacktop = (int)(stack_pointer - frame->localsplus);
}
Expand Down Expand Up @@ -309,7 +310,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int

PyAPI_FUNC(_PyInterpreterFrame *)
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
PyObject *locals, _PyStackRef const* args,
size_t argcount, PyObject *kwnames);

#ifdef __cplusplus
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ extern "C" {

#ifdef _Py_JIT

typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate);
typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);

int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
void _PyJIT_Free(_PyExecutorObject *executor);
Expand Down
15 changes: 0 additions & 15 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,21 +159,6 @@ static inline void _Py_ClearImmortal(PyObject *op)
op = NULL; \
} while (0)

// Mark an object as supporting deferred reference counting. This is a no-op
// in the default (with GIL) build. Objects that use deferred reference
// counting should be tracked by the GC so that they are eventually collected.
extern void _PyObject_SetDeferredRefcount(PyObject *op);

static inline int
_PyObject_HasDeferredRefcount(PyObject *op)
{
#ifdef Py_GIL_DISABLED
return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_DEFERRED);
#else
return 0;
#endif
}

#if !defined(Py_GIL_DISABLED)
static inline void
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
Expand Down
32 changes: 32 additions & 0 deletions Include/internal/pycore_object_deferred.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef Py_INTERNAL_OBJECT_DEFERRED_H
#define Py_INTERNAL_OBJECT_DEFERRED_H

#ifdef __cplusplus
extern "C" {
#endif

#include "pycore_gc.h"

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

// Mark an object as supporting deferred reference counting. This is a no-op
// in the default (with GIL) build. Objects that use deferred reference
// counting should be tracked by the GC so that they are eventually collected.
extern void _PyObject_SetDeferredRefcount(PyObject *op);

static inline int
_PyObject_HasDeferredRefcount(PyObject *op)
{
#ifdef Py_GIL_DISABLED
return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_DEFERRED);
#else
return 0;
#endif
}

#ifdef __cplusplus
}
#endif
#endif // !Py_INTERNAL_OBJECT_DEFERRED_H
2 changes: 1 addition & 1 deletion Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);

PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);

PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **exec_ptr);

#ifdef __cplusplus
}
Expand Down
Loading

0 comments on commit 22b0de2

Please sign in to comment.