Skip to content

Commit

Permalink
pythongh-118527: Intern code consts in free-threaded build
Browse files Browse the repository at this point in the history
We already intern and immortalize most string constants. In the
free-threaded build, other constants can be a source of reference count
contention because they are shared by all threads running the same code
objects.
  • Loading branch information
colesbury committed May 6, 2024
1 parent 5a1618a commit c7f5015
Show file tree
Hide file tree
Showing 9 changed files with 275 additions and 14 deletions.
11 changes: 11 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_lock.h" // PyMutex


// We hide some of the newer PyCodeObject fields behind macros.
// This helps with backporting certain changes to 3.12.
Expand All @@ -16,6 +18,15 @@ extern "C" {
#define _PyCode_HAS_INSTRUMENTATION(CODE) \
(CODE->_co_instrumentation_version > 0)

struct _py_code_state {
PyMutex mutex;
// Interned constants from code objects. Only used in the free-threaded
// build.
struct _Py_hashtable_t *constants;
};

extern PyStatus _PyCode_Init(PyInterpreterState *interp);
extern void _PyCode_Fini(PyInterpreterState *interp);

#define CODE_MAX_WATCHERS 8

Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ struct _is {
struct _Py_long_state long_state;
struct _dtoa_state dtoa;
struct _py_func_state func_state;
struct _py_code_state code_state;

struct _Py_dict_state dict_state;
struct _Py_exc_state exc_state;
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_setobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ PyAPI_DATA(PyObject *) _PySet_Dummy;

PyAPI_FUNC(int) _PySet_Contains(PySetObject *so, PyObject *key);

// Clears the set without acquiring locks. Used by _PyCode_Fini.
extern void _PySet_ClearInternal(PySetObject *so);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_ctypes/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_ints(self):
self.assertEqual(ci._objects, None)

def test_c_char_p(self):
s = b"Hello, World"
s = "Hello, World".encode("ascii")
refcnt = sys.getrefcount(s)
cs = c_char_p(s)
self.assertEqual(refcnt + 1, sys.getrefcount(s))
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_ctypes/test_python_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_PyLong_Long(self):

@support.refcount_test
def test_PyObj_FromPtr(self):
s = "abc def ghi jkl"
s = object()
ref = sys.getrefcount(s)
# id(python-object) is the address
pyobj = _ctypes.PyObj_FromPtr(id(s))
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_memoryio.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ def test_sizeof(self):

def _test_cow_mutation(self, mutation):
# Common code for all BytesIO copy-on-write mutation tests.
imm = b' ' * 1024
imm = (' ' * 1024).encode("ascii")
old_rc = sys.getrefcount(imm)
memio = self.ioclass(imm)
self.assertEqual(sys.getrefcount(imm), old_rc + 1)
Expand Down
255 changes: 244 additions & 11 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include "pycore_code.h" // _PyCodeConstructor
#include "pycore_frame.h" // FRAME_SPECIALS_SIZE
#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs
#include "pycore_object.h" // _PyObject_SetDeferredRefcount
#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches
Expand Down Expand Up @@ -118,6 +120,10 @@ all_name_chars(PyObject *o)
return 1;
}

#ifdef Py_GIL_DISABLED
static PyObject *intern_one_constant(PyObject *op);
#endif

static int
intern_strings(PyObject *tuple)
{
Expand All @@ -135,9 +141,11 @@ intern_strings(PyObject *tuple)
return 0;
}

/* Intern selected string constants */
/* Intern constants. In the default build, this interns selected string
constants. In the free-threaded build, this also interns non-string
constants. */
static int
intern_string_constants(PyObject *tuple, int *modified)
intern_constants(PyObject *tuple, int *modified)
{
for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
PyObject *v = PyTuple_GET_ITEM(tuple, i);
Expand All @@ -154,7 +162,7 @@ intern_string_constants(PyObject *tuple, int *modified)
}
}
else if (PyTuple_CheckExact(v)) {
if (intern_string_constants(v, NULL) < 0) {
if (intern_constants(v, NULL) < 0) {
return -1;
}
}
Expand All @@ -165,7 +173,7 @@ intern_string_constants(PyObject *tuple, int *modified)
return -1;
}
int tmp_modified = 0;
if (intern_string_constants(tmp, &tmp_modified) < 0) {
if (intern_constants(tmp, &tmp_modified) < 0) {
Py_DECREF(tmp);
return -1;
}
Expand All @@ -184,6 +192,25 @@ intern_string_constants(PyObject *tuple, int *modified)
}
Py_DECREF(tmp);
}
#ifdef Py_GIL_DISABLED
// Intern non-string consants in the free-threaded build.
PyThreadState *tstate = PyThreadState_GET();
if (!_Py_IsImmortal(v) && !PyCode_Check(v) &&
tstate->interp->gc.immortalize.enable_on_thread_created)
{
PyObject *interned = intern_one_constant(v);
if (interned == NULL) {
return -1;
}
else if (interned != v) {
PyTuple_SET_ITEM(tuple, i, interned);
Py_SETREF(v, interned);
if (modified) {
*modified = 1;
}
}
}
#endif
}
return 0;
}
Expand Down Expand Up @@ -542,18 +569,41 @@ remove_column_info(PyObject *locations)
return res;
}

/* The caller is responsible for ensuring that the given data is valid. */

PyCodeObject *
_PyCode_New(struct _PyCodeConstructor *con)
static int
intern_code_constants(struct _PyCodeConstructor *con)
{
#ifdef Py_GIL_DISABLED
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _py_code_state *state = &interp->code_state;
PyMutex_Lock(&state->mutex);
#endif
if (intern_strings(con->names) < 0) {
return NULL;
goto error;
}
if (intern_string_constants(con->consts, NULL) < 0) {
return NULL;
if (intern_constants(con->consts, NULL) < 0) {
goto error;
}
if (intern_strings(con->localsplusnames) < 0) {
goto error;
}
#ifdef Py_GIL_DISABLED
PyMutex_Unlock(&state->mutex);
#endif
return 0;

error:
#ifdef Py_GIL_DISABLED
PyMutex_Unlock(&state->mutex);
#endif
return -1;
}

/* The caller is responsible for ensuring that the given data is valid. */

PyCodeObject *
_PyCode_New(struct _PyCodeConstructor *con)
{
if (intern_code_constants(con) < 0) {
return NULL;
}

Expand Down Expand Up @@ -2399,3 +2449,186 @@ _PyCode_ConstantKey(PyObject *op)
}
return key;
}

#ifdef Py_GIL_DISABLED
static PyObject *
intern_one_constant(PyObject *op)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_Py_hashtable_t *consts = interp->code_state.constants;

if (PyUnicode_CheckExact(op)) {
// strings are interned separately
return op;
}

_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts, op);
if (entry == NULL) {
if (_Py_hashtable_set(consts, op, op) != 0) {
return NULL;
}

#ifdef Py_REF_DEBUG
Py_ssize_t refcnt = Py_REFCNT(op);
if (refcnt != 1) {
// Adjust the reftotal to account for the fact that we only
// restore a single reference in _PyCode_Fini.
_Py_AddRefTotal(_PyThreadState_GET(), -(refcnt - 1));
}
#endif

_Py_SetImmortal(op);
return op;
}

assert(_Py_IsImmortal(entry->value));
return (PyObject *)entry->value;
}

static int
compare_constants(const void *key1, const void *key2) {
PyObject *op1 = (PyObject *)key1;
PyObject *op2 = (PyObject *)key2;
if (op1 == op2) {
return 1;
}
if (Py_TYPE(op1) != Py_TYPE(op2)) {
return 0;
}
// We compare container contents by identity because we have already
// internalized the items.
if (PyTuple_CheckExact(op1)) {
Py_ssize_t size = PyTuple_GET_SIZE(op1);
if (size != PyTuple_GET_SIZE(op2)) {
return 0;
}
for (Py_ssize_t i = 0; i < size; i++) {
if (PyTuple_GET_ITEM(op1, i) != PyTuple_GET_ITEM(op2, i)) {
return 0;
}
}
return 1;
}
else if (PyFrozenSet_CheckExact(op1)) {
if (PySet_GET_SIZE(op1) != PySet_GET_SIZE(op2)) {
return 0;
}
Py_ssize_t pos1 = 0, pos2 = 0;
PyObject *obj1, *obj2;
Py_hash_t hash1, hash2;
while ((_PySet_NextEntry(op1, &pos1, &obj1, &hash1)) &&
(_PySet_NextEntry(op2, &pos2, &obj2, &hash2)))
{
if (obj1 != obj2) {
return 0;
}
}
return 1;
}
else if (PySlice_Check(op1)) {
PySliceObject *s1 = (PySliceObject *)op1;
PySliceObject *s2 = (PySliceObject *)op2;
return (s1->start == s2->start &&
s1->stop == s2->stop &&
s1->step == s2->step);
}
else if (PyBytes_CheckExact(op1) || PyLong_CheckExact(op1)) {
return PyObject_RichCompareBool(op1, op2, Py_EQ);
}
else if (PyFloat_CheckExact(op1)) {
// Ensure that, for example, +0.0 and -0.0 are distinct
double f1 = PyFloat_AS_DOUBLE(op1);
double f2 = PyFloat_AS_DOUBLE(op2);
return memcmp(&f1, &f2, sizeof(double)) == 0;
}
else if (PyComplex_CheckExact(op1)) {
Py_complex c1 = ((PyComplexObject *)op1)->cval;
Py_complex c2 = ((PyComplexObject *)op2)->cval;
return memcmp(&c1, &c2, sizeof(Py_complex)) == 0;
}
_Py_FatalErrorFormat("unexpected type in compare_constants: %s",
Py_TYPE(op1)->tp_name);
return 0;
}

static Py_uhash_t
hash_const(const void *key)
{
PyObject *op = (PyObject *)key;
if (PySlice_Check(op)) {
PySliceObject *s = (PySliceObject *)op;
PyObject *data[3] = { s->start, s->stop, s->step };
return _Py_HashBytes(&data, sizeof(data));
}
else if (PyTuple_CheckExact(op)) {
Py_ssize_t size = PyTuple_GET_SIZE(op);
PyObject **data = _PyTuple_ITEMS(op);
return _Py_HashBytes(data, sizeof(PyObject *) * size);
}
Py_hash_t h = PyObject_Hash(op);
if (h == -1) {
// This should never happen: all the constants we support have
// infallible hash functions.
Py_FatalError("code: hash failed");
}
return (Py_uhash_t)h;
}

static int
clear_containers(_Py_hashtable_t *ht, const void *key, const void *value,
void *user_data)
{
// First clear containers to avoid recursive deallocation later on in
// destroy_key.
PyObject *op = (PyObject *)key;
if (PyTuple_CheckExact(op)) {
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(op); i++) {
Py_CLEAR(_PyTuple_ITEMS(op)[i]);
}
}
else if (PySlice_Check(op)) {
PySliceObject *slice = (PySliceObject *)op;
Py_SETREF(slice->start, Py_None);
Py_SETREF(slice->stop, Py_None);
Py_SETREF(slice->step, Py_None);
}
else if (PyFrozenSet_CheckExact(op)) {
_PySet_ClearInternal((PySetObject *)op);
}
return 0;
}

static void
destroy_key(void *key)
{
_Py_ClearImmortal(key);
}
#endif

PyStatus
_PyCode_Init(PyInterpreterState *interp)
{
#ifdef Py_GIL_DISABLED
struct _py_code_state *state = &interp->code_state;
state->constants = _Py_hashtable_new_full(&hash_const, &compare_constants,
&destroy_key, NULL, NULL);
if (state->constants == NULL) {
return _PyStatus_NO_MEMORY();
}
#endif
return _PyStatus_OK();
}

void
_PyCode_Fini(PyInterpreterState *interp)
{
#ifdef Py_GIL_DISABLED
// Free interned constants
struct _py_code_state *state = &interp->code_state;
if (state->constants) {
_Py_hashtable_foreach(state->constants, &clear_containers, NULL);
_Py_hashtable_destroy(state->constants);
state->constants = NULL;
}
#endif
}
6 changes: 6 additions & 0 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2621,6 +2621,12 @@ PySet_Clear(PyObject *set)
return 0;
}

void
_PySet_ClearInternal(PySetObject *so)
{
(void)set_clear_internal(so);
}

int
PySet_Contains(PyObject *anyset, PyObject *key)
{
Expand Down
Loading

0 comments on commit c7f5015

Please sign in to comment.