gh-112532: Use separate mimalloc heaps for GC objects
In `--disable-gil` builds, we now use four separate heaps in anticipation
of using mimalloc to find GC objects when the GIL is disabled. To support
this, we also make a few changes to mimalloc:

* Heap and mi_tld_t initialization is split from allocation. This allows
  us to have a per-PyThreadState mi_tld_t, which is important for
  preserving interpreter isolation, since the same OS thread may run in
  multiple interpreters (using different PyThreadStates).
* The pool of abandoned segments is refactored into its own struct, so
  that each interpreter can use its own pool and interpreter isolation
  is preserved.
* Heap abandoning (mi_heap_collect_ex) can now be called from a different
  thread than the one that created the heap. This is necessary because
  we may clear and delete the containing PyThreadStates from a different
  thread during finalization and after fork().
colesbury committed Dec 18, 2023
1 parent 498a096 commit 572e191
Showing 11 changed files with 263 additions and 99 deletions.
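To make the commit message concrete before the per-file diff: a minimal sketch of how an allocation could be routed to one of the four per-thread heaps declared below in pycore_mimalloc.h. This is an illustration, not code from the commit; the helper name and the direct heap selection are assumptions (in the real design, allocations are expected to go through the thread's current_object_heap), while mi_heap_malloc() is mimalloc's public per-heap allocator.

// Illustrative sketch only, not code from this commit.
// Routes an allocation to one of the four per-thread heaps declared in
// pycore_mimalloc.h; gc_alloc_sketch() is a hypothetical helper.
static void *
gc_alloc_sketch(_PyThreadStateImpl *tstate, size_t size,
                int is_gc, int has_preheader)
{
    int heap_id = !is_gc        ? _Py_MIMALLOC_HEAP_OBJECT
                : has_preheader ? _Py_MIMALLOC_HEAP_GC_PRE
                                : _Py_MIMALLOC_HEAP_GC;
    // Heaps are thread-local, so this path needs no lock even without the GIL.
    return mi_heap_malloc(&tstate->mimalloc.heaps[heap_id], size);
}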
24 changes: 6 additions & 18 deletions Include/internal/mimalloc/mimalloc/internal.h
@@ -23,23 +23,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
-#define MI_CACHE_LINE 64
-#if defined(_MSC_VER)
-#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
-#pragma warning(disable:26812) // unscoped enum warning
-#define mi_decl_noinline __declspec(noinline)
-#define mi_decl_thread __declspec(thread)
-#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
-#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
-#define mi_decl_noinline __attribute__((noinline))
-#define mi_decl_thread __thread
-#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
-#else
-#define mi_decl_noinline
-#define mi_decl_thread __thread // hope for the best :-)
-#define mi_decl_cache_align
-#endif
-
 #if defined(__EMSCRIPTEN__) && !defined(__wasi__)
 #define __wasi__
 #endif
@@ -85,6 +68,8 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
 mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
 void _mi_thread_done(mi_heap_t* heap);
 void _mi_thread_data_collect(void);
+void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
+
 
 // os.c
 void _mi_os_init(void); // called from process init
@@ -130,6 +115,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void _mi_segment_map_freed_at(const mi_segment_t* segment);
 
 // "segment.c"
+extern mi_abandoned_pool_t _mi_abandoned_default; // global abandoned pool
 mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
@@ -144,7 +130,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, m
 
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
-void _mi_abandoned_await_readers(void);
+void _mi_abandoned_await_readers(mi_abandoned_pool_t *pool);
 void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
 
 // "page.c"
@@ -170,8 +156,10 @@ size_t _mi_bin_size(uint8_t bin); // for stats
 uint8_t _mi_bin(size_t size); // for stats
 
 // "heap.c"
+void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id);
 void _mi_heap_destroy_pages(mi_heap_t* heap);
 void _mi_heap_collect_abandon(mi_heap_t* heap);
+void _mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from);
 void _mi_heap_set_default_direct(mi_heap_t* heap);
 bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
 void _mi_heap_unsafe_destroy_all(void);
40 changes: 40 additions & 0 deletions Include/internal/mimalloc/mimalloc/types.h
@@ -33,6 +33,23 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
 #endif
 
+#define MI_CACHE_LINE 64
+#if defined(_MSC_VER)
+#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
+#pragma warning(disable:26812) // unscoped enum warning
+#define mi_decl_noinline __declspec(noinline)
+#define mi_decl_thread __declspec(thread)
+#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
+#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
+#define mi_decl_noinline __attribute__((noinline))
+#define mi_decl_thread __thread
+#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
+#else
+#define mi_decl_noinline
+#define mi_decl_thread __thread // hope for the best :-)
+#define mi_decl_cache_align
+#endif
+
 // ------------------------------------------------------
 // Variants
 // ------------------------------------------------------
@@ -445,6 +462,28 @@ typedef struct mi_segment_s {
   mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
 } mi_segment_t;
 
+typedef uintptr_t mi_tagged_segment_t;
+
+// Segments unowned by any thread are put in a shared pool
+typedef struct mi_abandoned_pool_s {
+  // This is a list of visited abandoned pages that were full at the time.
+  // This list migrates to `abandoned` when that becomes NULL. The use of
+  // this list reduces contention and the rate at which segments are visited.
+  mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+
+  // The abandoned page list (tagged as it supports pop)
+  mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+
+  // Maintain these for debug purposes (these counts may be a bit off)
+  mi_decl_cache_align _Atomic(size_t) abandoned_count;
+  mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
+
+  // We also maintain a count of current readers of the abandoned list
+  // in order to prevent resetting/decommitting segment memory if it might
+  // still be read.
+  mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
+} mi_abandoned_pool_t;
+
 
 // ------------------------------------------------------
 // Heaps
@@ -654,6 +693,7 @@ typedef struct mi_segments_tld_s {
   size_t peak_size; // peak size of all segments
   mi_stats_t* stats; // points to tld stats
   mi_os_tld_t* os; // points to os stats
+  mi_abandoned_pool_t* abandoned; // pool of abandoned segments
 } mi_segments_tld_t;
 
 // Thread local data
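The new mi_abandoned_pool_t above relies on `abandoned` being a tagged pointer so that lock-free pop is safe against ABA. A short sketch of the encoding, adapted from mimalloc's segment.c (where MI_TAGGED_MASK is MI_SEGMENT_MASK): segments are MI_SEGMENT_SIZE-aligned, so the low bits of a segment pointer are free to carry a serial number that is bumped on every update.

// Sketch of the tagged-segment encoding behind the `abandoned` field,
// adapted from mimalloc's segment.c.
#define MI_TAGGED_MASK  MI_SEGMENT_MASK

static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
  return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);                  // strip the tag
}

static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
  mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
  uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;  // bump the serial
  return ((uintptr_t)segment | tag);                             // pointer + fresh tag
}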
5 changes: 5 additions & 0 deletions Include/internal/pycore_interp.h
@@ -27,6 +27,7 @@ extern "C" {
 #include "pycore_import.h" // struct _import_state
 #include "pycore_instruments.h" // _PY_MONITORING_EVENTS
 #include "pycore_list.h" // struct _Py_list_state
+#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
 #include "pycore_object_state.h" // struct _py_object_state
 #include "pycore_obmalloc.h" // struct _obmalloc_state
 #include "pycore_tstate.h" // _PyThreadStateImpl
@@ -166,6 +167,10 @@ struct _is {
 struct _warnings_runtime_state warnings;
 struct atexit_state atexit;
 
+#if defined(Py_GIL_DISABLED)
+struct _mimalloc_interp_state mimalloc;
+#endif
+
 struct _obmalloc_state obmalloc;
 
 PyObject *audit_hooks;
33 changes: 33 additions & 0 deletions Include/internal/pycore_mimalloc.h
@@ -9,11 +9,44 @@
 # error "pycore_mimalloc.h must be included before mimalloc.h"
 #endif
 
+#define _Py_MIMALLOC_HEAP_MEM 0 // PyMem_Malloc() and friends
+#define _Py_MIMALLOC_HEAP_OBJECT 1 // non-GC objects
+#define _Py_MIMALLOC_HEAP_GC 2 // GC objects without pre-header
+#define _Py_MIMALLOC_HEAP_GC_PRE 3 // GC objects with pre-header
+#define _Py_MIMALLOC_HEAP_COUNT 4
+
 #include "pycore_pymem.h"
 #define MI_DEBUG_UNINIT PYMEM_CLEANBYTE
 #define MI_DEBUG_FREED PYMEM_DEADBYTE
 #define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE
 #ifdef Py_DEBUG
 # define MI_DEBUG 1
 #else
 # define MI_DEBUG 0
 #endif
 
 #include "mimalloc.h"
+#include "mimalloc/types.h"
+#include "mimalloc/internal.h"
+
+struct _mimalloc_interp_state {
+#ifdef Py_GIL_DISABLED
+    // When exiting, threads place any segments with live blocks in this
+    // shared pool for other threads to claim and reuse.
+    mi_abandoned_pool_t abandoned_pool;
+#else
+    char _unused; // empty structs are not allowed
+#endif
+};
+
+struct _mimalloc_thread_state {
+#ifdef Py_GIL_DISABLED
+    mi_heap_t *current_object_heap;
+    mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
+    mi_tld_t tld;
+#else
+    char _unused; // empty structs are not allowed
+#endif
+};
+
 #endif // Py_INTERNAL_MIMALLOC_H
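The commit message's "using mimalloc to find GC objects" refers to walking these heaps directly instead of threading GC objects onto a linked list. A hedged sketch of what that could look like using mimalloc's public block visitor: mi_heap_visit_blocks() and mi_block_visit_fun are real mimalloc API, while visit_gc_block() and gc_visit_object() are hypothetical names, and none of this is code from the commit.

// Hypothetical sketch: enumerate one thread's GC objects by walking its
// dedicated GC heaps with mimalloc's block visitor.
static bool
visit_gc_block(const mi_heap_t *heap, const mi_heap_area_t *area,
               void *block, size_t block_size, void *arg)
{
    if (block != NULL) {
        gc_visit_object(block, arg);  // hypothetical per-object callback
    }
    return true;  // keep visiting
}

static void
visit_thread_gc_heaps(_PyThreadStateImpl *tstate, void *arg)
{
    struct _mimalloc_thread_state *m = &tstate->mimalloc;
    mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, &visit_gc_block, arg);
    mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC_PRE], true, &visit_gc_block, arg);
}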
7 changes: 6 additions & 1 deletion Include/internal/pycore_tstate.h
@@ -8,6 +8,8 @@ extern "C" {
 # error "this header requires Py_BUILD_CORE define"
 #endif
 
+#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
+
 
 // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
 // PyThreadState fields are exposed as part of the C API, although most fields
@@ -16,7 +18,10 @@ typedef struct _PyThreadStateImpl {
 // semi-public fields are in PyThreadState.
 PyThreadState base;
 
-// TODO: add private fields here
+#ifdef Py_GIL_DISABLED
+struct _mimalloc_thread_state mimalloc;
+#endif
+
 } _PyThreadStateImpl;
 
 
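The layout trick described in the comment above means internal code recovers the private fields with a cast; a one-line illustration, not taken from the diff:

// tstate is the public PyThreadState*; the runtime allocated it as the
// larger _PyThreadStateImpl, so this downcast is always valid internally.
_PyThreadStateImpl *impl = (_PyThreadStateImpl *)tstate;
mi_heap_t *heap = impl->mimalloc.current_object_heap;  // Py_GIL_DISABLED builds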
4 changes: 3 additions & 1 deletion Lib/test/test_import/__init__.py
@@ -26,7 +26,8 @@
 from test.support import os_helper
 from test.support import (
     STDLIB_DIR, swap_attr, swap_item, cpython_only, is_emscripten,
-    is_wasi, run_in_subinterp, run_in_subinterp_with_config, Py_TRACE_REFS)
+    is_wasi, run_in_subinterp, run_in_subinterp_with_config, Py_TRACE_REFS,
+    Py_GIL_DISABLED)
 from test.support.import_helper import (
     forget, make_legacy_pyc, unlink, unload, ready_to_import,
     DirsOnSysPath, CleanImport)
@@ -2018,6 +2019,7 @@ def parse(cls, text):
         return self
 
 
+@unittest.skipIf(Py_GIL_DISABLED, "test deallocates objects from a different interpreter")
 @requires_singlephase_init
 class SinglephaseInitTests(unittest.TestCase):
 
26 changes: 18 additions & 8 deletions Objects/mimalloc/heap.c
@@ -206,18 +206,28 @@ mi_heap_t* mi_heap_get_backing(void) {
   return bheap;
 }
 
-mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
-  mi_heap_t* bheap = mi_heap_get_backing();
-  mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
-  if (heap == NULL) return NULL;
+void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id)
+{
   _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
-  heap->tld = bheap->tld;
+  heap->tld = tld;
   heap->thread_id = _mi_thread_id();
   heap->arena_id = arena_id;
-  _mi_random_split(&bheap->random, &heap->random);
+  if (heap == tld->heap_backing) {
+    _mi_random_init(&heap->random);
+  }
+  else {
+    _mi_random_split(&tld->heap_backing->random, &heap->random);
+  }
   heap->cookie = _mi_heap_random_next(heap) | 1;
   heap->keys[0] = _mi_heap_random_next(heap);
   heap->keys[1] = _mi_heap_random_next(heap);
+}
+
+mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
+  mi_heap_t* bheap = mi_heap_get_backing();
+  mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
+  if (heap == NULL) return NULL;
+  _mi_heap_init_ex(heap, bheap->tld, arena_id);
   heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
   // push on the thread local heaps list
   heap->next = heap->tld->heaps;
@@ -383,7 +393,7 @@ void _mi_heap_unsafe_destroy_all(void) {
 ----------------------------------------------------------- */
 
 // Transfer the pages from one heap to the other
-static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
+void _mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   mi_assert_internal(heap!=NULL);
   if (from==NULL || from->page_count == 0) return;
 
@@ -426,7 +436,7 @@ void mi_heap_delete(mi_heap_t* heap)
 
   if (!mi_heap_is_backing(heap)) {
     // transfer still used pages to the backing heap
-    mi_heap_absorb(heap->tld->heap_backing, heap);
+    _mi_heap_absorb(heap->tld->heap_backing, heap);
   }
   else {
     // the backing heap abandons its pages
27 changes: 12 additions & 15 deletions Objects/mimalloc/init.c
@@ -148,7 +148,7 @@ extern mi_heap_t _mi_heap_main;
 static mi_tld_t tld_main = {
   0, false,
   &_mi_heap_main, & _mi_heap_main,
-  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
+  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os, &_mi_abandoned_default }, // segments
   { 0, &tld_main.stats }, // os
   { MI_STATS_NULL } // stats
 };
@@ -297,24 +297,21 @@ static bool _mi_heap_init(void) {
     mi_thread_data_t* td = mi_thread_data_zalloc();
     if (td == NULL) return false;
 
-    mi_tld_t* tld = &td->tld;
-    mi_heap_t* heap = &td->heap;
+    _mi_tld_init(&td->tld, &td->heap);
+    _mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none());
+    _mi_heap_set_default_direct(&td->heap);
+  }
+  return false;
+}
+
+void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
-  _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
-  heap->thread_id = _mi_thread_id();
-  _mi_random_init(&heap->random);
-  heap->cookie = _mi_heap_random_next(heap) | 1;
-  heap->keys[0] = _mi_heap_random_next(heap);
-  heap->keys[1] = _mi_heap_random_next(heap);
-  heap->tld = tld;
-  tld->heap_backing = heap;
-  tld->heaps = heap;
   tld->segments.stats = &tld->stats;
   tld->segments.os = &tld->os;
+  tld->segments.abandoned = &_mi_abandoned_default;
   tld->os.stats = &tld->stats;
-  _mi_heap_set_default_direct(heap);
-  }
-  return false;
+  tld->heap_backing = bheap;
+  tld->heaps = bheap;
 }

// Free the thread local default heap (called from `mi_thread_done`)
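The pystate.c side of this commit is not rendered above. A plausible sketch (function name and exact wiring assumed, not quoted from the diff) of how a new thread state would bind the pieces together: _mi_tld_init() prepares the per-thread tld with the "mem" heap as backing heap, _mi_heap_init_ex() initializes each of the four heaps against that tld, and the tld is then pointed at the owning interpreter's abandoned pool instead of the global _mi_abandoned_default, which is what preserves interpreter isolation.

// Plausible sketch, names assumed; not quoted from this commit's diff.
static void
bind_mimalloc_thread_state(_PyThreadStateImpl *tstate, PyInterpreterState *interp)
{
    struct _mimalloc_thread_state *mts = &tstate->mimalloc;

    // The "mem" heap doubles as this thread's backing heap.
    _mi_tld_init(&mts->tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);

    // Initialize all four heaps against the per-thread tld.
    for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
        _mi_heap_init_ex(&mts->heaps[i], &mts->tld, _mi_arena_id_none());
    }

    // Abandoned segments go to this interpreter's pool, not the global
    // _mi_abandoned_default, so interpreters stay isolated.
    mts->tld.segments.abandoned = &interp->mimalloc.abandoned_pool;

    // Plain object allocations default to the non-GC object heap.
    mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT];
}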
(Diffs for the remaining changed files are not rendered here.)
