From 376c8f82ff5b8167e4f48b2e2a976200f7b491d6 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 28 Jun 2023 11:39:35 -0600 Subject: [PATCH] Allocation Profiler: Types for all allocations Before this PR, we were missing the types for allocations in two cases: 1. allocations from codegen 2. allocations in gc_managed_realloc_ The second one is easy: those are always used for `buffer`s, right? For the first one: this PR adds a new exported julia function, which codegen will call after every allocation, to record the allocation and set its type. The new function, `jl_maybe_record_alloc_to_profile`, returns the object it was passed. --- src/gc.c | 10 +++++++--- src/jl_exported_funcs.inc | 1 + src/llvm-final-gc-lowering.cpp | 8 +++++--- src/llvm-late-gc-lowering.cpp | 18 ++++++++++++++++++ src/llvm-pass-helpers.cpp | 21 +++++++++++++++++++++ src/llvm-pass-helpers.h | 3 +++ 6 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/gc.c b/src/gc.c index 9e588c171a676..943764a579f30 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1010,7 +1010,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) { jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); - maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); return val; } @@ -1321,9 +1320,13 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize) { jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); - maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); return val; } +JL_DLLEXPORT jl_value_t *jl_maybe_record_alloc_to_profile(jl_value_t* val, int osize, jl_value_t* type) +{ + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); + return val; +} // This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into // its callers.
We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` @@ -3776,7 +3779,8 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds SetLastError(last_error); #endif errno = last_errno; - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); + // gc_managed_realloc_ is currently used exclusively for resizing array buffers, so report the block with jl_buff_tag rather than jl_gc_unknown_type_tag. + maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); return b; } diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 33b431fe12a76..71c60d856efc0 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -185,6 +185,7 @@ XX(jl_gc_new_weakref_th) \ XX(jl_gc_num) \ XX(jl_gc_pool_alloc) \ + XX(jl_maybe_record_alloc_to_profile) \ XX(jl_gc_queue_multiroot) \ XX(jl_gc_queue_root) \ XX(jl_gc_safepoint) \ diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index ac7d67cddd6f3..b8285a7bd79c0 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -48,6 +48,7 @@ struct FinalLowerGC: private JuliaPassContext { Function *queueRootFunc; Function *poolAllocFunc; Function *bigAllocFunc; + Function *recordAllocFunc; Function *allocTypedFunc; Instruction *pgcstack; Type *T_size; @@ -253,10 +254,11 @@ bool FinalLowerGC::doInitialization(Module &M) { queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); + recordAllocFunc = getOrDeclare(jl_well_known::GCRecordAllocToProfile); allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); T_size = M.getDataLayout().getIntPtrType(M.getContext()); - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, recordAllocFunc, allocTypedFunc}; unsigned j = 0; for (unsigned i = 0; i < sizeof(functionList) /
sizeof(void*); i++) { if (!functionList[i]) @@ -272,8 +274,8 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; - queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, recordAllocFunc, allocTypedFunc}; + queueRootFunc = poolAllocFunc = bigAllocFunc = recordAllocFunc = allocTypedFunc = nullptr; auto used = M.getGlobalVariable("llvm.compiler.used"); if (!used) return false; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index f1cef798224d2..5f67024cc0f44 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2404,6 +2404,24 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { store->setOrdering(AtomicOrdering::Unordered); store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); + auto recordAllocIntrinsic = getOrDeclare(jl_well_known::GCRecordAllocToProfile); + auto value = newI; + // Report the new object, its size, and its type tag to the allocation profiler. + builder.CreateCall( + recordAllocIntrinsic, + { + value, + builder.CreateIntCast( + CI->getArgOperand(1), + recordAllocIntrinsic->getFunctionType()->getParamType(1), + false), + tag + }); + // The size is cast to the callee's own declared parameter type so the call + // always matches its declaration. The ordering and TBAA metadata set on + // `store` above describe the tag store; they are not copied onto this call. + + // Replace uses of the call to `julia.gc_alloc_obj` with the call to // `julia.gc_alloc_bytes`.
CI->replaceAllUsesWith(newI); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index b006f191937f5..d87358f577ca1 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -238,6 +238,7 @@ namespace jl_intrinsics { namespace jl_well_known { static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); + static const char *GC_RECORD_ALLOC_TO_PROFILE_NAME = XSTR(jl_maybe_record_alloc_to_profile); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); @@ -275,6 +276,26 @@ namespace jl_well_known { return addGCAllocAttributes(poolAllocFunc); }); + const WellKnownFunctionDescription GCRecordAllocToProfile( + GC_RECORD_ALLOC_TO_PROFILE_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); + auto recordAllocFunc = Function::Create( + FunctionType::get( + T_prjlvalue, + { + T_prjlvalue, + Type::getInt32Ty(ctx), + T_size, + }, + false), + Function::ExternalLinkage, + GC_RECORD_ALLOC_TO_PROFILE_NAME); + // Profiling hook, not an allocator: no allocsize or GC-allocation attributes are attached. + return recordAllocFunc; + }); + const WellKnownFunctionDescription GCQueueRoot( GC_QUEUE_ROOT_NAME, [](Type *T_size) { diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 727f463dc50ef..a6cb36be282e1 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -147,6 +147,9 @@ namespace jl_well_known { // `jl_gc_pool_alloc`: allocates bytes. extern const WellKnownFunctionDescription GCPoolAlloc; + // `jl_maybe_record_alloc_to_profile`: records an allocation to the alloc profile. + extern const WellKnownFunctionDescription GCRecordAllocToProfile; + // `jl_gc_queue_root`: queues a GC root. extern const WellKnownFunctionDescription GCQueueRoot;