Skip to content

Commit

Permalink
codegen: take gc roots (and alloca alignment) more seriously
Browse files Browse the repository at this point in the history
Due to limitations in the LLVM implementation, we are forced to emit
fairly bad code here. But we need to make sure it is still correct with
respect to GC rooting.

The PR 50833c8 also changed the meaning
of haspadding without changing all of the existing users to use the new
equivalent flag (haspadding || !isbitsegal), incurring additional
breakage here as well and needing more tests for that.

Fixes #54720
  • Loading branch information
vtjnash committed Aug 3, 2024
1 parent e1e5a46 commit 8bfef8f
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 49 deletions.
8 changes: 4 additions & 4 deletions src/abi_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout {
Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
{
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0`
if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
return nullptr;
size_t nfields = dt->layout->nfields;
Expand Down Expand Up @@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
{
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
// `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0`
Type *lltype;
// Check size first since it's cheaper.
switch (jl_datatype_size(dt)) {
Expand All @@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
{
// Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding)
if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
return nullptr;
return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
}
Expand Down Expand Up @@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
// uniquely addressable members.
// Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
size_t dsz = jl_datatype_size(dt);
if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding)
if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
return NULL;
nele = 0;
ElementType eltype;
Expand Down
2 changes: 1 addition & 1 deletion src/abi_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
if (jl_is_structtype(dt)) {
// Fast path checks before descending the type hierarchy
// (4 x 128b vector == 64B max size)
if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding)
if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
return 0;

base = NULL;
Expand Down
2 changes: 1 addition & 1 deletion src/abi_ppc64le.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout {
// count the homogeneous floating aggregate size (saturating at max count of 8)
unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
{
if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding)
if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding)
return 9;

size_t i, l = ty->layout->nfields;
Expand Down
97 changes: 61 additions & 36 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2119,12 +2119,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
FailOrder = AtomicOrdering::Monotonic;
unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
AllocaInst *intcast = nullptr;
Type *intcast_eltyp = nullptr;
bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0;
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
intcast_eltyp = elty;
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
if (!issetfield) {
intcast = emit_static_alloca(ctx, elty);
setName(ctx.emission_context, intcast, "atomic_store_box");
}
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
}
Type *realelty = elty;
if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
Expand All @@ -2133,14 +2136,20 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
}
Value *r = nullptr;
if (issetfield || isswapfield || isreplacefield || issetfieldonce) {
if (isboxed)
if (issetfield || isswapfield || isreplacefield || issetfieldonce) { // e.g. !ismodifyfield
assert(isboxed || rhs.typ == jltype);
if (isboxed) {
r = boxed(ctx, rhs);
else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
}
else if (intcast) {
emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
r = ctx.builder.CreateLoad(realelty, intcast);
}
else if (aliasscope || Order != AtomicOrdering::NotAtomic || tracked_pointers) {
r = emit_unbox(ctx, realelty, rhs, jltype);
if (realelty != elty)
r = ctx.builder.CreateZExt(r, elty);
}
if (realelty != elty)
r = ctx.builder.CreateZExt(r, elty);
}
if (isboxed)
alignment = sizeof(void*);
Expand Down Expand Up @@ -2222,7 +2231,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
Current->addIncoming(instr, SkipBB);
ctx.builder.SetInsertPoint(BB);
}
Compare = emit_unbox(ctx, realelty, cmp, jltype);
cmp = update_julia_type(ctx, cmp, jltype);
if (intcast) {
emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
Compare = ctx.builder.CreateLoad(realelty, intcast);
}
else {
Compare = emit_unbox(ctx, realelty, cmp, jltype);
}
if (realelty != elty)
Compare = ctx.builder.CreateZExt(Compare, elty);
}
Expand Down Expand Up @@ -2269,28 +2285,36 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
if (realelty != elty)
realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
if (intcast) {
assert(!isboxed);
ctx.builder.CreateStore(realCompare, intcast);
if (maybe_null_if_boxed)
realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
if (tracked_pointers)
realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast);
}
if (maybe_null_if_boxed) {
Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
if (first_ptr)
null_load_check(ctx, first_ptr, mod, var);
if (maybe_null_if_boxed && tracked_pointers) {
Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare);
assert(first_ptr);
null_load_check(ctx, first_ptr, mod, var);
}
if (intcast)
if (intcast && !tracked_pointers)
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
else
oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
rhs = newval(oldval);
if (isboxed) {
r = boxed(ctx, rhs);
}
else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
else if (intcast) {
emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
r = ctx.builder.CreateLoad(realelty, intcast);
if (!tracked_pointers) // oldval is a slot, so put the oldval back
ctx.builder.CreateStore(realCompare, intcast);
}
else if (Order != AtomicOrdering::NotAtomic) {
assert(!tracked_pointers);
r = emit_unbox(ctx, realelty, rhs, jltype);
if (realelty != elty)
r = ctx.builder.CreateZExt(r, elty);
}
if (realelty != elty)
r = ctx.builder.CreateZExt(r, elty);
if (needlock)
emit_lockstate_value(ctx, needlock, true);
cmp = oldval;
Expand Down Expand Up @@ -2356,9 +2380,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
if (intcast) {
ctx.builder.CreateStore(realinstr, intcast);
// n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
if (maybe_null_if_boxed)
realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
}
else {
oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
Expand Down Expand Up @@ -2398,20 +2423,23 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
ctx.builder.SetInsertPoint(DoneBB);
if (needlock)
emit_lockstate_value(ctx, needlock, false);
if (parent != NULL) {
if (parent != NULL && r && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) {
if (isreplacefield || issetfieldonce) {
// TODO: avoid this branch if we aren't making a write barrier
BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
ctx.builder.CreateCondBr(Success, BB, DoneBB);
ctx.builder.SetInsertPoint(BB);
}
if (r) {
if (!isboxed)
emit_write_multibarrier(ctx, parent, r, rhs.typ);
else if (!type_is_permalloc(rhs.typ))
emit_write_barrier(ctx, parent, r);
if (realelty != elty)
r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty));
if (intcast) {
ctx.builder.CreateStore(r, intcast);
r = ctx.builder.CreateLoad(intcast_eltyp, intcast);
}
if (!isboxed)
emit_write_multibarrier(ctx, parent, r, rhs.typ);
else if (!type_is_permalloc(rhs.typ))
emit_write_barrier(ctx, parent, r);
if (isreplacefield || issetfieldonce) {
ctx.builder.CreateBr(DoneBB);
ctx.builder.SetInsertPoint(DoneBB);
Expand All @@ -2430,21 +2458,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
if (intcast) {
ctx.builder.CreateStore(instr, intcast);
instr = nullptr;
if (tracked_pointers)
instr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
}
if (maybe_null_if_boxed) {
if (intcast)
instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
if (maybe_null_if_boxed && tracked_pointers) {
Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
if (first_ptr)
null_load_check(ctx, first_ptr, mod, var);
if (intcast && !first_ptr)
instr = nullptr;
assert(first_ptr);
null_load_check(ctx, first_ptr, mod, var);
}
if (instr)
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
else
if (intcast && !tracked_pointers)
oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
else
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
if (isreplacefield) {
Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
Expand Down
3 changes: 3 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3374,11 +3374,14 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty,
size_t padding_bytes = 0;
size_t nfields = jl_datatype_nfields(aty);
size_t total_size = jl_datatype_size(aty);
assert(aty->layout->flags.isbitsegal);
for (size_t i = 0; i < nfields; ++i) {
size_t offset = jl_field_offset(aty, i);
size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1);
size_t fsz = jl_field_size(aty, i);
jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i);
assert(jl_is_datatype(fty)); // union fields should never reach here
assert(fty->layout->flags.isbitsegal);
if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) {
// The field has no internal padding
data_bytes += fsz;
Expand Down
14 changes: 7 additions & 7 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
}
else if (nb == 1) {
uint8_t *y8 = (uint8_t*)y;
assert(!dt->layout->flags.haspadding);
assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
if (dt == et) {
*y8 = *(uint8_t*)expected;
uint8_t z8 = *(uint8_t*)src;
Expand All @@ -1263,7 +1263,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
}
else if (nb == 2) {
uint16_t *y16 = (uint16_t*)y;
assert(!dt->layout->flags.haspadding);
assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
if (dt == et) {
*y16 = *(uint16_t*)expected;
uint16_t z16 = *(uint16_t*)src;
Expand All @@ -1281,7 +1281,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
uint32_t z32 = zext_read32(src, nb);
while (1) {
success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
break;
}
}
Expand All @@ -1298,7 +1298,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
uint64_t z64 = zext_read64(src, nb);
while (1) {
success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
break;
}
}
Expand All @@ -1316,7 +1316,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
jl_uint128_t z128 = zext_read128(src, nb);
while (1) {
success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
break;
}
}
Expand Down Expand Up @@ -2010,7 +2010,7 @@ inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_
else {
char *px = lock(p, parent, needlock, isatomic);
int success = memcmp(px, (char*)r, fsz) == 0;
if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty);
if (success) {
if (isunion) {
Expand Down Expand Up @@ -2125,7 +2125,7 @@ inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value
success = (rty == jl_typeof(expected));
if (success) {
success = memcmp((char*)r, (char*)expected, rsz) == 0;
if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
}
*((uint8_t*)r + fsz) = success ? 1 : 0;
Expand Down
6 changes: 6 additions & 0 deletions test/atomics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20))
test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20))
test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
Expand Down Expand Up @@ -317,6 +318,7 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
test_field_orderings(10.0, 20.0)
test_field_orderings(NaN, Inf)

Expand Down Expand Up @@ -568,6 +570,7 @@ test_global_operators(Any)
test_global_operators(Union{Nothing,Int})
test_global_operators(Complex{Int32})
test_global_operators(Complex{Int128})
test_global_operators(Complex{Real})
test_global_operators(PadIntA)
test_global_operators(PadIntB)
#FIXME: test_global_operators(Int24)
Expand Down Expand Up @@ -691,6 +694,7 @@ test_global_orderings(Any, true, false)
test_global_orderings(Union{Nothing,Missing}, nothing, missing)
test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40))
test_global_orderings(Float64, 10.0, 20.0)
test_global_orderings(Float64, NaN, Inf)

Expand Down Expand Up @@ -844,6 +848,7 @@ test_memory_operators(Any)
test_memory_operators(Union{Nothing,Int})
test_memory_operators(Complex{Int32})
test_memory_operators(Complex{Int128})
test_memory_operators(Complex{Real})
test_memory_operators(PadIntA)
test_memory_operators(PadIntB)
#FIXME: test_memory_operators(Int24)
Expand Down Expand Up @@ -1031,6 +1036,7 @@ test_memory_orderings(Any, true, false)
test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
test_memory_orderings(10.0, 20.0)
test_memory_orderings(NaN, Inf)

Expand Down

0 comments on commit 8bfef8f

Please sign in to comment.