diff --git a/src/ccall.cpp b/src/ccall.cpp index e336de8e3574f..2de5be6906e7c 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -441,22 +441,13 @@ static Value *llvm_type_rewrite( // we need to use this alloca copy trick instead // On ARM and AArch64, the ABI requires casting through memory to different // sizes. - Value *from; - Value *to; const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type)); - if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { - to = emit_static_alloca(ctx, target_type, align); - setName(ctx.emission_context, to, "type_rewrite_buffer"); - from = to; - } - else { - from = emit_static_alloca(ctx, from_type, align); - setName(ctx.emission_context, from, "type_rewrite_buffer"); - to = from; - } - ctx.builder.CreateAlignedStore(v, from, align); - auto pun = ctx.builder.CreateAlignedLoad(target_type, to, align); + size_t nb = std::max(DL.getTypeAllocSize(target_type), DL.getTypeAllocSize(from_type)); + AllocaInst *cast = emit_static_alloca(ctx, nb, align); + setName(ctx.emission_context, cast, "type_rewrite_buffer"); + ctx.builder.CreateAlignedStore(v, cast, align); + auto pun = ctx.builder.CreateAlignedLoad(target_type, cast, align); setName(ctx.emission_context, pun, "type_rewrite"); return pun; } @@ -494,7 +485,7 @@ static const std::string make_errmsg(const char *fname, int n, const char *err) return msg.str(); } -static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn) +static jl_cgval_t typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn) { if (jlto != (jl_value_t*)jl_any_type && !jl_subtype(jvinfo.typ, jlto)) { if (jlto == (jl_value_t*)jl_voidpointer_type) { @@ -502,6 +493,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val if (!jl_is_cpointer_type(jvinfo.typ)) { // emit a typecheck, if not statically known to be correct emit_cpointercheck(ctx, jvinfo, make_errmsg("ccall", argn + 1, "")); + return update_julia_type(ctx, jvinfo, (jl_value_t*)jl_pointer_type); } } else { @@ -526,8 +518,10 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(passBB); } + return update_julia_type(ctx, jvinfo, jlto); } } + return jvinfo; } // Emit code to convert argument to form expected by C ABI @@ -537,7 +531,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val static Value *julia_to_native( jl_codectx_t &ctx, Type *to, bool toboxed, jl_value_t *jlto, jl_unionall_t *jlto_env, - const jl_cgval_t &jvinfo, + jl_cgval_t jvinfo, bool byRef, int argn) { // We're passing Any @@ -547,7 +541,7 @@ static Value *julia_to_native( } assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto)); - typeassert_input(ctx, jvinfo, jlto, jlto_env, argn); + jvinfo = typeassert_input(ctx, jvinfo, jlto, jlto_env, argn); if (!byRef) return emit_unbox(ctx, to, jvinfo, jlto); @@ -556,14 +550,7 @@ static Value *julia_to_native( Align align(julia_alignment(jlto)); Value *slot = emit_static_alloca(ctx, to, align); setName(ctx.emission_context, slot, "native_convert_buffer"); - if (!jvinfo.ispointer()) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); - ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); - } - else { - 
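// Editorial aside, not part of the patch: a standalone, plain-C++ model of the
// single-buffer pun that llvm_type_rewrite now emits. One stack slot sized and
// aligned for the larger of the two types replaces the old two-branch to/from
// alloca selection; the store/load pair mirrors the CreateAlignedStore and
// CreateAlignedLoad above. `pun` is a hypothetical helper name.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>

template <typename To, typename From>
To pun(From v) {
    // one buffer covers both types, like the single `cast` alloca
    alignas(std::max(alignof(To), alignof(From)))
        unsigned char buf[std::max(sizeof(To), sizeof(From))] = {};
    std::memcpy(buf, &v, sizeof v);     // CreateAlignedStore(v, cast, align)
    To out;
    std::memcpy(&out, buf, sizeof out); // CreateAlignedLoad(target_type, cast, align)
    return out;
}

int main() {
    std::printf("0x%08x\n", (unsigned)pun<std::uint32_t>(1.0f)); // 0x3f800000
}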
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
-        emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), align, align);
-    }
+    emit_unbox_store(ctx, jvinfo, slot, ctx.tbaa().tbaa_stack, align);
     return slot;
 }

@@ -1991,7 +1978,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         // If the value is not boxed, try to compute the object id without
         // reboxing it.
         auto T_p_derived = PointerType::get(ctx.builder.getContext(), AddressSpace::Derived);
-        if (!val.isghost && !val.ispointer())
+        if (!val.isghost)
             val = value_to_pointer(ctx, val);
         Value *args[] = {
             emit_typeof(ctx, val, false, true),
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index bf5c67ae9f849..9124638ce7446 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -323,6 +323,8 @@ static bool type_is_permalloc(jl_value_t *typ)
 }

+// find the offset of pointer fields which never need a write barrier, since type analysis
+// shows they are permanently rooted
 static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl &res, unsigned offset)
 {
     // This is an inlined field at `offset`.
@@ -346,14 +348,37 @@ static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl &res
     }
 }

-static llvm::SmallVector get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x)
+// load the N inline roots stored at inline_roots_ptr into registers (as a SmallVector)
+static llvm::SmallVector load_gc_roots(jl_codectx_t &ctx, Value *inline_roots_ptr, size_t npointers, bool isVolatile=false)
+{
+    SmallVector gcroots(npointers);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    auto roots_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    for (size_t i = 0; i < npointers; i++) {
+        auto *ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(jl_value_t*)), Align(sizeof(void*)), isVolatile);
+        roots_ai.decorateInst(ptr);
+        gcroots[i] = ptr;
+    }
+    return gcroots;
+}
+
+// the `inlined` bool indicates whether this must return the inlined roots inside x separately, or whether x itself may be used as the root (if x is already isboxed)
+static llvm::SmallVector get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x, bool inlined=false)
 {
     if (x.constant || x.typ == jl_bottom_type)
         return {};
-    if (x.Vboxed) // superset of x.isboxed
+    if (!inlined && x.Vboxed) // superset of x.isboxed
         return {x.Vboxed};
-    assert(!x.isboxed);
-    if (x.ispointer()) {
+    assert(!x.isboxed || !inlined);
+    if (!x.inline_roots.empty()) {
+        // if (!inlined) { // TODO: implement this filter operation
+        //     SmallVector perm_offsets;
+        //     find_perm_offsets(typ, perm_offsets, 0);
+        //     return filter(!in(perm_offsets), x.inline_roots)
+        // }
+        return x.inline_roots;
+    }
+    if (!inlined && x.ispointer()) {
         assert(x.V);
         assert(x.V->getType()->getPointerAddressSpace() != AddressSpace::Tracked);
         return {x.V};
@@ -363,8 +388,7 @@ static llvm::SmallVector get_gc_roots_for(jl_codectx_t &ctx, co
         Type *T = julia_type_to_llvm(ctx, jltype);
         Value *agg = emit_unbox(ctx, T, x, jltype);
         SmallVector perm_offsets;
-        if (jltype && jl_is_datatype(jltype) && ((jl_datatype_t*)jltype)->layout)
-            find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0);
+        find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0);
         return ExtractTrackedValues(agg, agg->getType(), false, ctx.builder, perm_offsets);
     }
     // nothing here to root, move along
@@ -1078,6 +1102,247 @@ static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst
     emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src,
is_volatile); } +static bool allpointers(jl_datatype_t *typ) +{ + return jl_datatype_size(typ) == typ->layout->npointers * sizeof(void*); +} + +// compute the space required by split_value_into, by simulating it +// returns (sizeof(split_value), n_pointers) +static std::pair split_value_size(jl_datatype_t *typ) +{ + assert(jl_is_datatype(typ)); + size_t dst_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + // drop the data pointer if the entire structure is just pointers + // TODO: eventually we could drop the slots for the pointers from inside the + // types to pack it together, but this can change the alignment of the bits + // in the fields inside, even if those bits have no pointers themselves. So + // we would actually need to compute, for each pointer, whether any + // subsequent field needed the extra alignment (for example, we can + // drop space for any runs of two/four pointer). Some of these + // functions are already written in a way to support that, but not + // fully implemented yet. + bool nodata = allpointers(typ); + if (nodata) + dst_off = 0; + else + dst_off = jl_datatype_size(typ); + return std::make_pair(dst_off, npointers); +} + +// take a value `x` and split its bits into dst and the roots into inline_roots +static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, Value *inline_roots_ptr, jl_aliasinfo_t const &roots_ai, bool isVolatileStore=false) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + assert(jl_is_concrete_type(x.typ)); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + Type *T_prjlvalue = ctx.types().T_prjlvalue; + if (!x.inline_roots.empty()) { + auto sizes = split_value_size(typ); + if (sizes.first > 0) + emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src, isVolatileStore); + for (size_t i = 0; i < sizes.second; i++) { + Value *unbox = x.inline_roots[i]; + roots_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore)); + } + return; + } + if (inline_roots_ptr == nullptr) { + emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst, isVolatileStore); + return; + } + Value *src = data_pointer(ctx, value_to_pointer(ctx, x)); + bool isstack = isa(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack; + size_t dst_off = 0; + size_t src_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + bool nodata = allpointers(typ); + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? 
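// Editorial aside, not part of the patch: a standalone model of the accounting
// split_value_size performs, under the scheme described above. The bits buffer
// keeps the full native size (pointer slots stay, later overwritten with a
// sentinel), unless the value is nothing but pointers, in which case the bits
// buffer is dropped entirely. `ToyLayout` is a hypothetical stand-in for the
// jl_datatype_layout_t fields used here.
#include <cstddef>
#include <cstdio>
#include <utility>

struct ToyLayout {
    std::size_t size;      // jl_datatype_size(typ)
    std::size_t npointers; // typ->layout->npointers (0 if no interior pointers)
};

std::pair<std::size_t, std::size_t> toy_split_value_size(const ToyLayout &l) {
    bool nodata = l.size == l.npointers * sizeof(void*); // allpointers(typ)
    return {nodata ? 0 : l.size, l.npointers};
}

int main() {
    auto a = toy_split_value_size({16, 1});                // e.g. struct { void *p; long x; }
    auto b = toy_split_value_size({2 * sizeof(void*), 2}); // two bare pointers
    std::printf("(%zu, %zu) (%zu, %zu)\n", a.first, a.second, b.first, b.second);
}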
jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > src_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - src_off, + align_dst, + align_src, + isVolatileStore); + dst_off += ptr - src_off; + } + if (last) + break; + auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*))); + if (!isstack) + load->setOrdering(AtomicOrdering::Unordered); + src_ai.decorateInst(load); + roots_ai.decorateInst(ctx.builder.CreateAlignedStore(load, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore)); + align_src = align_dst = Align(sizeof(void*)); + src_off = ptr + sizeof(void*); + if (!nodata) { + // store an undef pointer here, to make sure nobody looks at this + dst_ai.decorateInst(ctx.builder.CreateAlignedStore( + ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1), + emit_ptrgep(ctx, dst, dst_off), + align_src, + isVolatileStore)); + dst_off += sizeof(void*); + assert(dst_off == src_off); + } + } +} + +static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, MutableArrayRef inline_roots) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + assert(jl_is_concrete_type(x.typ)); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + Type *T_prjlvalue = ctx.types().T_prjlvalue; + if (!x.inline_roots.empty()) { + auto sizes = split_value_size(typ); + if (sizes.first > 0) + emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src); + for (size_t i = 0; i < sizes.second; i++) + inline_roots[i] = x.inline_roots[i]; + return; + } + if (inline_roots.empty()) { + emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst); + return; + } + Value *src = data_pointer(ctx, value_to_pointer(ctx, x)); + bool isstack = isa(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack; + size_t dst_off = 0; + size_t src_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + bool nodata = allpointers(typ); + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > src_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - src_off, + align_dst, + align_src); + dst_off += ptr - src_off; + } + if (last) + break; + auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*))); + if (!isstack) + load->setOrdering(AtomicOrdering::Unordered); + src_ai.decorateInst(load); + inline_roots[i] = load; + align_src = align_dst = Align(sizeof(void*)); + src_off = ptr + sizeof(void*); + if (!nodata) { + // store an undef pointer here, to make sure nobody looks at this + dst_ai.decorateInst(ctx.builder.CreateAlignedStore( + ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1), + emit_ptrgep(ctx, dst, dst_off), + align_src)); + dst_off += sizeof(void*); + assert(dst_off == src_off); + } + } +} + +static std::pair> split_value(jl_codectx_t &ctx, const jl_cgval_t &x, Align x_alignment) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + auto sizes = split_value_size(typ); + Align align_dst(julia_alignment((jl_value_t*)typ)); + AllocaInst *bits = sizes.first > 0 ? 
emit_static_alloca(ctx, sizes.first, align_dst) : nullptr; + SmallVector roots(sizes.second); + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + split_value_into(ctx, x, x_alignment, bits, align_dst, stack_ai, MutableArrayRef(roots)); + return std::make_pair(bits, roots); +} + +// Return the offset values corresponding to jl_field_offset, but into the two buffers for a split value (or -1) +static std::pair split_value_field(jl_datatype_t *typ, unsigned idx) +{ + size_t fldoff = jl_field_offset(typ, idx); + size_t src_off = 0; + size_t dst_off = 0; + assert(typ->layout->first_ptr >= 0); + size_t npointers = typ->layout->npointers; + bool nodata = allpointers(typ); + for (size_t i = 0; i < npointers; i++) { + size_t ptr = jl_ptr_offset(typ, i) * sizeof(void*); + if (ptr >= fldoff) { + if (ptr >= fldoff + jl_field_size(typ, idx)) + break; + bool onlyptr = jl_field_isptr(typ, idx) || allpointers((jl_datatype_t*)jl_field_type(typ, idx)); + return std::make_pair(onlyptr ? -1 : dst_off + fldoff - src_off, i); + } + dst_off += ptr - src_off; + src_off = ptr + sizeof(void*); + if (!nodata) { + assert(dst_off + sizeof(void*) == src_off); + dst_off = src_off; + } + } + return std::make_pair(dst_off + fldoff - src_off, -1); +} + +// Copy `x` to `dst`, where `x` was a split value and dst needs to have a native layout, copying any inlined roots back into their native location. +// This does not respect roots, so you must call emit_write_multibarrier afterwards. +static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatileStore) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + assert(jl_is_concrete_type(x.typ)); + assert(typ->layout->first_ptr >= 0 && !x.inline_roots.empty()); + Align align_dst = alignment; + Align align_src(julia_alignment(x.typ)); + Value *src = x.V; + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + size_t dst_off = 0; + size_t src_off = 0; + size_t npointers = typ->layout->npointers; + bool nodata = allpointers(typ); + bool isstack = isa(dst->stripInBoundsOffsets()) || dst_ai.tbaa == ctx.tbaa().tbaa_stack; + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? 
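// Editorial aside, not part of the patch: a model of the translation that
// split_value_field performs, assuming the current scheme in which pointer
// slots are retained in the bits buffer (so bits offsets equal native
// offsets). A result of {-1, i} means the field lives only in the roots
// array; {off, -1} means it lives only in the bits buffer. `toy_split_field`
// is a hypothetical name.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

std::pair<std::ptrdiff_t, std::ptrdiff_t>
toy_split_field(std::size_t fldoff, std::size_t fldsize, bool fld_only_ptrs,
                const std::vector<std::size_t> &ptr_offsets) {
    for (std::size_t i = 0; i < ptr_offsets.size(); i++) {
        std::size_t ptr = ptr_offsets[i];
        if (ptr >= fldoff) {
            if (ptr >= fldoff + fldsize)
                break; // the next pointer lies past this field: bits only
            // field overlaps pointer i: its roots start there; drop the bits
            // side if the field consists solely of pointers
            return {fld_only_ptrs ? -1 : (std::ptrdiff_t)fldoff, (std::ptrdiff_t)i};
        }
    }
    return {(std::ptrdiff_t)fldoff, -1};
}

int main() {
    // struct { void *p; long x; }: field 0 -> roots[0], field 1 -> bits at 8
    auto f0 = toy_split_field(0, sizeof(void*), true, {0});
    auto f1 = toy_split_field(sizeof(void*), sizeof(long), false, {0});
    std::printf("{%td, %td} {%td, %td}\n", f0.first, f0.second, f1.first, f1.second);
}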
jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > dst_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - dst_off, + align_dst, + align_src, + isVolatileStore); + src_off += ptr - dst_off; + } + if (last) + break; + auto *root = x.inline_roots[i]; + auto *store = ctx.builder.CreateAlignedStore(root, emit_ptrgep(ctx, dst, ptr), Align(sizeof(void*)), isVolatileStore); + if (!isstack) + store->setOrdering(AtomicOrdering::Unordered); + dst_ai.decorateInst(store); + align_dst = align_src = Align(sizeof(void*)); + dst_off = ptr + sizeof(void*); + if (!nodata) { + assert(src_off + sizeof(void*) == dst_off); + src_off = dst_off; + } + } +} + static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt) { if (dt->smalltag) @@ -1421,15 +1686,23 @@ static void null_load_check(jl_codectx_t &ctx, Value *v, jl_module_t *scope, jl_ } template -static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) +static void emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, MutableArrayRef defval, Func &&func) { - if (!ifnot) { - return func(); + if (ifnot == nullptr) { + auto res = func(); + assert(res.size() == defval.size()); + for (size_t i = 0; i < defval.size(); i++) + defval[i] = res[i]; + return; } if (auto Cond = dyn_cast(ifnot)) { if (Cond->isZero()) - return defval; - return func(); + return; + auto res = func(); + assert(res.size() == defval.size()); + for (size_t i = 0; i < defval.size(); i++) + defval[i] = res[i]; + return; } ++EmittedGuards; BasicBlock *currBB = ctx.builder.GetInsertBlock(); @@ -1438,16 +1711,33 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, ctx.builder.CreateCondBr(ifnot, passBB, exitBB); ctx.builder.SetInsertPoint(passBB); auto res = func(); + assert(res.size() == defval.size()); passBB = ctx.builder.GetInsertBlock(); ctx.builder.CreateBr(exitBB); ctx.builder.SetInsertPoint(exitBB); - if (defval == nullptr) + for (size_t i = 0; i < defval.size(); i++) { + PHINode *phi = ctx.builder.CreatePHI(defval[i]->getType(), 2); + phi->addIncoming(defval[i], currBB); + phi->addIncoming(res[i], passBB); + setName(ctx.emission_context, phi, "guard_res"); + defval[i] = phi; + } +} + +template +static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) +{ + MutableArrayRef res(&defval, defval == nullptr ? 0 : 1); + auto funcwrap = [&func] () -> SmallVector { + auto res = func(); + if (res == nullptr) + return {}; + return {res}; + }; + emit_guarded_test(ctx, ifnot, res, funcwrap); + if (res.empty()) return nullptr; - PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); - phi->addIncoming(defval, currBB); - phi->addIncoming(res, passBB); - setName(ctx.emission_context, phi, "guard_res"); - return phi; + return res[0]; } template @@ -1755,7 +2045,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // declare that the pointer is legal (for zero bytes) even though it might be undef. static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ) { - return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] { + return emit_nullcheck_guard(ctx, val.inline_roots.empty() && val.ispointer() ? 
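// Editorial aside, not part of the patch: a scalar model of the generalized
// emit_guarded_test above. Each slot of `defval` is merged through a PHI:
// it keeps its default on the fall-through edge and takes the computed value
// on the guarded edge. `guarded` is a hypothetical name.
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

void guarded(bool ifnot, std::vector<int> &defval,
             const std::function<std::vector<int>()> &func) {
    if (!ifnot)
        return; // guard failed: every PHI keeps its incoming default
    std::vector<int> res = func();
    for (std::size_t i = 0; i < defval.size(); i++)
        defval[i] = res[i]; // PHI(defval[i] from currBB, res[i] from passBB)
}

int main() {
    std::vector<int> v{0, 0};
    guarded(true, v, [] { return std::vector<int>{1, 2}; });
    std::printf("%d %d\n", v[0], v[1]); // 1 2
}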
val.V : nullptr, [&] { return emit_isa(ctx, val, typ, Twine()).first; }); } @@ -1838,6 +2128,9 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v if (ainfo.isghost) { a = Constant::getNullValue(getPointerTy(ctx.builder.getContext())); } + else if (!ainfo.inline_roots.empty()) { + a = value_to_pointer(ctx, ainfo).V; + } else if (!ainfo.ispointer()) { // CreateAlloca is OK here since we are on an error branch Value *tempSpace = ctx.builder.CreateAlloca(a->getType()); @@ -1869,6 +2162,7 @@ static Value *CreateSimplifiedExtractValue(jl_codectx_t &ctx, Value *Agg, ArrayR static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef); static void emit_write_barrier(jl_codectx_t&, Value*, Value*); static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*); +static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x); SmallVector first_ptr(Type *T) { @@ -1930,7 +2224,6 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j bool maybe_null_if_boxed = true, unsigned alignment = 0, Value **nullcheck = nullptr) { - // TODO: we should use unordered loads for anything with CountTrackedPointers(elty).count > 0 (if not otherwise locked) Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype); if (type_is_ghost(elty)) { if (isStrongerThanMonotonic(Order)) @@ -1941,74 +2234,71 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j alignment = sizeof(void*); else if (!alignment) alignment = julia_alignment(jltype); + if (idx_0based) + ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based); unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; - if (Order == AtomicOrdering::NotAtomic) { - if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { - intcast = emit_static_alloca(ctx, elty, Align(alignment)); - setName(ctx.emission_context, intcast, "aggregate_load_box"); + if (Order == AtomicOrdering::NotAtomic && !isboxed && !aliasscope && elty->isAggregateType() && !jl_is_genericmemoryref_type(jltype)) { + // use split_value to do this load + auto src = mark_julia_slot(ptr, jltype, NULL, tbaa); + auto copy = split_value(ctx, src, Align(alignment)); + if (maybe_null_if_boxed && !copy.second.empty()) { + null_pointer_check(ctx, copy.second[0], nullcheck); } + return mark_julia_slot(copy.first, jltype, NULL, ctx.tbaa().tbaa_stack, copy.second); } - else { + Type *realelty = elty; + if (Order != AtomicOrdering::NotAtomic) { if (!isboxed && !elty->isIntOrPtrTy()) { intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_load_box"); - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); + realelty = elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); + } + if (isa(elty)) { + unsigned nb2 = PowerOf2Ceil(nb); + if (nb != nb2) + elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); } } - Type *realelty = elty; - if (Order != AtomicOrdering::NotAtomic && isa(elty)) { - unsigned nb2 = PowerOf2Ceil(nb); - if (nb != nb2) - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); - } - Value *data = ptr; - if (idx_0based) - data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based); Value *instr = nullptr; - if (intcast && Order == AtomicOrdering::NotAtomic) { - 
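// Editorial aside, not part of the patch: the atomic path above rounds odd
// integer widths up to a power of two, since LLVM atomics require it; the
// value is loaded at the wider type and then truncated back to `realelty`.
// `pow2ceil` is a stand-in for llvm::PowerOf2Ceil.
#include <cstdint>
#include <cstdio>

std::uint64_t pow2ceil(std::uint64_t n) { // assumes n >= 1
    std::uint64_t p = 1;
    while (p < n)
        p <<= 1;
    return p;
}

int main() {
    // a 3-byte primitive is accessed atomically as i32, then truncated to i24
    std::printf("%u -> %u bytes\n", 3u, (unsigned)pow2ceil(3)); // 3 -> 4 bytes
}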
emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign()); + if (!isboxed && jl_is_genericmemoryref_type(jltype)) { + // load these FCA as individual fields, so LLVM does not need to split them later + Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0); + LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); + load0->setOrdering(Order); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.scope = MDNode::concatenate(aliasscope, ai.scope); + ai.decorateInst(load0); + Value *fld1 = ctx.builder.CreateStructGEP(elty, ptr, 1); + LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false); + static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); + maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*)); + load1->setOrdering(Order); + ai.decorateInst(load1); + instr = Constant::getNullValue(elty); + instr = ctx.builder.CreateInsertValue(instr, load0, 0); + instr = ctx.builder.CreateInsertValue(instr, load1, 1); } else { - if (!isboxed && jl_is_genericmemoryref_type(jltype)) { - // load these FCA as individual fields, so LLVM does not need to split them later - Value *fld0 = ctx.builder.CreateStructGEP(elty, data, 0); - LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); - load0->setOrdering(Order); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - ai.scope = MDNode::concatenate(aliasscope, ai.scope); - ai.decorateInst(load0); - Value *fld1 = ctx.builder.CreateStructGEP(elty, data, 1); - LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false); - static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); - maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*)); - load1->setOrdering(Order); - ai.decorateInst(load1); - instr = Constant::getNullValue(elty); - instr = ctx.builder.CreateInsertValue(instr, load0, 0); - instr = ctx.builder.CreateInsertValue(instr, load1, 1); - } - else { - LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false); - load->setOrdering(Order); - if (isboxed) - maybe_mark_load_dereferenceable(load, true, jltype); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - ai.scope = MDNode::concatenate(aliasscope, ai.scope); - ai.decorateInst(load); - instr = load; - } - if (elty != realelty) - instr = ctx.builder.CreateTrunc(instr, realelty); - if (intcast) { - ctx.builder.CreateStore(instr, intcast); - instr = nullptr; - } + LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment), false); + load->setOrdering(Order); + if (isboxed) + maybe_mark_load_dereferenceable(load, true, jltype); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.scope = MDNode::concatenate(aliasscope, ai.scope); + ai.decorateInst(load); + instr = load; + } + if (elty != realelty) + instr = ctx.builder.CreateTrunc(instr, realelty); + if (intcast) { + ctx.builder.CreateAlignedStore(instr, intcast, Align(alignment)); + instr = nullptr; } if (maybe_null_if_boxed) { if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment)); Value *first_ptr = isboxed ? 
instr : extract_first_ptr(ctx, instr); if (first_ptr) null_pointer_check(ctx, first_ptr, nullcheck); @@ -2021,7 +2311,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j // ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)), // ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) })); if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment)); instr = ctx.builder.CreateTrunc(instr, getInt1Ty(ctx.builder.getContext())); } if (instr) @@ -2119,7 +2409,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); r = ctx.builder.CreateLoad(realelty, intcast); } - else if (aliasscope || Order != AtomicOrdering::NotAtomic || tracked_pointers) { + else if (aliasscope || Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) { r = emit_unbox(ctx, realelty, rhs, jltype); } if (realelty != elty) @@ -2279,8 +2569,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (!tracked_pointers) // oldval is a slot, so put the oldval back ctx.builder.CreateStore(realCompare, intcast); } - else if (Order != AtomicOrdering::NotAtomic) { - assert(!tracked_pointers); + else if (Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) { r = emit_unbox(ctx, realelty, rhs, jltype); } if (realelty != elty) @@ -2393,23 +2682,30 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ctx.builder.SetInsertPoint(DoneBB); if (needlock) emit_lockstate_value(ctx, needlock, false); - if (parent != NULL && r && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) { + if (parent != NULL && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) { if (isreplacefield || issetfieldonce) { BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f); DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f); ctx.builder.CreateCondBr(Success, BB, DoneBB); ctx.builder.SetInsertPoint(BB); } - if (realelty != elty) - r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty)); - if (intcast) { - ctx.builder.CreateStore(r, intcast); - r = ctx.builder.CreateLoad(intcast_eltyp, intcast); + if (r) { + if (realelty != elty) + r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty)); + if (intcast) { + ctx.builder.CreateStore(r, intcast); + r = ctx.builder.CreateLoad(intcast_eltyp, intcast); + } + if (!isboxed) + emit_write_multibarrier(ctx, parent, r, rhs.typ); + else + emit_write_barrier(ctx, parent, r); + } + else { + assert(!isboxed); + assert(!rhs.inline_roots.empty()); + emit_write_multibarrier(ctx, parent, rhs); } - if (!isboxed) - emit_write_multibarrier(ctx, parent, r, rhs.typ); - else if (!type_is_permalloc(rhs.typ)) - emit_write_barrier(ctx, parent, r); if (isreplacefield || issetfieldonce) { ctx.builder.CreateBr(DoneBB); ctx.builder.SetInsertPoint(DoneBB); @@ -2524,7 +2820,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, } assert(!jl_is_vecelement_type((jl_value_t*)stt)); - if (!strct.ispointer()) { // unboxed + if (strct.inline_roots.empty() && !strct.ispointer()) { // unboxed assert(jl_is_concrete_immutable((jl_value_t*)stt)); bool isboxed = is_datatype_all_pointers(stt); jl_svec_t *types = stt->types; @@ -2580,7 +2876,8 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, } bool maybeatomic = stt->name->atomicfields != NULL; - if 
(strct.ispointer() && !maybeatomic) { // boxed or stack + if ((strct.inline_roots.empty() && strct.ispointer()) && !maybeatomic) { // boxed or stack + // COMBAK: inline_roots support could be implemented for this if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); *ret = jl_cgval_t(); // unreachable @@ -2656,8 +2953,7 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0); if (fsz > 0 && mutabl) { // move value to an immutable stack slot (excluding tindex) - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); - AllocaInst *lv = emit_static_alloca(ctx, AT, Align(al)); + AllocaInst *lv = emit_static_alloca(ctx, fsz, Align(al)); setName(ctx.emission_context, lv, "immutable_union"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al)); @@ -2825,7 +3121,41 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st } bool maybe_null = field_may_be_null(strct, jt, idx); size_t byte_offset = jl_field_offset(jt, idx); - if (strct.ispointer()) { + if (!strct.inline_roots.empty()) { + assert(!isatomic && !needlock); + auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); + auto offsets = split_value_field(jt, idx); + bool hasptr = offsets.second >= 0; + assert(hasptr == jl_field_isptr(jt, idx) || jl_type_hasptr(jfty)); + ArrayRef roots; + if (hasptr) { + roots = ArrayRef(strct.inline_roots).slice(offsets.second, jl_field_isptr(jt, idx) ? 1 : ((jl_datatype_t*)jfty)->layout->npointers); + if (maybe_null) + null_pointer_check(ctx, roots[0], nullcheck); + } + if (jl_field_isptr(jt, idx)) { + return mark_julia_type(ctx, roots[0], true, jfty); + } + Value *addr = offsets.first < 0 ? nullptr : offsets.first == 0 ? 
strct.V : emit_ptrgep(ctx, strct.V, offsets.first); + if (jl_is_uniontype(jfty)) { + size_t fsz = 0, al = 0; + int union_max = jl_islayout_inline(jfty, &fsz, &al); + size_t fsz1 = jl_field_size(jt, idx) - 1; + bool isptr = (union_max == 0); + assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr; + Value *ptindex = emit_ptrgep(ctx, addr, fsz1); + return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, false, union_max, strct.tbaa); + } + else if (jfty == (jl_value_t*)jl_bool_type) { + unsigned align = jl_field_align(jt, idx); + return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false, + AtomicOrdering::NotAtomic, maybe_null, align, nullcheck); + } + else { + return mark_julia_slot(addr, jfty, nullptr, tbaa, roots); + } + } + else if (strct.ispointer()) { auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); Value *staddr = data_pointer(ctx, strct); Value *addr; @@ -2901,8 +3231,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned st_idx = convert_struct_offset(ctx, T, byte_offset); IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; - lv = emit_static_alloca(ctx, ET, Align(align)); - lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); + lv = emit_static_alloca(ctx, fsz, Align(align)); // emit all of the align-sized words unsigned i = 0; for (; i < fsz / align; i++) { @@ -3079,16 +3408,12 @@ static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tb ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, alignment)); } -static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, MDNode *tbaa) +static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t &v) { - // newv should already be tagged - if (v.ispointer()) { - unsigned align = std::max(julia_alignment(v.typ), (unsigned)sizeof(void*)); - emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), Align(align), Align(julia_alignment(v.typ))); - } - else { - init_bits_value(ctx, newv, v.V, tbaa); - } + MDNode *tbaa = jl_is_mutable(v.typ) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; + Align newv_align{std::max(julia_alignment(v.typ), (unsigned)sizeof(void*))}; + newv = maybe_decay_tracked(ctx, newv); + emit_unbox_store(ctx, v, newv, tbaa, newv_align); } static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant *constant, jl_value_t *jt) @@ -3205,7 +3530,7 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t if (t == getInt1Ty(ctx.builder.getContext())) return track_pjlvalue(ctx, julia_bool(ctx, as_value(ctx, t, vinfo))); - if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel + if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && vinfo.inline_roots.empty() && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel if (Constant *c = dyn_cast(vinfo.V)) { jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt); if (s) { @@ -3320,9 +3645,8 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, union_alloca_type(ut, allunbox, nbytes, align, min_align); if (nbytes > 0) { // at least some of the values can live on the stack - // try to pick an Integer type size such that SROA will emit reasonable code - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); - AllocaInst *lv = emit_static_alloca(ctx, AT, Align(align)); + assert(align % min_align == 0); + AllocaInst *lv = emit_static_alloca(ctx, nbytes, Align(align)); setName(ctx.emission_context, lv, "unionalloca"); return lv; } @@ -3379,7 +3703,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB if (!box) { box = emit_allocobj(ctx, jt, true); setName(ctx.emission_context, box, "unionbox"); - init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); + init_bits_cgval(ctx, box, vinfo_r); } } tempBB = ctx.builder.GetInsertBlock(); // could have changed @@ -3502,14 +3826,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = box_union(ctx, vinfo, skip_none); } else { - assert(vinfo.V && "Missing data for unboxed value."); + assert((vinfo.V || !vinfo.inline_roots.empty()) && "Missing data for unboxed value."); assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed."); Type *t = julia_type_to_llvm(ctx, jt); assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! box = _boxed_special(ctx, vinfo, t); if (!box) { bool do_promote = vinfo.promotion_point; - if (do_promote && is_promotable) { + if (do_promote && is_promotable && vinfo.inline_roots.empty()) { auto IP = ctx.builder.saveIP(); ctx.builder.SetInsertPoint(vinfo.promotion_point); box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); @@ -3523,13 +3847,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab // end illegal IR originalAlloca->eraseFromParent(); ctx.builder.restoreIP(IP); - } else { + } + else { auto arg_typename = [&] JL_NOTSAFEPOINT { return "box::" + std::string(jl_symbol_name(((jl_datatype_t*)(jt))->name->name)); }; box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); setName(ctx.emission_context, box, arg_typename); - init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); + init_bits_cgval(ctx, box, vinfo); } } } @@ -3542,30 +3867,25 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con if (AllocaInst *ai = dyn_cast(dest)) // TODO: make this a lifetime_end & dereferenceable annotation? ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign()); - if (jl_is_concrete_type(src.typ) || src.constant) { - jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ; + if (src.constant) { + jl_value_t *typ = jl_typeof(src.constant); assert(skip || jl_is_pointerfree(typ)); if (jl_is_pointerfree(typ)) { - unsigned alignment = julia_alignment(typ); - if (!src.ispointer() || src.constant) { + emit_guarded_test(ctx, skip, nullptr, [&] { + unsigned alignment = julia_alignment(typ); + emit_unbox_store(ctx, mark_julia_const(ctx, src.constant), dest, tbaa_dst, Align(alignment), isVolatile); + return nullptr; + }); + } + } + else if (jl_is_concrete_type(src.typ)) { + assert(skip || jl_is_pointerfree(src.typ)); + if (jl_is_pointerfree(src.typ)) { + emit_guarded_test(ctx, skip, nullptr, [&] { + unsigned alignment = julia_alignment(src.typ); emit_unbox_store(ctx, src, dest, tbaa_dst, Align(alignment), isVolatile); - } - else { - Value *src_ptr = data_pointer(ctx, src); - unsigned nb = jl_datatype_size(typ); - // TODO: this branch may be bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use: - // select copy dest -> dest to simulate an undef value / conditional copy - // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); - auto f = [&] { - (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile); - return nullptr; - }; - if (skip) - emit_guarded_test(ctx, skip, nullptr, f); - else - f(); - } + return nullptr; + }); } } else if (src.TIndex) { @@ -3615,17 +3935,13 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con } else { assert(src.isboxed && "expected boxed value for sizeof/alignment computation"); - auto f = [&] { + emit_guarded_test(ctx, skip, nullptr, [&] { Value *datatype = emit_typeof(ctx, src, false, false); Value *copy_bytes = emit_datatype_size(ctx, datatype); - (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile); return nullptr; - }; - if (skip) - emit_guarded_test(ctx, skip, nullptr, f); - else - f(); + }); } } @@ -3714,6 +4030,12 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg emit_write_barrier(ctx, parent, ptrs); } +static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x) +{ + auto ptrs = get_gc_roots_for(ctx, x, true); + emit_write_barrier(ctx, parent, ptrs); +} + static jl_cgval_t union_store(jl_codectx_t &ctx, Value *ptr, Value *ptindex, jl_cgval_t rhs, jl_cgval_t cmp, jl_value_t *jltype, MDNode *tbaa, MDNode *tbaa_tindex, @@ -3854,25 +4176,24 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg Type *lt = julia_type_to_llvm(ctx, ty); unsigned na = nargs < nf ? 
nargs : nf;
-    // whether we should perform the initialization with the struct as a IR value
-    // or instead initialize the stack buffer with stores
-    auto tracked = CountTrackedPointers(lt);
+    // choose whether we should perform the initialization with the struct as an IR value
+    // or instead initialize the stack buffer with stores (the latter is nearly always better)
+    auto tracked = split_value_size(sty);
+    assert(CountTrackedPointers(lt).count == tracked.second);
     bool init_as_value = false;
     if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ?
         init_as_value = true;
     }
-    else if (tracked.count) {
-        init_as_value = true;
-    }

     Instruction *promotion_point = nullptr;
     ssize_t promotion_ssa = -1;
     Value *strct;
+    SmallVector inline_roots;
     if (type_is_ghost(lt)) {
-        strct = NULL;
+        strct = nullptr;
     }
     else if (init_as_value) {
-        if (tracked.count) {
+        if (tracked.second) {
             strct = Constant::getNullValue(lt);
         }
         else {
@@ -3881,11 +4202,19 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             strct = ctx.builder.CreateFreeze(strct);
         }
     }
+    else if (tracked.second) {
+        inline_roots.resize(tracked.second, Constant::getNullValue(ctx.types().T_prjlvalue));
+        strct = nullptr;
+        if (tracked.first) {
+            AllocaInst *bits = emit_static_alloca(ctx, tracked.first, Align(julia_alignment(ty)));
+            strct = bits;
+            setName(ctx.emission_context, bits, arg_typename);
+            is_promotable = false; // wrong layout for promotion
+        }
+    }
     else {
         strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty)));
         setName(ctx.emission_context, strct, arg_typename);
-        if (tracked.count)
-            undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack);
     }

     for (unsigned i = 0; i < na; i++) {
@@ -3897,25 +4226,32 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
         fval_info = update_julia_type(ctx, fval_info, jtype);
         if (fval_info.typ == jl_bottom_type)
             return jl_cgval_t();
+        if (type_is_ghost(lt))
+            continue;
+        Type *fty = julia_type_to_llvm(ctx, jtype);
+        if (type_is_ghost(fty))
+            continue;
+        Instruction *dest = nullptr;
+        MutableArrayRef roots;
+        ssize_t offs = jl_field_offset(sty, i);
+        ssize_t ptrsoffs = -1;
+        if (!inline_roots.empty())
+            std::tie(offs, ptrsoffs) = split_value_field(sty, i);
+        unsigned llvm_idx = init_as_value ? ((i > 0 && isa(lt)) ? convert_struct_offset(ctx, lt, offs) : i) : -1u;
         // TODO: Use (post-)domination instead.
         bool field_promotable = !jl_is_uniontype(jtype) &&
                                 !init_as_value &&
                                 fval_info.promotion_ssa != -1 &&
+                                fval_info.inline_roots.empty() && inline_roots.empty() && // these need to be compatible, if they were to be implemented
                                 fval_info.promotion_point &&
                                 fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock();
         if (field_promotable) {
             savedIP = ctx.builder.saveIP();
             ctx.builder.SetInsertPoint(fval_info.promotion_point);
         }
-        if (type_is_ghost(lt))
-            continue;
-        Type *fty = julia_type_to_llvm(ctx, jtype);
-        if (type_is_ghost(fty))
-            continue;
-        Value *dest = NULL;
-        unsigned offs = jl_field_offset(sty, i);
-        unsigned llvm_idx = (i > 0 && isa(lt)) ? convert_struct_offset(ctx, lt, offs) : i;
         if (!init_as_value) {
             // avoid unboxing the argument explicitly
             // and use memcpy instead
-            Instruction *inst = cast(emit_ptrgep(ctx, strct, offs));
+            Instruction *inst = strct && offs >= 0 ? cast(emit_ptrgep(ctx, strct, offs)) : nullptr;
+            if (!inline_roots.empty() && ptrsoffs >= 0)
+                roots = MutableArrayRef(inline_roots).slice(ptrsoffs, jl_field_isptr(sty, i) ?
1 : ((jl_datatype_t*)jtype)->layout->npointers); dest = inst; // Our promotion point needs to come before // A) All of our arguments' promotion points @@ -3936,10 +4272,13 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (jl_field_isptr(sty, i)) { fval = boxed(ctx, fval_info, field_promotable); if (!init_as_value) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - StoreInst *SI = cast(ai.decorateInst( - ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))))); - SI->setOrdering(AtomicOrdering::Unordered); + if (dest) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + ai.decorateInst(ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i)))); + } + else { + roots[0] = fval; + } } } else if (jl_is_uniontype(jtype)) { @@ -3962,9 +4301,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (fsz1 > 0 && !fval_info.isghost) { Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); - AllocaInst *lv = emit_static_alloca(ctx, ET, Align(al)); + AllocaInst *lv = emit_static_alloca(ctx, fsz1, Align(al)); setName(ctx.emission_context, lv, "unioninit"); - lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); // emit all of the align-sized words unsigned i = 0; @@ -4002,9 +4340,14 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (field_promotable) { fval_info.V->replaceAllUsesWith(dest); cast(fval_info.V)->eraseFromParent(); - } else if (init_as_value) { + } + else if (init_as_value) { fval = emit_unbox(ctx, fty, fval_info, jtype); - } else { + } + else if (!roots.empty()) { + split_value_into(ctx, fval_info, Align(julia_alignment(jtype)), dest, Align(jl_field_align(sty, i)), jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots); + } + else { emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, Align(jl_field_align(sty, i))); } } @@ -4025,7 +4368,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } for (size_t i = nargs; i < nf; i++) { if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - unsigned offs = jl_field_offset(sty, i); + ssize_t offs = jl_field_offset(sty, i); + ssize_t ptrsoffs = -1; + if (!inline_roots.empty()) + std::tie(offs, ptrsoffs) = split_value_field(sty, i); + assert(ptrsoffs < 0 && offs >= 0); int fsz = jl_field_size(sty, i) - 1; if (init_as_value) { unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); @@ -4033,19 +4380,23 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } else { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); - ai.decorateInst(ctx.builder.CreateAlignedStore( - ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), - emit_ptrgep(ctx, strct, offs + fsz), - Align(1))); + Instruction *dest = cast(emit_ptrgep(ctx, strct, offs + fsz)); + if (promotion_point == nullptr) + promotion_point = dest; + ai.decorateInst(ctx.builder.CreateAlignedStore(ctx.builder.getInt8(0), dest, Align(1))); } } } - if (promotion_point && nargs < nf) { + if (nargs < nf) { assert(!init_as_value); IRBuilderBase::InsertPoint savedIP = ctx.builder.saveIP(); - ctx.builder.SetInsertPoint(promotion_point); - promotion_point = cast(ctx.builder.CreateFreeze(UndefValue::get(lt))); - 
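// Editorial aside, not part of the patch: a hypothetical plain-memory model of
// the per-field routing emit_new_struct now performs. Pointer fields go to the
// separate roots array at the index split_value_field reports; plain-bits
// fields are stored at their offset into the stack bits buffer.
#include <cstddef>
#include <cstdio>
#include <cstring>

struct Native { void *p; long x; }; // hypothetical field layout

int main() {
    long payload = 42;
    alignas(Native) unsigned char bits[sizeof(Native)] = {}; // the `bits` alloca stand-in
    void *roots[1] = {};                                     // the inline_roots stand-in

    // field 0 is a pointer: split_value_field -> {-1, 0}, store the root
    roots[0] = &payload;
    // field 1 is plain bits: split_value_field -> {offsetof(x), -1}
    long x = 7;
    std::memcpy(bits + offsetof(Native, x), &x, sizeof x);

    long rx;
    std::memcpy(&rx, bits + offsetof(Native, x), sizeof rx);
    std::printf("%ld %ld\n", *(long *)roots[0], rx); // 42 7
}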
ctx.builder.CreateStore(promotion_point, strct); + if (promotion_point) + ctx.builder.SetInsertPoint(promotion_point); + if (strct) { + promotion_point = cast(ctx.builder.CreateFreeze(UndefValue::get(lt))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + ai.decorateInst(ctx.builder.CreateStore(promotion_point, strct)); + } ctx.builder.restoreIP(savedIP); } if (type_is_ghost(lt)) @@ -4053,7 +4404,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else if (init_as_value) return mark_julia_type(ctx, strct, false, ty); else { - jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack); + jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack, inline_roots); if (is_promotable && promotion_point) { ret.promotion_point = promotion_point; ret.promotion_ssa = promotion_ssa; @@ -4157,7 +4508,20 @@ static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &mem, cons static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) { - if (ref.ispointer()) { + if (!ref.inline_roots.empty()) { + LLVMContext &C = ctx.builder.getContext(); + StructType *type = get_memoryref_type(C, ctx.types().T_size, layout, 0); + LoadInst *load0 = ctx.builder.CreateLoad(type->getElementType(0), ref.V); + jl_aliasinfo_t ai0 = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa); + ai0.decorateInst(load0); + setName(ctx.emission_context, load0, "memory_ref_FCA0"); + Value *root = ctx.builder.CreateBitCast(ref.inline_roots[0], type->getElementType(1)); + Value *load = Constant::getNullValue(type); + load = ctx.builder.CreateInsertValue(load, load0, 0); + load = ctx.builder.CreateInsertValue(load, root, 1); + return load; + } + else if (ref.ispointer()) { LLVMContext &C = ctx.builder.getContext(); Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0); LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref)); diff --git a/src/codegen.cpp b/src/codegen.cpp index 6d4ecc63e5ca1..c719f4ff54078 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1816,11 +1816,12 @@ struct jl_cgval_t { Value *Vboxed; Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8 + SmallVector inline_roots; // if present, `V` is a pointer, but not in canonical layout jl_value_t *constant; // constant value (rooted in linfo.def.roots) - jl_value_t *typ; // the original type of V, never NULL + jl_value_t *typ; // the original type of V, never nullptr bool isboxed; // whether this value is a jl_value_t* allocated on the heap with the right type tag bool isghost; // whether this value is "ghost" - MDNode *tbaa; // The related tbaa node. Non-NULL iff this holds an address. + MDNode *tbaa; // The related tbaa node. Non-nullptr iff this holds an address. // If non-null, this memory location may be promoted on use, by hoisting the // destination memory above the promotion point. 
Instruction *promotion_point; @@ -1831,13 +1832,15 @@ struct jl_cgval_t { bool ispointer() const { // whether this value is compatible with `data_pointer` + assert(inline_roots.empty()); return tbaa != nullptr; } jl_cgval_t(Value *Vval, jl_value_t *typ, Value *tindex) : // general value constructor - V(Vval), // V is allowed to be NULL in a jl_varinfo_t context, but not during codegen contexts + V(Vval), // V is allowed to be nullptr in a jl_varinfo_t context, but not during codegen contexts Vboxed(nullptr), TIndex(tindex), - constant(NULL), + inline_roots(), + constant(nullptr), typ(typ), isboxed(false), isghost(false), @@ -1845,13 +1848,15 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { - assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext())); + assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } - jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa) : // general pointer constructor + jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete; + jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef inline_roots) : // general pointer constructor V(Vptr), Vboxed(isboxed ? Vptr : nullptr), TIndex(tindex), - constant(NULL), + inline_roots(inline_roots), + constant(nullptr), typ(typ), isboxed(isboxed), isghost(false), @@ -1861,15 +1866,16 @@ struct jl_cgval_t { { if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); - assert(tbaa != NULL); - assert(!(isboxed && TIndex != NULL)); - assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext())); + assert(tbaa != nullptr); + assert(!(isboxed && TIndex != nullptr)); + assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } explicit jl_cgval_t(jl_value_t *typ) : // ghost value constructor - // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t() instead) - V(NULL), - Vboxed(NULL), - TIndex(NULL), + // mark explicit to avoid being used implicitly for conversion from nullptr (use jl_cgval_t() instead) + V(nullptr), + Vboxed(nullptr), + TIndex(nullptr), + inline_roots(), constant(((jl_datatype_t*)typ)->instance), typ(typ), isboxed(false), @@ -1885,6 +1891,7 @@ struct jl_cgval_t { V(v.V), Vboxed(v.Vboxed), TIndex(tindex), + inline_roots(v.inline_roots), constant(v.constant), typ(typ), isboxed(v.isboxed), @@ -1898,17 +1905,18 @@ struct jl_cgval_t { // this constructor expects we had a badly or equivalently typed version // make sure we aren't discarding the actual type information if (v.TIndex) { - assert((TIndex == NULL) == jl_is_concrete_type(typ)); + assert((TIndex == nullptr) == jl_is_concrete_type(typ)); } else { assert(isboxed || v.typ == typ || tindex); } } explicit jl_cgval_t() : // undef / unreachable constructor - V(NULL), - Vboxed(NULL), - TIndex(NULL), - constant(NULL), + V(nullptr), + Vboxed(nullptr), + TIndex(nullptr), + inline_roots(), + constant(nullptr), typ(jl_bottom_type), isboxed(false), isghost(true), @@ -1924,6 +1932,7 @@ struct jl_varinfo_t { Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked ctx.tbaa().tbaa_const, if appropriate) jl_cgval_t value; // a stack slot or constant value Value *pTIndex; // i8* stack slot for the value.TIndex tag describing `value.V` + AllocaInst *inline_roots; // stack roots for the inline_roots array, if needed DILocalVariable *dinfo; // if the 
variable might be used undefined and is not boxed // this i1 flag is true when it is defined @@ -1934,11 +1943,12 @@ struct jl_varinfo_t { bool usedUndef; bool used; - jl_varinfo_t(LLVMContext &ctxt) : boxroot(NULL), + jl_varinfo_t(LLVMContext &ctxt) : boxroot(nullptr), value(jl_cgval_t()), - pTIndex(NULL), - dinfo(NULL), - defFlag(NULL), + pTIndex(nullptr), + inline_roots(nullptr), + dinfo(nullptr), + defFlag(nullptr), isSA(false), isVolatile(false), isArgument(false), @@ -1962,7 +1972,7 @@ class jl_codectx_t { std::map phic_slots; std::map > scope_restore; SmallVector SAvalues; - SmallVector, 0> PhiNodes; + SmallVector, jl_value_t *>, 0> PhiNodes; SmallVector ssavalue_assigned; SmallVector ssavalue_usecount; jl_module_t *module = NULL; @@ -2110,7 +2120,8 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments=nullptr, size_t *args_begin=nullptr); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, + ArrayRef ArgNames=None, unsigned nreq=0); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign, bool alloc); @@ -2133,6 +2144,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p); static unsigned julia_alignment(jl_value_t *jt); +static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile); static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G) { @@ -2217,6 +2229,28 @@ static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align) return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca); } +static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, unsigned nb, Align align) +{ + // Stupid hack: SROA takes hints from the element type, and will happily split this allocation into lots of unaligned bits + // if it cannot find something better to do, which is terrible for performance. + // However, if we emit this with an element size equal to the alignment, it will instead split it into aligned chunks + // which is great for performance and vectorization. 
+ if (alignTo(nb, align) == align.value()) // don't bother with making an array of length 1 + return emit_static_alloca(ctx, ctx.builder.getIntNTy(align.value() * 8), align); + return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getIntNTy(align.value() * 8), alignTo(nb, align) / align.value()), align); +} + +static AllocaInst *emit_static_roots(jl_codectx_t &ctx, unsigned nroots) +{ + AllocaInst *staticroots = emit_static_alloca(ctx, ctx.types().T_prjlvalue, Align(sizeof(void*))); + staticroots->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nroots)); + IRBuilder<> builder(ctx.topalloca); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + // make sure these are nullptr early from LLVM's perspective, in case it decides to SROA it + ai.decorateInst(builder.CreateMemSet(staticroots, builder.getInt8(0), nroots * sizeof(void*), staticroots->getAlign()))->moveAfter(ctx.topalloca); + return staticroots; +} + static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) { assert(ptr->getType()->getPointerAddressSpace() != AddressSpace::Tracked); @@ -2264,7 +2298,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ) if (jl_is_type_type(typ)) { assert(is_uniquerep_Type(typ)); // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort - jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); constant.constant = jl_tparam0(typ); if (typ == (jl_value_t*)jl_typeofbottom_type->super) constant.isghost = true; @@ -2288,16 +2322,16 @@ static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv) if (jl_is_datatype_singleton((jl_datatype_t*)typ)) return ghostValue(ctx, typ); } - jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); constant.constant = jv; return constant; } -static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa) +static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef<Value*> inline_roots=None) { // this enables lazy-copying of immutable values and stack or argument slots - jl_cgval_t tagval(v, false, typ, tindex, tbaa); + jl_cgval_t tagval(v, false, typ, tindex, tbaa, inline_roots); return tagval; } @@ -2317,22 +2351,41 @@ static bool valid_as_globalinit(const Value *v) { static Value *zext_struct(jl_codectx_t &ctx, Value *V); +// TODO: in the future, assume all callers will handle the interior pointers separately, and have +zext_struct strip them out, so we aren't saving those to the stack here, causing shadow stores +to be necessary too static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_t *typ, Value *tindex) { Value *loc; v = zext_struct(ctx, v); + Align align(julia_alignment(typ)); if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr assert(jl_is_concrete_type(typ)); // not legal to have an unboxed abstract type - loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), Align(julia_alignment(typ)), "_j_const", *jl_Module); + loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), align, "_j_const", *jl_Module); } else { - loc = emit_static_alloca(ctx, v->getType(), Align(julia_alignment(typ))); - ctx.builder.CreateStore(v, loc); + loc = emit_static_alloca(ctx, 
v->getType(), align); + ctx.builder.CreateAlignedStore(v, loc, align); } return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack); } static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v) { + if (!v.inline_roots.empty()) { + //if (v.V == nullptr) { + // AllocaInst *loc = emit_static_roots(ctx, v.inline_roots.size()); + // for (size_t i = 0; i < v.inline_roots.counts(); i++) + // ctx.builder.CreateAlignedStore(v.inline_roots[i], emit_ptrgep(ctx, loc, i * sizeof(void*)), Align(sizeof(void*))); + // return mark_julia_slot(loc, v.typ, v.TIndex, ctx.tbaa().tbaa_gcframe); + //} + Align align(julia_alignment(v.typ)); + Type *ty = julia_type_to_llvm(ctx, v.typ); + AllocaInst *loc = emit_static_alloca(ctx, ty, align); + auto tbaa = v.V == nullptr ? ctx.tbaa().tbaa_gcframe : ctx.tbaa().tbaa_stack; + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + recombine_value(ctx, v, loc, stack_ai, align, false); + return mark_julia_slot(loc, v.typ, v.TIndex, tbaa); + } if (v.ispointer()) return v; return value_to_pointer(ctx, v.V, v.typ, v.TIndex); @@ -2354,13 +2407,14 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox if (type_is_ghost(T)) { return ghostValue(ctx, typ); } - if (v && !isboxed && v->getType()->isAggregateType() && CountTrackedPointers(v->getType()).count == 0) { + if (v && !isboxed && v->getType()->isAggregateType()) { // eagerly put this back onto the stack // llvm mem2reg pass will remove this if unneeded - return value_to_pointer(ctx, v, typ, NULL); + if (CountTrackedPointers(v->getType()).count == 0) + return value_to_pointer(ctx, v, typ, NULL); } if (isboxed) - return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); return jl_cgval_t(v, typ, NULL); } @@ -2395,7 +2449,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t & if (alwaysboxed) { // discovered that this union-split type must actually be isboxed if (v.Vboxed) { - return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots); } else { // type mismatch (there weren't any boxed values in the union) @@ -2624,14 +2678,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & decay_derived(ctx, boxv), decay_derived(ctx, slotv)); } - jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa); + jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa, v.inline_roots); assert(boxv->getType() == ctx.types().T_prjlvalue); newv.Vboxed = boxv; return newv; } } else { - return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); } return jl_cgval_t(v, typ, new_tindex); } @@ -2662,7 +2716,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ if (skip) { *skip = ctx.builder.CreateNot(emit_exactly_isa(ctx, v, (jl_datatype_t*)typ, true)); } - return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots); } if (mustbox_union) { // type mismatch: there weren't any boxed values in the union @@ -2684,7 +2738,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ unsigned new_idx = get_box_tindex((jl_datatype_t*)v.typ, typ); if (new_idx) { new_tindex 
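The new `value_to_pointer` branch above is driven by the split representation this patch threads through codegen: an unboxed aggregate may now travel as untracked bytes plus a separate list of tracked pointers, and `recombine_value` writes both halves back into one slot when a single address is needed again (with `tbaa_gcframe` when only roots exist, `tbaa_stack` otherwise). A minimal model with hypothetical names, not the real `jl_cgval_t` or `recombine_value`:

```cpp
// Minimal model (illustrative names only) of a split value and of what
// recombination does: untracked fields and tracked pointers are written
// back into one stack slot at their layout offsets.
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

typedef struct _jl_value_t jl_value_t; // opaque stand-in for a GC object

struct SplitValue {
    // untracked fields, keyed by byte offset within the full layout
    std::vector<std::pair<size_t, std::vector<uint8_t>>> bits;
    // tracked pointers, keyed by byte offset of their pointer field
    std::vector<std::pair<size_t, jl_value_t*>> roots;
};

// analogue of recombine_value: materialize the whole value at `dst`
static void recombine(uint8_t *dst, const SplitValue &v) {
    for (const auto &f : v.bits)
        std::memcpy(dst + f.first, f.second.data(), f.second.size());
    for (const auto &r : v.roots)
        std::memcpy(dst + r.first, &r.second, sizeof(jl_value_t*));
}
```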
= ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx); - if (v.V && !v.ispointer()) { + if (v.V && v.inline_roots.empty() && !v.ispointer()) { // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value return jl_cgval_t(value_to_pointer(ctx, v), typ, new_tindex); } @@ -2708,7 +2762,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ } if (makeboxed) { // convert to a simple isboxed value - return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return mark_julia_type(ctx, boxed(ctx, v), true, typ); } } return jl_cgval_t(v, typ, new_tindex); @@ -3524,9 +3578,9 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a jl_datatype_t *sty = (jl_datatype_t*)argty; size_t sz = jl_datatype_size(sty); if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) { - Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : + Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) : value_to_pointer(ctx, arg1).V; - Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) : + Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) : value_to_pointer(ctx, arg2).V; varg1 = emit_pointer_from_objref(ctx, varg1); varg2 = emit_pointer_from_objref(ctx, varg2); @@ -3561,9 +3615,9 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); } else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) { - Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : + Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) : value_to_pointer(ctx, arg1).V; - Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) : + Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) : value_to_pointer(ctx, arg2).V; varg1 = emit_pointer_from_objref(ctx, varg1); varg2 = emit_pointer_from_objref(ctx, varg2); @@ -4610,34 +4664,33 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // For tuples, we can emit code even if we don't know the exact // type (e.g. because we don't know the length). This is possible // as long as we know that all elements are of the same (leaf) type. - if (obj.ispointer()) { - if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { - emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); - *ret = jl_cgval_t(); // unreachable - return true; - } - // Determine which was the type that was homogeneous - jl_value_t *jt = jl_tparam0(utt); - if (jl_is_vararg(jt)) - jt = jl_unwrap_vararg(jt); - assert(jl_is_datatype(jt)); - // This is not necessary for correctness, but allows to omit - // the extra code for getting the length of the tuple - if (!bounds_check_enabled(ctx, boundscheck)) { - vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); - } - else { - vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx, - emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)), - jl_true); - } - bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0); - Value *ptr = data_pointer(ctx, obj); - *ret = typed_load(ctx, ptr, vidx, - isboxed ? 
(jl_value_t*)jl_any_type : jt, - obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false); + jl_cgval_t ptrobj = obj.isboxed ? obj : value_to_pointer(ctx, obj); + if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { + emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); + *ret = jl_cgval_t(); // unreachable return true; } + // Determine which was the type that was homogeneous + jl_value_t *jt = jl_tparam0(utt); + if (jl_is_vararg(jt)) + jt = jl_unwrap_vararg(jt); + assert(jl_is_datatype(jt)); + // This is not necessary for correctness, but allows to omit + // the extra code for getting the length of the tuple + if (!bounds_check_enabled(ctx, boundscheck)) { + vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); + } + else { + vidx = emit_bounds_check(ctx, ptrobj, (jl_value_t*)ptrobj.typ, vidx, + emit_datatype_nfields(ctx, emit_typeof(ctx, ptrobj, false, false)), + jl_true); + } + bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0); + Value *ptr = data_pointer(ctx, ptrobj); + *ret = typed_load(ctx, ptr, vidx, + isboxed ? (jl_value_t*)jl_any_type : jt, + ptrobj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false); + return true; } // Unknown object, but field known to be integer @@ -4914,7 +4967,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) { Value *fldv; size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*); - if (obj.ispointer()) { + if (!obj.inline_roots.empty()) { + auto offsets = split_value_field(stt, fieldidx); + assert(offsets.second >= 0); + fldv = obj.inline_roots[offsets.second]; + } + else if (obj.ispointer()) { auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs); if (!jl_field_isptr(stt, fieldidx)) offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; @@ -5033,26 +5091,18 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos SmallVector argvals(nfargs); unsigned idx = 0; AllocaInst *result = nullptr; - switch (returninfo.cc) { - case jl_returninfo_t::Boxed: - case jl_returninfo_t::Register: - case jl_returninfo_t::Ghosts: - break; - case jl_returninfo_t::SRet: - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType(), Align(julia_alignment(jlretty))); - argvals[idx] = result; - idx++; - break; - case jl_returninfo_t::Union: - result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes), Align(returninfo.union_align)); + + if (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union) { + result = emit_static_alloca(ctx, returninfo.union_bytes, Align(returninfo.union_align)); setName(ctx.emission_context, result, "sret_box"); argvals[idx] = result; idx++; - break; } + AllocaInst *return_roots = nullptr; if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots), Align(alignof(jl_value_t*))); + assert(returninfo.cc == jl_returninfo_t::SRet); + return_roots = emit_static_roots(ctx, returninfo.return_roots); argvals[idx] = return_roots; idx++; } @@ -5063,16 +5113,27 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to 
be a datatype by construction for specsig - jl_cgval_t arg = argv[i]; if (is_opaque_closure && i == 0) { // Special implementation for opaque closures: their jt and thus // julia_type_to_llvm values are likely wrong, so override the // behavior here to directly pass the expected pointer based instead // just on passing arg as a pointer - arg = value_to_pointer(ctx, arg); - argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + jl_cgval_t arg = argv[i]; + if (arg.isghost) { + argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived)); + } + else { + if (!arg.isboxed) + arg = value_to_pointer(ctx, arg); + argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + } + idx++; + continue; } - else if (is_uniquerep_Type(jt)) { + jl_cgval_t arg = update_julia_type(ctx, argv[i], jt); + if (arg.typ == jl_bottom_type) + return jl_cgval_t(); + if (is_uniquerep_Type(jt)) { continue; } else { @@ -5085,8 +5146,24 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos argvals[idx] = boxed(ctx, arg); } else if (et->isAggregateType()) { - arg = value_to_pointer(ctx, arg); - argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + auto tracked = CountTrackedPointers(et); + if (tracked.count && !tracked.all) { + Value *val = arg.V; + SmallVector roots(arg.inline_roots); + if (roots.empty()) + std::tie(val, roots) = split_value(ctx, arg, Align(jl_datatype_align(jt))); + AllocaInst *proots = emit_static_roots(ctx, roots.size()); + for (size_t i = 0; i < roots.size(); i++) + ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*))); + assert(val); + argvals[idx++] = decay_derived(ctx, val); + argvals[idx] = proots; + } + else { + if (!arg.isboxed) + arg = value_to_pointer(ctx, arg); + argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + } } else { Value *val = emit_unbox(ctx, et, arg, jt); @@ -5132,7 +5209,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos break; case jl_returninfo_t::SRet: assert(result); - retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack); + retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_gcframe, load_gc_roots(ctx, return_roots, returninfo.return_roots)); break; case jl_returninfo_t::Union: { Value *box = ctx.builder.CreateExtractValue(call, 0); @@ -5460,7 +5537,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo JuliaFunction<> *cc; if (f.typ == (jl_value_t*)jl_intrinsic_type) { fptr = prepare_call(jlintrinsic_func); - F = f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + F = f.inline_roots.empty() && f.ispointer() ? 
data_pointer(ctx, f) : value_to_pointer(ctx, f).V; F = decay_derived(ctx, F); cc = julia_call3; } @@ -5712,42 +5789,53 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_i } static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) { - jl_value_t *typ = vi.value.typ; jl_cgval_t v; Value *isnull = NULL; if (vi.boxroot == NULL || vi.pTIndex != NULL) { - if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) { + if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !(vi.value.V || vi.inline_roots)) { v = vi.value; if (vi.pTIndex) v.TIndex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1)); } else { // copy value to a non-mutable (non-volatile SSA) location - AllocaInst *varslot = cast(vi.value.V); - setName(ctx.emission_context, varslot, jl_symbol_name(varname)); - Type *T = varslot->getAllocatedType(); - assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); - AllocaInst *ssaslot = cast(varslot->clone()); - setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa")); - ssaslot->insertAfter(varslot); - if (vi.isVolatile) { - Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, - varslot->getAlign(), - true); - ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign()); - } - else { - const DataLayout &DL = jl_Module->getDataLayout(); - uint64_t sz = DL.getTypeStoreSize(T); - emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign(), varslot->getAlign()); + // since this might be a union slot, the most convenient approach to copying + // is to move the whole alloca chunk + AllocaInst *ssaslot = nullptr; + if (vi.value.V) { + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + AllocaInst *varslot = cast(vi.value.V); + Type *T = varslot->getAllocatedType(); + assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); + ssaslot = cast(varslot->clone()); + setName(ctx.emission_context, ssaslot, varslot->getName() + StringRef(".ssa")); + ssaslot->insertAfter(varslot); + if (vi.isVolatile) { + Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlign(), true); + stack_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign())); + } + else { + const DataLayout &DL = jl_Module->getDataLayout(); + uint64_t sz = DL.getTypeStoreSize(T); + emit_memcpy(ctx, ssaslot, stack_ai, vi.value, sz, ssaslot->getAlign(), varslot->getAlign()); + } } Value *tindex = NULL; if (vi.pTIndex) tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1), vi.isVolatile); - v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack); + v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack, None); + } + if (vi.inline_roots) { + AllocaInst *varslot = vi.inline_roots; + size_t nroots = cast(varslot->getArraySize())->getZExtValue(); + auto T_prjlvalue = varslot->getAllocatedType(); + if (auto AT = dyn_cast(T_prjlvalue)) { + nroots *= AT->getNumElements(); + T_prjlvalue = AT->getElementType(); + } + assert(T_prjlvalue == ctx.types().T_prjlvalue); + v.inline_roots = load_gc_roots(ctx, varslot, nroots, vi.isVolatile); } - if (vi.boxroot == NULL) - v = update_julia_type(ctx, v, typ); if (vi.usedUndef) { assert(vi.defFlag); isnull = 
ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile); @@ -5758,7 +5846,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va Value *box_isnull = NULL; if (vi.usedUndef) box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue)); - maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ); + maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, vi.value.typ); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & UNION_BOX_MARKER) @@ -5767,15 +5855,14 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); if (vi.usedUndef) isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull); - if (v.V) { // v.V will be null if it is a union of all ghost values + if (v.V) // v.V will be null if it is a union of all ghost values v.V = ctx.builder.CreateSelect(load_unbox, decay_derived(ctx, v.V), decay_derived(ctx, boxed)); - } else + else v.V = boxed; v.Vboxed = boxed; - v = update_julia_type(ctx, v, typ); } else { - v = mark_julia_type(ctx, boxed, true, typ); + v = mark_julia_type(ctx, boxed, true, vi.value.typ); if (vi.usedUndef) isnull = box_isnull; } @@ -5807,49 +5894,27 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu store_def_flag(ctx, vi, true); if (!vi.value.constant) { // check that this is not a virtual store - assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL)); + assert(vi.inline_roots || vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL)); // store value - if (vi.value.V == NULL) { - // all ghost values in destination - nothing to copy or store - } - else if (rval_info.constant || !rval_info.ispointer()) { - if (rval_info.isghost) { - // all ghost values in source - nothing to copy or store - } - else { - if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) { - // isbits cast-on-assignment is invalid. this branch should be dead-code. - CreateTrap(ctx.builder); - } - else { - Value *dest = vi.value.V; - if (vi.pTIndex) // TODO: use lifetime-end here instead - ctx.builder.CreateStore(UndefValue::get(cast(vi.value.V)->getAllocatedType()), vi.value.V); - Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - ai.decorateInst(ctx.builder.CreateStore( - emit_unbox(ctx, store_ty, rval_info, rval_info.typ), - dest, - vi.isVolatile)); - } - } - } - else { - if (vi.pTIndex == NULL) { - assert(jl_is_concrete_type(vi.value.typ)); - // Sometimes we can get into situations where the LHS and RHS - // are the same slot. We're not allowed to memcpy in that case - // due to LLVM bugs. - // This check should probably mostly catch the relevant situations. 
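That aliasing caveat survives into the rewritten assignment path below: LLVM's `memcpy` requires distinct, non-overlapping operands, so the store is skipped entirely when the LHS and RHS resolve to the same slot. A toy illustration, where `assign_slot` is a hypothetical stand-in for the emitted store path:

```cpp
// Toy illustration of the self-assignment guard: memcpy with dst == src is
// undefined behavior, so assignment does nothing when both sides share a slot.
#include <cstddef>
#include <cstring>

static void assign_slot(void *dst, const void *src, size_t nbytes) {
    if (dst == src)
        return; // same stack slot: nothing to copy, and memcpy would be UB
    std::memcpy(dst, src, nbytes); // distinct slots never partially overlap here
}
```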
- if (vi.value.V != rval_info.V) { - Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ)); - Align alignment(julia_alignment(rval_info.typ)); - emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes, - alignment, alignment, vi.isVolatile); - } - } + rval_info = update_julia_type(ctx, rval_info, vi.value.typ); + if (rval_info.typ == jl_bottom_type) + return; + if (vi.pTIndex && vi.value.V) // TODO: use lifetime-end here instead + ctx.builder.CreateStore(UndefValue::get(cast(vi.value.V)->getAllocatedType()), vi.value.V); + // Sometimes we can get into situations where the LHS and RHS + // are the same slot. We're not allowed to memcpy in that case + // due to LLVM bugs. + // This check should probably mostly catch the relevant situations. + if (vi.value.V != nullptr ? vi.value.V != rval_info.V : vi.inline_roots != nullptr) { + MDNode *tbaa = ctx.tbaa().tbaa_stack; // Use vi.value.tbaa ? + if (rval_info.TIndex) + emit_unionmove(ctx, vi.value.V, tbaa, rval_info, /*skip*/isboxed, vi.isVolatile); else { - emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, /*skip*/isboxed, vi.isVolatile); + Align align(julia_alignment(rval_info.typ)); + if (vi.inline_roots) + split_value_into(ctx, rval_info, align, vi.value.V, align, jl_aliasinfo_t::fromTBAA(ctx, tbaa), vi.inline_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe), vi.isVolatile); + else + emit_unbox_store(ctx, rval_info, vi.value.V, tbaa, align, vi.isVolatile); } } } @@ -5864,7 +5929,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) jl_value_t *phiType = NULL; if (jl_is_array(ssavalue_types)) { phiType = jl_array_ptr_ref(ssavalue_types, idx); - } else { + } + else { phiType = (jl_value_t*)jl_any_type; } jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0); @@ -5874,6 +5940,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) return; } AllocaInst *dest = nullptr; + SmallVector roots; // N.B.: For any memory space, used as a phi, // we need to emit space twice here. 
The reason for this is that // phi nodes may be arguments of other phi nodes, so if we don't @@ -5884,7 +5951,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) size_t min_align, nbytes; dest = try_emit_union_alloca(ctx, ((jl_uniontype_t*)phiType), allunbox, min_align, nbytes); if (dest) { - Instruction *phi = dest->clone(); + AllocaInst *phi = cast(dest->clone()); phi->insertAfter(dest); PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); Tindex_phi->insertInto(BB, InsertPt); @@ -5893,14 +5960,14 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) Value *isboxed = ctx.builder.CreateICmpNE( ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); - ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false); + ctx.builder.CreateMemCpy(phi, Align(min_align), dest, dest->getAlign(), nbytes, false); ctx.builder.CreateLifetimeEnd(dest); Value *ptr = ctx.builder.CreateSelect(isboxed, decay_derived(ctx, ptr_phi), decay_derived(ctx, phi)); jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType)); val.Vboxed = ptr_phi; - ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r)); + ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; return; @@ -5909,7 +5976,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); Tindex_phi->insertInto(BB, InsertPt); jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); - ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r)); + ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; return; @@ -5928,22 +5995,38 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) } jl_cgval_t slot; PHINode *value_phi = NULL; - if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) { + if (!isboxed && vtype->isAggregateType()) { // the value will be moved into dest in the predecessor critical block. 
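The comment above and below describes the double-buffering; a toy model of it, extended with the per-pointer root phis this hunk introduces (all names illustrative, and plain copies stand in for what the IR expresses as `PHINode`s and memcpys):

```cpp
// Toy model of the aggregate-phi scheme: predecessors store into `dest`,
// the successor immediately copies `dest` into `phi` so a nested phi can
// reuse `dest`, and every tracked pointer travels through its own root phi
// where the GC can always see it, never hidden inside raw bytes.
#include <cstring>
#include <vector>

typedef struct _jl_value_t jl_value_t;

struct AggregatePhi {
    std::vector<unsigned char> dest, phi;  // two stack buffers of equal size
    std::vector<jl_value_t*> roots;        // one slot per tracked field

    // predecessor edge: store the incoming bits and roots
    void add_incoming(const void *bits, const std::vector<jl_value_t*> &in_roots) {
        std::memcpy(dest.data(), bits, dest.size());
        roots = in_roots; // in IR this is one PHINode per root, not a copy
    }
    // successor entry: move dest -> phi before dest is clobbered again
    void materialize() {
        std::memcpy(phi.data(), dest.data(), dest.size());
    }
};
```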
// here it's moved into phi in the successor (from dest) - Align align(julia_alignment(phiType)); - dest = emit_static_alloca(ctx, vtype, align); - Value *phi = emit_static_alloca(ctx, vtype, align); - ctx.builder.CreateMemCpy(phi, align, dest, align, jl_datatype_size(phiType), false); - ctx.builder.CreateLifetimeEnd(dest); - slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); + auto tracked = CountTrackedPointers(vtype); + if (tracked.count) { + roots.resize(tracked.count); + assert(tracked.count == split_value_size((jl_datatype_t*)phiType).second); + for (size_t nr = 0; nr < tracked.count; nr++) { + auto root_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "root_phi"); + root_phi->insertInto(BB, InsertPt); + roots[nr] = root_phi; + } + } + AllocaInst *phi = nullptr; + if (!tracked.all) { + Align align(julia_alignment(phiType)); + unsigned nb = jl_datatype_size(phiType); + dest = emit_static_alloca(ctx, nb, align); + phi = cast(dest->clone()); + phi->insertBefore(dest); + ctx.builder.CreateMemCpy(phi, align, dest, align, nb, false); + ctx.builder.CreateLifetimeEnd(dest); + } + slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack, + roots.empty() ? ArrayRef() : ArrayRef((Value *const *)&roots.front(), roots.size())); } else { value_phi = PHINode::Create(vtype, jl_array_nrows(edges), "value_phi"); value_phi->insertInto(BB, InsertPt); slot = mark_julia_type(ctx, value_phi, isboxed, phiType); } - ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, r)); + ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r)); ctx.SAvalues[idx] = slot; ctx.ssavalue_assigned[idx] = true; return; @@ -5963,8 +6046,9 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu it = ctx.phic_slots.emplace(ssaidx_0based, jl_varinfo_t(ctx.builder.getContext())).first; } slot = emit_varinfo(ctx, it->second, jl_symbol("phic")); - } else { - slot = emit_expr(ctx, r, ssaidx_0based); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer) + } + else { + slot = emit_expr(ctx, r, ssaidx_0based); } if (slot.isboxed || slot.TIndex) { // see if inference suggested a different type for the ssavalue than the expression @@ -6123,11 +6207,22 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) vi.pTIndex, Align(1), true); } else if (vi.value.V && !vi.value.constant && vi.value.typ != jl_bottom_type) { - assert(vi.value.ispointer()); - Type *T = cast(vi.value.V)->getAllocatedType(); - if (CountTrackedPointers(T).count) { - // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL - ctx.builder.CreateStore(Constant::getNullValue(T), vi.value.V, true); + assert(vi.inline_roots || vi.value.ispointer()); + if (vi.inline_roots) { + // memory optimization: make gc pointers re-initialized to NULL + AllocaInst *ssaroots = vi.inline_roots; + size_t nroots = cast(ssaroots->getArraySize())->getZExtValue(); + auto T_prjlvalue = ssaroots->getAllocatedType(); + if (auto AT = dyn_cast(T_prjlvalue)) { + nroots *= AT->getNumElements(); + T_prjlvalue = AT->getElementType(); + } + assert(T_prjlvalue == ctx.types().T_prjlvalue); + Value *nullval = Constant::getNullValue(T_prjlvalue); + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + for (size_t i = 0; i < nroots; i++) { + stack_ai.decorateInst(ctx.builder.CreateAlignedStore(nullval, emit_ptrgep(ctx, ssaroots, i * sizeof(void*)), ssaroots->getAlign(), true)); + } } } } @@ -6865,14 +6960,17 @@ static void 
emit_cfunc_invalidate( ++AI; // gcstack_arg } for (size_t i = 0; i < nargs; i++) { + // n.b. calltype is required to be a datatype by construction for specsig jl_value_t *jt = jl_nth_slot_type(calltype, i); - // n.b. specTypes is required to be a datatype by construction for specsig - bool isboxed = false; - Type *et; if (i == 0 && is_for_opaque_closure) { - et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); + Value *arg_v = &*AI; + ++AI; + myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + continue; } - else if (deserves_argbox(jt)) { + bool isboxed = false; + Type *et; + if (deserves_argbox(jt)) { et = ctx.types().T_prjlvalue; isboxed = true; } @@ -6889,8 +6987,14 @@ static void emit_cfunc_invalidate( else { Value *arg_v = &*AI; ++AI; - if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) { - myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + if (!isboxed && et->isAggregateType()) { + auto tracked = CountTrackedPointers(et); + SmallVector roots; + if (tracked.count && !tracked.all) { + roots = load_gc_roots(ctx, &*AI, tracked.count); + ++AI; + } + myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const, roots); } else { assert(arg_v->getType() == et); @@ -6903,6 +7007,7 @@ static void emit_cfunc_invalidate( jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type); if (cc != jl_returninfo_t::Boxed) { emit_typecheck(ctx, gf_retbox, rettype, "cfunction"); + gf_retbox = update_julia_type(ctx, gf_retbox, rettype); } switch (cc) { @@ -6920,14 +7025,15 @@ static void emit_cfunc_invalidate( break; } case jl_returninfo_t::SRet: { + Value *sret = &*gf_thunk->arg_begin(); + Align align(julia_alignment(rettype)); if (return_roots) { - Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]* - // store the whole object in the first slot - ctx.builder.CreateStore(gf_ret, root1); + Value *roots = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]* + split_value_into(ctx, gf_retbox, align, sret, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else { + emit_unbox_store(ctx, gf_retbox, sret, ctx.tbaa().tbaa_stack, align); } - Align alignment(julia_alignment(rettype)); - emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret, - jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), Align(alignment), Align(alignment)); ctx.builder.CreateRetVoid(); break; } @@ -7698,14 +7804,18 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, j Align(sizeof(void*))); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } - ctx.builder.CreateRet(boxed(ctx, retval)); + if (retval.typ == jl_bottom_type) + CreateTrap(ctx.builder, false); + else + ctx.builder.CreateRet(boxed(ctx, retval)); } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments, size_t *arg_offset) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, + ArrayRef ArgNames, unsigned nreq) { jl_returninfo_t props = {}; - SmallVector fsig; - SmallVector argnames; + SmallVector fsig; + SmallVector argnames; Type *rt = NULL; Type *srt = NULL; if (jlrettype == (jl_value_t*)jl_bottom_type) { @@ 
-7742,8 +7852,10 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) { auto tracked = CountTrackedPointers(rt, true); assert(!tracked.derived); - if (tracked.count && !tracked.all) + if (tracked.count && !tracked.all) { props.return_roots = tracked.count; + assert(props.return_roots == ((jl_datatype_t*)jlrettype)->layout->npointers); + } props.cc = jl_returninfo_t::SRet; props.union_bytes = jl_datatype_size(jlrettype); props.union_align = props.union_minalign = jl_datatype_align(jlrettype); @@ -7801,29 +7913,22 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value argnames.push_back("pgcstack_arg"); } - if (arg_offset) - *arg_offset = fsig.size(); size_t nparams = jl_nparams(sig); - if (used_arguments) - used_arguments->resize(nparams); - for (size_t i = 0; i < nparams; i++) { jl_value_t *jt = jl_tparam(sig, i); bool isboxed = false; - Type *ty = NULL; - if (i == 0 && is_opaque_closure) { - ty = nullptr; // special token to avoid computing this unnecessarily - } - else { + Type *et = nullptr; + if (i != 0 || !is_opaque_closure) { // special token for OC argument if (is_uniquerep_Type(jt)) continue; isboxed = deserves_argbox(jt); - ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); - if (type_is_ghost(ty)) + et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + if (type_is_ghost(et)) continue; } AttrBuilder param(ctx.builder.getContext()); - if (ty == nullptr || ty->isAggregateType()) { // aggregate types are passed by pointer + Type *ty = et; + if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::ReadOnly); ty = ctx.builder.getPtrTy(AddressSpace::Derived); @@ -7838,8 +7943,26 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value } attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); fsig.push_back(ty); - if (used_arguments) - used_arguments->set(i); + size_t argno = i < nreq ? i : nreq; + std::string genname; + if (!ArgNames.empty()) { + genname = ArgNames[argno]; + if (genname.empty()) + genname = (StringRef("#") + Twine(argno + 1)).str(); + if (i >= nreq) + genname += (StringRef("[") + Twine(i - nreq + 1) + StringRef("]")).str(); + const char *arg_typename = jl_is_datatype(jt) ? jl_symbol_name(((jl_datatype_t*)jt)->name->name) : ""; + argnames.push_back((genname + StringRef("::") + arg_typename).str()); + } + if (et && et->isAggregateType()) { + auto tracked = CountTrackedPointers(et); + if (tracked.count && !tracked.all) { + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(ctx.builder.getPtrTy(M->getDataLayout().getAllocaAddrSpace())); + if (!genname.empty()) + argnames.push_back((Twine(".roots.") + genname).str()); + } + } } AttributeSet FnAttrs; @@ -7887,12 +8010,6 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value return props; } -static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, unsigned count) -{ - unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ctx.builder); //This comes from Late-GC-Lowering?? 
- assert(emitted == count); (void)emitted; (void)count; -} - static DISubroutineType * get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder) { @@ -8105,49 +8222,26 @@ static jl_llvm_functions_t Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - BitVector used_args; - size_t args_begin; - returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, - jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg), &used_args, &args_begin); - f = cast(returninfo.decl.getCallee()); - has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); - jl_init_function(f, ctx.emission_context.TargetTriple); + SmallVector ArgNames(0); if (ctx.emission_context.debug_level >= 2) { - auto arg_typename = [&](size_t i) JL_NOTSAFEPOINT { - auto tp = jl_tparam(lam->specTypes, i); - return jl_is_datatype(tp) ? jl_symbol_name(((jl_datatype_t*)tp)->name->name) : ""; - }; - size_t nreal = 0; - for (size_t i = 0; i < std::min(nreq, static_cast(used_args.size())); i++) { + ArgNames.resize(ctx.nargs, ""); + for (int i = 0; i < ctx.nargs; i++) { jl_sym_t *argname = slot_symbol(ctx, i); if (argname == jl_unused_sym) continue; - if (used_args.test(i)) { - auto &arg = *f->getArg(args_begin++); - nreal++; - auto name = jl_symbol_name(argname); - if (!name[0]) { - arg.setName(StringRef("#") + Twine(nreal) + StringRef("::") + arg_typename(i)); - } else { - arg.setName(name + StringRef("::") + arg_typename(i)); - } - } - } - if (va && ctx.vaSlot != -1) { - size_t vidx = 0; - for (size_t i = nreq; i < used_args.size(); i++) { - if (used_args.test(i)) { - auto &arg = *f->getArg(args_begin++); - auto type = arg_typename(i); - const char *name = jl_symbol_name(slot_symbol(ctx, ctx.vaSlot)); - if (!name[0]) - name = "..."; - vidx++; - arg.setName(name + StringRef("[") + Twine(vidx) + StringRef("]::") + type); - } - } + const char *name = jl_symbol_name(argname); + if (name[0] == '\0' && ctx.vaSlot == i) + ArgNames[i] = "..."; + else + ArgNames[i] = name; } } + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg), + ArgNames, nreq); + f = cast(returninfo.decl.getCallee()); + has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); + jl_init_function(f, ctx.emission_context.TargetTriple); // common pattern: see if all return statements are an argument in that // case the apply-generic call can re-use the original box for the return @@ -8348,14 +8442,16 @@ static jl_llvm_functions_t allocate_gc_frame(ctx, b0); Value *last_age = NULL; auto world_age_field = get_tls_world_age_field(ctx); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); - last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad( - ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); - ctx.world_age_at_entry = last_age; // Load world age for use in get_tls_world_age + { // scope + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad( + ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); + ctx.world_age_at_entry = last_age; // Load world age for use in get_tls_world_age + } // step 7. 
allocate local variables slots // must be in the first basic block for the llvm mem2reg pass to work - auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled, M](jl_varinfo_t &varinfo, jl_sym_t *s, int i) { + auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled](jl_varinfo_t &varinfo, jl_sym_t *s, int i) { jl_value_t *jt = varinfo.value.typ; assert(!varinfo.boxroot); // variables shouldn't have memory locs already if (varinfo.value.constant) { @@ -8375,13 +8471,13 @@ static jl_llvm_functions_t if (lv) { lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); - varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); + varinfo.pTIndex = emit_static_alloca(ctx, 1, Align(1)); setName(ctx.emission_context, varinfo.pTIndex, "tindex"); // TODO: attach debug metadata to this variable } else if (allunbox) { // all ghost values just need a selector allocated - AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); + AllocaInst *lv = emit_static_alloca(ctx, 1, Align(1)); lv->setName(jl_symbol_name(s)); varinfo.pTIndex = lv; varinfo.value.tbaa = NULL; @@ -8394,30 +8490,25 @@ static jl_llvm_functions_t return; } else if (deserves_stack(jt)) { - bool isboxed; - Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed); - assert(!isboxed); - assert(!type_is_ghost(vtype) && "constants should already be handled"); - Value *lv = new AllocaInst(vtype, M->getDataLayout().getAllocaAddrSpace(), nullptr, Align(jl_datatype_align(jt)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); - if (CountTrackedPointers(vtype).count) { - StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*))); - SI->insertAfter(ctx.topalloca); - } - varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); + auto sizes = split_value_size((jl_datatype_t*)jt); + AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, Align(julia_alignment(jt))) : nullptr; + AllocaInst *roots = sizes.second > 0 ? emit_static_roots(ctx, sizes.second) : nullptr; + if (bits) bits->setName(jl_symbol_name(s)); + if (roots) roots->setName(StringRef(".roots.") + jl_symbol_name(s)); + varinfo.value = mark_julia_slot(bits, jt, NULL, ctx.tbaa().tbaa_stack, None); + varinfo.inline_roots = roots; alloc_def_flag(ctx, varinfo); if (debug_enabled && varinfo.dinfo) { assert((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt); - dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(), + dbuilder.insertDeclare(bits ? 
bits : roots, varinfo.dinfo, dbuilder.createExpression(), topdebugloc, ctx.builder.GetInsertBlock()); } return; } // otherwise give it a boxroot in this function - AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(), - nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); - StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); - SI->insertAfter(ctx.topalloca); + AllocaInst *av = emit_static_roots(ctx, 1); + av->setName(jl_symbol_name(s)); varinfo.boxroot = av; if (debug_enabled && varinfo.dinfo) { SmallVector addr; @@ -8504,12 +8595,18 @@ static jl_llvm_functions_t ++AI; AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo())); jl_cgval_t theArg; - if (llvmArgType->isAggregateType()) { + if (!isboxed && llvmArgType->isAggregateType()) { maybe_mark_argument_dereferenceable(param, argType); - theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const); // this argument is by-pointer + SmallVector roots; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) { + roots = load_gc_roots(ctx, &*AI, tracked.count); + ++AI; + } + theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const, roots); // this argument is by-pointer } else { - if (isboxed) // e.g. is-pointer + if (isboxed) maybe_mark_argument_dereferenceable(param, argType); theArg = mark_julia_type(ctx, Arg, isboxed, argType); if (theArg.tbaa == ctx.tbaa().tbaa_immut) @@ -8566,95 +8663,92 @@ static jl_llvm_functions_t bool isboxed = deserves_argbox(argType); Type *llvmArgType = NULL; if (i == 0 && ctx.is_opaque_closure) { - isboxed = true; - llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); + isboxed = false; + llvmArgType = ctx.builder.getPtrTy(AddressSpace::Derived); argType = (jl_value_t*)jl_any_type; } else { llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); } - if (s == jl_unused_sym) { - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) - ++AI; - continue; - } jl_varinfo_t &vi = ctx.slots[i]; - jl_cgval_t theArg; if (s == jl_unused_sym || vi.value.constant) { assert(vi.boxroot == NULL); - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) + if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { ++AI; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) + ++AI; + } + continue; } - else { - // If this is an opaque closure, implicitly load the env and switch - // the world age. - if (i == 0 && ctx.is_opaque_closure) { - // Load closure world - Value *oc_this = decay_derived(ctx, &*AI++); - Value *argaddr = oc_this; - Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); + jl_cgval_t theArg; + // If this is an opaque closure, implicitly load the env and switch + // the world age. 
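For the ordinary (non-opaque-closure) arguments handled by `get_specsig_arg` in this loop, an aggregate whose LLVM type mixes tracked and untracked fields now arrives as two parameters: a derived pointer to the untracked bits and a pointer to a caller-initialized roots array. A hedged sketch of the callee's view, using a `Tuple{Float64, Any}`-shaped example; all names here are illustrative, not the generated symbols:

```cpp
// Hedged sketch of the callee side of the split-argument convention:
// one pointer to the untracked payload, one pointer to the roots array.
typedef struct _jl_value_t jl_value_t;

struct pair_bits_t { double x; }; // untracked payload of the aggregate

static double use_split_arg(const pair_bits_t *bits /* ptr(Derived) arg */,
                            jl_value_t *const *roots /* ".roots." companion */) {
    jl_value_t *obj = roots[0]; // the Any field, reloaded from the roots slot
    (void)obj;                  // a real callee would use or re-root it
    return bits->x;             // untracked field, loaded from the bits slot
}
```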
+ if (i == 0 && ctx.is_opaque_closure) { + // Load closure world + Value *oc_this = decay_derived(ctx, &*AI++); + Value *argaddr = oc_this; + Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); - jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, - nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); - ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure - emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); + jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, + nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); + ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure + emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); - // Load closure env - Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); + // Load closure env + Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = update_julia_type(ctx, closure_env, vi.value.typ); - } - else if (specsig) { - theArg = get_specsig_arg(argType, llvmArgType, isboxed); + jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, + nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); + theArg = update_julia_type(ctx, closure_env, vi.value.typ); + } + else if (specsig) { + theArg = get_specsig_arg(argType, llvmArgType, isboxed); + } + else { + if (i == 0) { + // first (function) arg is separate in jlcall + theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { - if (i == 0) { - // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); - } - else { - Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( - ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), - false, vi.value.typ)); - theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (debug_enabled && vi.dinfo && !vi.boxroot) { - SmallVector addr; + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), + false, vi.value.typ)); + theArg = mark_julia_type(ctx, load, true, vi.value.typ); + if (debug_enabled && vi.dinfo && !vi.boxroot) { + SmallVector addr; + addr.push_back(llvm::dwarf::DW_OP_deref); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); + addr.push_back((i - 1) * sizeof(void*)); + if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus_uconst); - addr.push_back((i - 1) * sizeof(void*)); - if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) - addr.push_back(llvm::dwarf::DW_OP_deref); - dbuilder.insertDeclare(pargArray, vi.dinfo, 
dbuilder.createExpression(addr), - topdebugloc, - ctx.builder.GetInsertBlock()); - } + dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), + topdebugloc, + ctx.builder.GetInsertBlock()); } } + } - - if (vi.boxroot == NULL) { - assert(vi.value.V == NULL && "unexpected variable slot created for argument"); - // keep track of original (possibly boxed) value to avoid re-boxing or moving - vi.value = theArg; - if (debug_enabled && vi.dinfo && theArg.V) { - if (theArg.ispointer()) { - dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(), - topdebugloc, ctx.builder.GetInsertBlock()); - } - else { - dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(), - topdebugloc, ctx.builder.GetInsertBlock()); - } + if (vi.boxroot == nullptr) { + assert(vi.value.V == nullptr && vi.inline_roots == nullptr && "unexpected variable slot created for argument"); + // keep track of original (possibly boxed) value to avoid re-boxing or moving + vi.value = theArg; + if (debug_enabled && vi.dinfo && theArg.V) { + if (!theArg.inline_roots.empty() || theArg.ispointer()) { + dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); + } + else { + dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); } } - else { - Value *argp = boxed(ctx, theArg); - ctx.builder.CreateStore(argp, vi.boxroot); - } + } + else { + Value *argp = boxed(ctx, theArg); + ctx.builder.CreateStore(argp, vi.boxroot); } } // step 9. allocate rest argument @@ -9129,29 +9223,31 @@ static jl_llvm_functions_t break; } if (sret) { - if (retvalinfo.ispointer()) { - if (returninfo.return_roots) { - Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ); - emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, returninfo.return_roots); - } + Align align(returninfo.union_align); + if (!returninfo.return_roots && !retvalinfo.inline_roots.empty()) { + assert(retvalinfo.V == nullptr); + assert(returninfo.cc == jl_returninfo_t::SRet); + split_value_into(ctx, retvalinfo, align, nullptr, align, + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), sret, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else if (returninfo.return_roots) { + assert(returninfo.cc == jl_returninfo_t::SRet); + Value *return_roots = f->arg_begin() + 1; + split_value_into(ctx, retvalinfo, align, sret, align, + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), return_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else if (retvalinfo.ispointer()) { if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); - Align alignment(julia_alignment(jlrettype)); emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo, - jl_datatype_size(jlrettype), alignment, alignment); + jl_datatype_size(jlrettype), align, align); } else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union); } } else { - Type *store_ty = retvalinfo.V->getType(); - Value *Val = retvalinfo.V; - if (returninfo.return_roots) { - assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty); - emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, returninfo.return_roots); - } - ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ))); + ctx.builder.CreateAlignedStore(retvalinfo.V, sret, align); assert(retvalinfo.TIndex == NULL && 
"unreachable"); // unimplemented representation } } @@ -9288,8 +9384,9 @@ static jl_llvm_functions_t PHINode *VN; jl_value_t *r; AllocaInst *dest; + SmallVector roots; BasicBlock *PhiBB; - std::tie(phi_result, PhiBB, dest, VN, r) = tup; + std::tie(phi_result, PhiBB, dest, VN, roots, r) = tup; jl_value_t *phiType = phi_result.typ; jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0); jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1); @@ -9347,6 +9444,7 @@ static jl_llvm_functions_t val = mark_julia_const(ctx, val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex if (!jl_is_uniontype(phiType) || !TindexN) { if (VN) { + assert(roots.empty() && !dest); Value *V; if (val.typ == (jl_value_t*)jl_bottom_type) { V = undef_value_for_type(VN->getType()); @@ -9367,14 +9465,34 @@ static jl_llvm_functions_t VN->addIncoming(V, ctx.builder.GetInsertBlock()); assert(!TindexN); } - else if (dest && val.typ != (jl_value_t*)jl_bottom_type) { + else if ((dest || !roots.empty()) && val.typ != (jl_value_t*)jl_bottom_type) { // must be careful to emit undef here (rather than a bitcast or // load of val) if the runtime type of val isn't phiType + auto tracked = split_value_size((jl_datatype_t*)phiType).second; Value *isvalid = emit_isa_and_defined(ctx, val, phiType); - emit_guarded_test(ctx, isvalid, nullptr, [&] { - emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, Align(julia_alignment(phiType))); - return nullptr; + assert(roots.size() == tracked && isvalid != nullptr); + SmallVector incomingroots(0); + if (tracked) + incomingroots.resize(tracked, Constant::getNullValue(ctx.types().T_prjlvalue)); + emit_guarded_test(ctx, isvalid, incomingroots, [&] { + jl_cgval_t typedval = update_julia_type(ctx, val, phiType); + SmallVector mayberoots(tracked, Constant::getNullValue(ctx.types().T_prjlvalue)); + if (typedval.typ != jl_bottom_type) { + Align align(julia_alignment(phiType)); + if (tracked) + split_value_into(ctx, typedval, align, dest, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), mayberoots); + else + emit_unbox_store(ctx, typedval, dest, ctx.tbaa().tbaa_stack, align); + } + return mayberoots; }); + for (size_t nr = 0; nr < tracked; nr++) + roots[nr]->addIncoming(incomingroots[nr], ctx.builder.GetInsertBlock()); + } + else if (!roots.empty()) { + Value *V = Constant::getNullValue(ctx.types().T_prjlvalue); + for (size_t nr = 0; nr < roots.size(); nr++) + roots[nr]->addIncoming(V, ctx.builder.GetInsertBlock()); } } else { @@ -9383,6 +9501,7 @@ static jl_llvm_functions_t // `V` is always initialized when it is used. 
@@ -9383,6 +9501,7 @@ static jl_llvm_functions_t
             // `V` is always initialized when it is used.
             // Ref https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96629
             Value *V = nullptr;
+            assert(roots.empty());
             if (val.typ == (jl_value_t*)jl_bottom_type) {
                 if (VN)
                     V = undef_value_for_type(VN->getType());
@@ -9473,11 +9592,10 @@ static jl_llvm_functions_t
             }
             if (TindexN)
                 TindexN->addIncoming(RTindex, FromBB);
-            if (dest) {
+            if (dest)
                 ctx.builder.CreateLifetimeStart(dest);
-                if (CountTrackedPointers(dest->getAllocatedType()).count)
-                    ctx.builder.CreateStore(Constant::getNullValue(dest->getAllocatedType()), dest);
-            }
+            for (size_t nr = 0; nr < roots.size(); nr++)
+                roots[nr]->addIncoming(Constant::getNullValue(ctx.types().T_prjlvalue), FromBB);
             ctx.builder.ClearInsertionPoint();
         }
     }
@@ -9524,15 +9642,19 @@ static jl_llvm_functions_t

     if (ctx.vaSlot > 0) {
         // remove VA allocation if we never referenced it
+        assert(ctx.slots[ctx.vaSlot].isSA && ctx.slots[ctx.vaSlot].isArgument);
         Instruction *root = cast_or_null<Instruction>(ctx.slots[ctx.vaSlot].boxroot);
         if (root) {
-            Instruction *store_value = NULL;
             bool have_real_use = false;
             for (Use &U : root->uses()) {
                 User *RU = U.getUser();
                 if (StoreInst *SRU = dyn_cast<StoreInst>(RU)) {
-                    if (!store_value)
-                        store_value = dyn_cast<Instruction>(SRU->getValueOperand());
+                    assert(isa<ConstantPointerNull>(SRU->getValueOperand()) || SRU->getValueOperand() == restTuple);
+                    (void)SRU;
+                }
+                else if (MemSetInst *MSI = dyn_cast<MemSetInst>(RU)) {
+                    assert(MSI->getValue() == ctx.builder.getInt8(0));
+                    (void)MSI;
                 }
                 else if (isa<DbgInfoIntrinsic>(RU)) {
                 }
@@ -9554,7 +9676,6 @@ static jl_llvm_functions_t
             if (use)
                 use->eraseFromParent();
             root->eraseFromParent();
-            assert(!store_value || store_value == restTuple);
             restTuple->eraseFromParent();
         }
     }
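Both the sret return path and the phi lowering above funnel through the same pair of primitives: split_value_into peels a value apart into its untracked payload and its GC-tracked pointers, and recombine_value puts them back together. A rough stand-alone sketch of the idea with hypothetical plain-C++ stand-ins (Pair, split_pair, and recombine_pair are illustrative only; the patch's actual helpers operate on jl_cgval_t and emit IR):

    #include <cstring>
    #include <vector>

    struct jl_value_t;                          // opaque GC-managed object
    struct Pair { double x; jl_value_t *ref; }; // raw data mixed with a GC reference

    // Split a value the way split_value_into splits a jl_cgval_t: untracked bytes
    // go to `payload`, tracked pointers go to a separate `roots` array that the
    // GC frame can scan precisely.
    void split_pair(const Pair &v, unsigned char *payload, std::vector<jl_value_t*> &roots)
    {
        std::memcpy(payload, &v.x, sizeof(v.x));
        roots.push_back(v.ref);
    }

    // The inverse, in the spirit of recombine_value: interleave payload and roots
    // back into the original object layout.
    Pair recombine_pair(const unsigned char *payload, const std::vector<jl_value_t*> &roots)
    {
        Pair v;
        std::memcpy(&v.x, payload, sizeof(v.x));
        v.ref = roots.at(0);
        return v;
    }

    int main()
    {
        Pair p{1.5, nullptr};
        unsigned char buf[sizeof(double)];
        std::vector<jl_value_t*> roots;
        split_pair(p, buf, roots);
        return recombine_pair(buf, roots).x == p.x ? 0 : 1;
    }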
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index c747edfeffe5f..09916297e16ff 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -441,14 +441,14 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
         // up being dead code, and type inference knows that the other
         // branch's type is the only one that matters.
         if (type_is_ghost(to)) {
-            return NULL;
+            return nullptr;
         }
         CreateTrap(ctx.builder);
         return UndefValue::get(to); // type mismatch error
     }

-    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : NULL;
-    if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion
+    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : nullptr;
+    if ((x.inline_roots.empty() && !x.ispointer()) || c != nullptr) { // already unboxed, but sometimes need conversion
         Value *unboxed = c ? c : x.V;
         return emit_unboxed_coercion(ctx, to, unboxed);
     }
@@ -473,28 +473,17 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     }

     unsigned alignment = julia_alignment(jt);
-    Type *ptype = to->getPointerTo();
-    if (p->getType() != ptype && isa<AllocaInst>(p)) {
-        // LLVM's mem2reg can't handle coercion if the load/store type does
-        // not match the type of the alloca. As such, it is better to
-        // perform the load using the alloca's type and then perform the
-        // appropriate coercion manually.
-        AllocaInst *AI = cast<AllocaInst>(p);
-        Type *AllocType = AI->getAllocatedType();
-        const DataLayout &DL = jl_Module->getDataLayout();
-        if (!AI->isArrayAllocation() &&
-            (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) &&
-            (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) &&
-            DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) {
-            Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment));
-            setName(ctx.emission_context, load, p->getName() + ".unbox");
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
-            return emit_unboxed_coercion(ctx, to, ai.decorateInst(load));
-        }
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    if (!x.inline_roots.empty()) {
+        assert(x.typ == jt);
+        AllocaInst *combined = emit_static_alloca(ctx, to, Align(alignment));
+        auto combined_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+        recombine_value(ctx, x, combined, combined_ai, Align(alignment), false);
+        p = combined;
+        ai = combined_ai;
     }
     Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
     setName(ctx.emission_context, load, p->getName() + ".unbox");
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
     return ai.decorateInst(load);
 }

@@ -508,18 +497,25 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
         return;
     }

+    auto dest_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
+
+    if (!x.inline_roots.empty()) {
+        recombine_value(ctx, x, dest, dest_ai, alignment, isVolatile);
+        return;
+    }
+
     if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. f32 -> i32)
         assert(x.V);
         Value *unboxed = zext_struct(ctx, x.V);
         StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, alignment);
         store->setVolatile(isVolatile);
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
-        ai.decorateInst(store);
+        dest_ai.decorateInst(store);
         return;
     }

     Value *src = data_pointer(ctx, x);
-    emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), Align(alignment), Align(julia_alignment(x.typ)), isVolatile);
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    emit_memcpy(ctx, dest, dest_ai, src, src_ai, jl_datatype_size(x.typ), Align(alignment), Align(julia_alignment(x.typ)), isVolatile);
 }

 static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
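emit_unbox_store now dispatches three ways: values whose GC pointers were split out are recombined into the destination, register values are stored directly, and in-memory values are memcpy'd. A toy host-memory model of that dispatch (CgVal and unbox_store are illustrative stand-ins, not emitted IR):

    #include <cstring>
    #include <vector>

    struct jl_value_t; // opaque GC-managed object

    // Illustrative stand-in for jl_cgval_t; exactly one representation is active.
    struct CgVal {
        const void *mem = nullptr;             // value lives in memory
        unsigned long long bits = 0;           // value lives "in registers"
        std::vector<jl_value_t*> inline_roots; // GC pointers were split out
        size_t size = 0;
    };

    void unbox_store(const CgVal &x, void *dest)
    {
        if (!x.inline_roots.empty()) {
            // recombine_value path: payload and roots are interleaved back
            // into the destination's object layout (elided in this sketch).
            return;
        }
        if (x.mem == nullptr) {           // already unboxed: plain store
            std::memcpy(dest, &x.bits, x.size);
            return;
        }
        std::memcpy(dest, x.mem, x.size); // in-memory value: memcpy
    }

    int main()
    {
        CgVal x;
        x.bits = 42;
        x.size = sizeof(x.bits);
        unsigned long long out = 0;
        unbox_store(x, &out);
        return out == 42 ? 0 : 1;
    }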
@@ -832,10 +828,9 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
     Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
     setName(ctx.emission_context, im1, "pointerset_idx");
-    Value *thePtr;
+    Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
     if (ety == (jl_value_t*)jl_any_type) {
         // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
-        thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ);
         auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
         auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size);
@@ -844,8 +839,10 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
         ai.decorateInst(store);
     }
+    else if (!x.inline_roots.empty()) {
+        recombine_value(ctx, x, thePtr, jl_aliasinfo_t(), Align(align_nb), false);
+    }
     else if (x.ispointer()) {
-        thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
@@ -859,7 +856,6 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
-            thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
             thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1);
             typed_store(ctx, thePtr, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
                         AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, nullptr, true, false, false, false, false, false, nullptr, "atomic_pointerset", nullptr, nullptr);
diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h
index 956c04dbc7ded..a99e18f3e3762 100644
--- a/src/llvm-codegen-shared.h
+++ b/src/llvm-codegen-shared.h
@@ -125,7 +125,6 @@ struct CountTrackedPointers {
     CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false);
 };

-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
 llvm::SmallVector<llvm::Value*, 0> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});

 static inline void llvm_dump(llvm::Value *v)
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 0605098bec361..76dcd944890ab 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -202,9 +202,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
         } while (0)

         LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
+        LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
         LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
         LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
-        LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
         LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
         LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
         LOWER_INTRINSIC(safepoint, lowerSafepoint);
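One detail from the emit_pointerset hunk above that is easy to misread: element addressing scales the 1-based index by the element size rounded up to the element alignment. A self-contained check of that arithmetic (llt_align mirrors Julia's LLT_ALIGN macro; the concrete numbers are made up for illustration):

    #include <cstdint>

    // Mirrors Julia's LLT_ALIGN macro: round x up to a multiple of sz (a power of two).
    constexpr uint64_t llt_align(uint64_t x, uint64_t sz)
    {
        return (x + sz - 1) & ~(sz - 1);
    }

    int main()
    {
        // e.g. a 12-byte element with 8-byte alignment occupies 16 bytes per slot,
        // so the third element behind a Ptr{T} lives at byte offset (3 - 1) * 16 = 32.
        uint64_t size = 12, align = 8, idx = 3;
        uint64_t offset = (idx - 1) * llt_align(size, align);
        return offset == 32 ? 0 : 1;
    }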
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
index cb485751d407b..d33567e887118 100644
--- a/src/llvm-gc-interface-passes.h
+++ b/src/llvm-gc-interface-passes.h
@@ -312,7 +312,6 @@ struct State {
     SmallVector<SmallVector<int, 0>> CalleeRoots;
     // We don't bother doing liveness on Allocas that were not mem2reg'ed.
    // they just get directly sunk into the root array.
-    SmallVector<AllocaInst*, 0> Allocas;
     DenseMap<AllocaInst*, unsigned> ArrayAllocas;
     DenseMap<AllocaInst*, AllocaInst*> ShadowAllocas;
     SmallVector<std::pair<StoreInst*, unsigned>, 0> TrackedStores;
@@ -332,9 +331,9 @@ struct LateLowerGCFrame: private JuliaPassContext {
     void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
                       SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
-    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses);
-    void NoteUse(State &S, BBState &BBS, Value *V) {
-        NoteUse(S, BBS, V, BBS.UpExposedUses);
+    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F);
+    void NoteUse(State &S, BBState &BBS, Value *V, Function &F) {
+        NoteUse(S, BBS, V, BBS.UpExposedUses, F);
     }
     void LiftPhi(State &S, PHINode *Phi);
@@ -348,7 +347,7 @@ struct LateLowerGCFrame: private JuliaPassContext {
     SmallVector<int, 0> NumberAll(State &S, Value *V);
     SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
-    void NoteOperandUses(State &S, BBState &BBS, User &UI);
+    void NoteOperandUses(State &S, BBState &BBS, Instruction &UI);
     void MaybeTrackDst(State &S, MemTransferInst *MI);
     void MaybeTrackStore(State &S, StoreInst *I);
     State LocalScan(Function &F);
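With S.Allocas folded into S.ArrayAllocas, a rooted scalar alloca is now simply an array alloca of length one, and GC-frame slot assignment becomes uniform: each alloca claims a contiguous slot range, offset past the two metadata words. A toy model of that numbering (the map keys are strings here; the real keys are AllocaInst pointers):

    #include <cstdio>
    #include <map>
    #include <string>

    int main()
    {
        // Hypothetical stand-in for S.ArrayAllocas: alloca -> number of GC slots.
        // Scalars that used to live in S.Allocas are just count-1 entries now.
        std::map<std::string, unsigned> array_allocas = {
            {"%phic.roots", 2},
            {"%tmp.root", 1},
        };
        unsigned slot = 2; // first two words of the GC frame are metadata
        for (const auto &[name, count] : array_allocas) {
            std::printf("%s -> slots [%u, %u)\n", name.c_str(), slot, slot + count);
            slot += count;
        }
        return 0;
    }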
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 8d1d5ff73b261..1d390a5115207 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -695,8 +695,15 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, SmallVectorImpl<int> &CalleeRoots)
     }
 }

-void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses)
+void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F)
 {
+//#ifndef NDEBUG
+//    if (isa<PointerType>(V->getType())) {
+//        if (isSpecialPtr(V->getType()))
+//            if (isa<UndefValue>(V) && !isa<PoisonValue>(V))
+//                F.dump();
+//    }
+//#endif
     if (isa<Constant>(V))
         return;
     if (isa<PointerType>(V->getType())) {
@@ -718,9 +725,9 @@
     }
 }

-void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) {
+void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, Instruction &UI) {
     for (Use &U : UI.operands()) {
-        NoteUse(S, BBS, U);
+        NoteUse(S, BBS, U, *UI.getFunction());
     }
 }

@@ -1377,7 +1384,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 unsigned nIncoming = Phi->getNumIncomingValues();
                 for (unsigned i = 0; i < nIncoming; ++i) {
                     BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
-                    NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
+                    NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts, F);
                 }
             } else if (tracked.count) {
                 // We need to insert extra phis for the GC roots
@@ -1403,7 +1410,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
             }
         } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
             Type *ElT = AI->getAllocatedType();
             if (AI->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) {
-                S.Allocas.push_back(AI);
+                S.ArrayAllocas[AI] = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
             }
         }
     }
@@ -1494,18 +1501,17 @@ SmallVector<Value*, 0> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, I
     return Ptrs;
 }

-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
-    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
-    for (unsigned i = 0; i < Ptrs.size(); ++i) {
-        Value *Elem = Ptrs[i];
-        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
-        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
-        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
-        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    }
-    return Ptrs.size();
-}
-
+//static unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
+//    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
+//    for (unsigned i = 0; i < Ptrs.size(); ++i) {
+//        Value *Elem = Ptrs[i];
+//        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
+//        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
+//        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
+//        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+//    }
+//    return Ptrs.size();
+//}

 // turn a memcpy into a set of loads
 void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI)
 {
@@ -2321,7 +2327,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             MaxColor = C;

     // Insert instructions for the actual gc frame
-    if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
+    if (MaxColor != -1 || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
         // Create and push a GC frame.
         auto gcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::newGCFrame),
@@ -2334,6 +2340,43 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             {gcframe, ConstantInt::get(T_int32, 0)});
         pushGcframe->insertAfter(pgcstack);

+        // we don't run memsetopt after this, so run a basic approximation of it
+        // that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store
+        Instruction *toerase = nullptr;
+        for (auto &I : F->getEntryBlock()) {
+            if (toerase)
+                toerase->eraseFromParent();
+            toerase = nullptr;
+            Value *ptr;
+            Value *value;
+            bool isvolatile;
+            if (auto *SI = dyn_cast<StoreInst>(&I)) {
+                ptr = SI->getPointerOperand();
+                value = SI->getValueOperand();
+                isvolatile = SI->isVolatile();
+            }
+            else if (auto *MSI = dyn_cast<MemSetInst>(&I)) {
+                ptr = MSI->getDest();
+                value = MSI->getValue();
+                isvolatile = MSI->isVolatile();
+            }
+            else {
+                continue;
+            }
+            ptr = ptr->stripInBoundsOffsets();
+            AllocaInst *AI = dyn_cast<AllocaInst>(ptr);
+            if (isa<GetElementPtrInst>(ptr))
+                break;
+            if (!S.ArrayAllocas.count(AI))
+                continue;
+            if (isvolatile || !isa<Constant>(value) || !cast<Constant>(value)->isNullValue())
+                break; // stop once we reach a pointer operation that couldn't be analyzed or isn't a null store
+            toerase = &I;
+        }
+        if (toerase)
+            toerase->eraseFromParent();
+        toerase = nullptr;
+
         // Replace Allocas
         unsigned AllocaSlot = 2; // first two words are metadata
         auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
@@ -2367,11 +2410,6 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             AI->eraseFromParent();
             AI = NULL;
         };
-        for (AllocaInst *AI : S.Allocas) {
-            auto ns = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
-            replace_alloca(AI);
-            AllocaSlot += ns;
-        }
         for (auto AI : S.ArrayAllocas) {
             replace_alloca(AI.first);
             AllocaSlot += AI.second;
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index 07308713bb789..26ae965b35319 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -501,10 +501,9 @@ function f37262(x)
     end
 end
 @testset "#37262" begin
-    str = "store volatile { i8, {}*, {}*, {}*, {}* } zeroinitializer, { i8, {}*, {}*, {}*, {}* }* %phic"
-    str_opaque = "store volatile { i8, ptr, ptr, ptr, ptr } zeroinitializer, ptr %phic"
+    str_opaque = "getelementptr inbounds i8, ptr %.roots.phic, i32 8\n store volatile ptr null"
     llvmstr = get_llvm(f37262, (Bool,), false, false, false)
-    @test (contains(llvmstr, str) || contains(llvmstr, str_opaque)) || llvmstr
+    @test contains(llvmstr, str_opaque)
     @test f37262(Base.inferencebarrier(true)) === nothing
 end
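The entry-block scan added in PlaceRootsAndUpdateCalls above is effectively a one-pass memsetopt: getGCFrameSlot lowers to a slot whose contents are already nulled, so a following non-volatile null store or zeroing memset to a frame alloca is redundant. A toy model of that cleanup (Op and drop_redundant_null_stores are hypothetical simplifications of the instruction walk):

    #include <string>
    #include <vector>

    // A prologue operation described only by the facts the real pass inspects:
    // is the target a GC-frame alloca, is it a null store, is it volatile.
    struct Op {
        std::string what;
        bool to_frame_alloca;
        bool null_store;
        bool is_volatile;
    };

    std::vector<Op> drop_redundant_null_stores(const std::vector<Op> &prologue)
    {
        std::vector<Op> out;
        bool scanning = true;
        for (const Op &op : prologue) {
            if (scanning && op.to_frame_alloca && op.null_store && !op.is_volatile)
                continue;         // redundant: getGCFrameSlot already nulled the slot
            if (scanning && op.to_frame_alloca)
                scanning = false; // a write we cannot prove harmless: stop erasing
            out.push_back(op);
        }
        return out;
    }

    int main()
    {
        std::vector<Op> prologue = {
            {"store null -> frame slot", true,  true,  false}, // dropped
            {"store 1 -> %x",            false, false, false}, // kept
            {"store %v -> frame slot",   true,  false, false}, // kept, stops the scan
            {"store null -> frame slot", true,  true,  false}, // kept (scan stopped)
        };
        return drop_redundant_null_stores(prologue).size() == 3 ? 0 : 1;
    }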