simple pass at giving union fields an optimized layout
Unlike codegen, only bitstype (!isptr) fields are permitted in the union, and the stored selector index starts from 0 instead of 1, but otherwise the tindex counter is compatible.
vtjnash committed May 3, 2017
1 parent 5f296f3 commit 8d5da6a
Showing 7 changed files with 206 additions and 65 deletions.
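
Before the diff, a rough illustration of what the new layout means at the Julia level may help. A field whose declared type is a union of bitstypes now gets an inline slot: payload bytes sized for the largest member, followed by one trailing selector byte holding the 0-based index described in the commit message. The struct and values in this sketch are hypothetical and not part of the commit; exact sizes depend on the alignment and padding rules in jl_compute_field_offsets below.

    # Hypothetical sketch of the field layout this commit introduces (not part of the diff).
    struct UnionField
        x::Union{Int16, Int32}   # inline payload (4 bytes for the larger member) + 1 trailing selector byte
        y::Int8
    end

    u = UnionField(Int16(3), Int8(1))
    u.x                    # getfield reads the selector byte, then reinterprets the payload as Int16
    typeof(u.x)            # Int16
    sizeof(UnionField)     # reflects the inline slot, the selector byte, and any padding
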
86 changes: 75 additions & 11 deletions src/cgutils.cpp
@@ -1157,6 +1157,33 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value *

// --- loading and storing ---

static Value *compute_box_tindex(Value *datatype, jl_value_t *supertype, jl_value_t *ut, jl_codectx_t *ctx)
{
Value *tindex = ConstantInt::get(T_int8, 0);
unsigned counter = 0;
for_each_uniontype_small(
[&](unsigned idx, jl_datatype_t *jt) {
if (jl_subtype((jl_value_t*)jt, supertype)) {
Value *cmp = builder.CreateICmpEQ(literal_pointer_val((jl_value_t*)jt), datatype);
tindex = builder.CreateSelect(cmp, ConstantInt::get(T_int8, idx), tindex);
}
},
ut,
counter);
return tindex;
}

// get the runtime tindex value
static Value *compute_tindex_unboxed(const jl_cgval_t &val, jl_value_t *typ, jl_codectx_t *ctx)
{
if (val.constant)
return ConstantInt::get(T_int8, get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
if (val.isboxed)
return compute_box_tindex(emit_typeof_boxed(val, ctx), val.typ, typ, ctx);
assert(val.TIndex);
return builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
}

// If given alignment is 0 and LLVM's assumed alignment for a load/store via ptr
// might be stricter than the Julia alignment for jltype, return the alignment of jltype.
// Otherwise return the given alignment.
@@ -1455,27 +1482,50 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx,
addr = builder.CreateStructGEP(LLVM37_param(lt) ptr, idx);
}
}
int align = jl_field_offset(jt, idx);
align |= 16;
align &= -align;
if (jl_field_isptr(jt, idx)) {
bool maybe_null = idx >= (unsigned)jt->ninitialized;
Instruction *Load = maybe_mark_load_dereferenceable(
builder.CreateLoad(emit_bitcast(addr, T_ppjlvalue)),
maybe_null, jl_field_type(jt, idx)
maybe_null, jfty
);
Value *fldv = tbaa_decorate(strct.tbaa, Load);
if (maybe_null)
null_pointer_check(fldv, ctx);
return mark_julia_type(fldv, true, jfty, ctx, strct.gcroot || !strct.isimmutable);
}
else if (jl_is_uniontype(jfty)) {
int fsz = jl_field_size(jt, idx);
Value *ptindex = builder.CreateGEP(LLVM37_param(T_int8) emit_bitcast(addr, T_pint8), ConstantInt::get(T_size, fsz - 1));
Value *tindex = builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), builder.CreateLoad(ptindex));
bool isimmutable = strct.isimmutable;
Value *gcroot = strct.gcroot;
if (jt->mutabl) {
// move value to an immutable stack slot
Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * align), (fsz + align - 2) / align);
AllocaInst *lv = emit_static_alloca(AT, ctx);
if (align > 1)
lv->setAlignment(align);
Value *nbytes = ConstantInt::get(T_size, fsz - 1);
builder.CreateMemCpy(lv, addr, nbytes, align);
addr = lv;
isimmutable = true;
gcroot = NULL;
}
jl_cgval_t fieldval = mark_julia_slot(addr, jfty, tindex, strct.tbaa);
fieldval.isimmutable = isimmutable;
fieldval.gcroot = gcroot;
return fieldval;
}
else if (!jt->mutabl) {
// just compute the pointer and let user load it when necessary
jl_cgval_t fieldval = mark_julia_slot(addr, jfty, NULL, strct.tbaa);
fieldval.isimmutable = strct.isimmutable;
fieldval.gcroot = strct.gcroot;
return fieldval;
}
int align = jl_field_offset(jt, idx);
align |= 16;
align &= -align;
return typed_load(addr, ConstantInt::get(T_size, 0), jfty, ctx, strct.tbaa, true, align);
}
else if (isa<UndefValue>(strct.V)) {
@@ -2069,7 +2119,7 @@ static void emit_unionmove(Value *dest, const jl_cgval_t &src, Value *skip, bool
jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
Type *store_ty = julia_type_to_llvm(typ);
assert(skip || jl_isbits(typ));
if (jl_isbits(typ)) {
if (jl_isbits(typ) && jl_datatype_size(typ) > 0) {
if (!src.ispointer() || src.constant) {
emit_unbox(store_ty, src, typ, dest, isVolatile);
}
@@ -2238,15 +2288,29 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id
if (jl_field_isptr(sty, idx0)) {
Value *r = boxed(rhs, ctx, false); // don't need a temporary gcroot since it'll be rooted by strct (but should ensure strct is rooted via mark_gc_use)
tbaa_decorate(strct.tbaa, builder.CreateStore(r, emit_bitcast(addr, T_ppjlvalue)));
if (wb && strct.isboxed) emit_checked_write_barrier(ctx, boxed(strct, ctx), r);
if (wb && strct.isboxed)
emit_checked_write_barrier(ctx, boxed(strct, ctx), r);
mark_gc_use(strct);
}
else {
int align = jl_field_offset(sty, idx0);
align |= 16;
align &= -align;
typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx,
strct.tbaa, data_pointer(strct, ctx, T_pjlvalue), align);
if (jl_is_uniontype(jfty)) {
int fsz = jl_field_size(sty, idx0);
// compute tindex from rhs
jl_cgval_t rhs_union = convert_julia_type(rhs, jfty, ctx);
Value *ptindex = builder.CreateGEP(LLVM37_param(T_int8) emit_bitcast(addr, T_pint8), ConstantInt::get(T_size, fsz - 1));
Value *tindex = compute_tindex_unboxed(rhs_union, jfty, ctx);
tindex = builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
builder.CreateStore(tindex, ptindex);
// copy data
emit_unionmove(addr, rhs, NULL, false, NULL, ctx);
}
else {
int align = jl_field_offset(sty, idx0);
align |= 16;
align &= -align;
typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx,
strct.tbaa, data_pointer(strct, ctx, T_pjlvalue), align);
}
}
}
else {
31 changes: 3 additions & 28 deletions src/codegen.cpp
@@ -835,6 +835,8 @@ static inline jl_cgval_t update_julia_type(const jl_cgval_t &v, jl_value_t *typ,
return jl_cgval_t(v, typ, NULL);
}

static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_codectx_t *ctx, bool needsroot = true);

// --- allocating local variables ---

static jl_sym_t *slot_symbol(int s, jl_codectx_t *ctx)
@@ -915,7 +917,7 @@ static void jl_rethrow_with_add(const char *fmt, ...)
}

// given a value marked with type `v.typ`, compute the mapping and/or boxing to return a value of type `typ`
static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_codectx_t *ctx, bool needsroot = true)
static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_codectx_t *ctx, bool needsroot)
{
if (typ == (jl_value_t*)jl_typeofbottom_type)
return ghostValue(typ); // normalize TypeofBottom to Type{Union{}}
@@ -3717,33 +3719,6 @@ static Value *try_emit_union_alloca(jl_uniontype_t *ut, bool &allunbox, size_t &
return NULL;
}

static Value *compute_box_tindex(Value *datatype, jl_value_t *supertype, jl_value_t *ut, jl_codectx_t *ctx)
{
Value *tindex = ConstantInt::get(T_int8, 0);
unsigned counter = 0;
for_each_uniontype_small(
[&](unsigned idx, jl_datatype_t *jt) {
if (jl_subtype((jl_value_t*)jt, supertype)) {
Value *cmp = builder.CreateICmpEQ(literal_pointer_val((jl_value_t*)jt), datatype);
tindex = builder.CreateSelect(cmp, ConstantInt::get(T_int8, idx), tindex);
}
},
ut,
counter);
return tindex;
}

// get the runtime tindex value
static Value *compute_tindex_unboxed(const jl_cgval_t &val, jl_value_t *typ, jl_codectx_t *ctx)
{
if (val.constant)
return ConstantInt::get(T_int8, get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
if (val.isboxed)
return compute_box_tindex(emit_typeof_boxed(val, ctx), val.typ, typ, ctx);
assert(val.TIndex);
return builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
}

static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
{
if (jl_is_ssavalue(l)) {
100 changes: 81 additions & 19 deletions src/datatype.c
@@ -222,6 +222,38 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
return alignment;
}

static int jl_layout_isbits(jl_value_t *ty)
{
if (jl_isbits(ty) && jl_is_leaf_type(ty)) {
if (((jl_datatype_t*)ty)->layout) // layout check handles possible layout recursion
return 1;
}
return 0;
}

static unsigned jl_union_isbits(jl_value_t *ty, size_t *nbytes, size_t *align)
{
if (jl_is_uniontype(ty)) {
unsigned na = jl_union_isbits(((jl_uniontype_t*)ty)->a, nbytes, align);
if (na == 0)
return 0;
unsigned nb = jl_union_isbits(((jl_uniontype_t*)ty)->b, nbytes, align);
if (nb == 0)
return 0;
return na + nb;
}
if (jl_layout_isbits(ty)) {
size_t sz = jl_datatype_size(ty);
size_t al = ((jl_datatype_t*)ty)->layout->alignment;
if (*nbytes < sz)
*nbytes = sz;
if (*align < al)
*align = al;
return 1;
}
return 0;
}

void jl_compute_field_offsets(jl_datatype_t *st)
{
size_t sz = 0, alignm = 1;
@@ -272,16 +304,21 @@ void jl_compute_field_offsets(jl_datatype_t *st)

for (size_t i = 0; i < nfields; i++) {
jl_value_t *ty = jl_field_type(st, i);
size_t fsz, al;
if (jl_isbits(ty) && jl_is_leaf_type(ty) && ((jl_datatype_t*)ty)->layout) {
fsz = jl_datatype_size(ty);
size_t fsz = 0, al = 0;
unsigned countbits = jl_union_isbits(ty, &fsz, &al);
if (countbits > 0 && countbits < 127) {
// Should never happen
if (__unlikely(fsz > max_size))
goto throw_ovf;
al = ((jl_datatype_t*)ty)->layout->alignment;
desc[i].isptr = 0;
if (((jl_datatype_t*)ty)->layout->haspadding)
if (jl_is_uniontype(ty)) {
haspadding = 1;
fsz += 1; // selector byte
}
else { // isbits struct
if (((jl_datatype_t*)ty)->layout->haspadding)
haspadding = 1;
}
}
else {
fsz = sizeof(void*);
@@ -306,7 +343,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
goto throw_ovf;
sz += fsz;
}
if (homogeneous && lastty!=NULL && jl_is_tuple_type(st)) {
if (homogeneous && lastty != NULL && jl_is_tuple_type(st)) {
// Some tuples become LLVM vectors with stronger alignment than what was calculated above.
unsigned al = jl_special_vector_alignment(nfields, lastty);
assert(al % alignm == 0);
@@ -317,10 +354,12 @@ void jl_compute_field_offsets(jl_datatype_t *st)
if (st->size > sz)
haspadding = 1;
st->layout = jl_get_layout(nfields, alignm, haspadding, desc);
if (descsz >= jl_page_size) free(desc);
if (descsz >= jl_page_size)
free(desc);
return;
throw_ovf:
if (descsz >= jl_page_size) free(desc);
if (descsz >= jl_page_size)
free(desc);
jl_throw(jl_overflow_exception);
}

@@ -715,46 +754,69 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
{
jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
assert(i < jl_datatype_nfields(st));
size_t offs = jl_field_offset(st,i);
if (jl_field_isptr(st,i)) {
size_t offs = jl_field_offset(st, i);
if (jl_field_isptr(st, i)) {
return *(jl_value_t**)((char*)v + offs);
}
return jl_new_bits(jl_field_type(st,i), (char*)v + offs);
jl_value_t *ty = jl_field_type(st, i);
if (jl_is_uniontype(ty)) {
uint8_t sel = ((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
ty = jl_nth_union_component(ty, sel);
}
return jl_new_bits(ty, (char*)v + offs);
}

JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
{
jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
if (i >= jl_datatype_nfields(st))
jl_bounds_error_int(v, i+1);
size_t offs = jl_field_offset(st,i);
if (jl_field_isptr(st,i)) {
jl_bounds_error_int(v, i + 1);
size_t offs = jl_field_offset(st, i);
if (jl_field_isptr(st, i)) {
jl_value_t *fval = *(jl_value_t**)((char*)v + offs);
if (fval == NULL)
jl_throw(jl_undefref_exception);
return fval;
}
return jl_new_bits(jl_field_type(st,i), (char*)v + offs);
jl_value_t *ty = jl_field_type(st, i);
if (jl_is_uniontype(ty)) {
size_t fsz = jl_field_size(st, i);
uint8_t sel = ((uint8_t*)v)[offs + fsz - 1];
ty = jl_nth_union_component(ty, sel);
if (jl_is_datatype_singleton((jl_datatype_t*)ty))
return ((jl_datatype_t*)ty)->instance;
}
return jl_new_bits(ty, (char*)v + offs);
}

JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs)
{
jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
size_t offs = jl_field_offset(st,i);
if (jl_field_isptr(st,i)) {
size_t offs = jl_field_offset(st, i);
if (jl_field_isptr(st, i)) {
*(jl_value_t**)((char*)v + offs) = rhs;
if (rhs != NULL) jl_gc_wb(v, rhs);
}
else {
jl_value_t *ty = jl_field_type(st, i);
if (jl_is_uniontype(ty)) {
uint8_t *psel = &((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
unsigned nth = 0;
if (!jl_find_union_component(ty, jl_typeof(rhs), &nth))
assert(0 && "invalid field assignment to isbits union");
*psel = nth;
if (jl_is_datatype_singleton((jl_datatype_t*)ty))
return;
}
jl_assign_bits((char*)v + offs, rhs);
}
}

JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i)
{
jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
size_t offs = jl_field_offset(st,i);
if (jl_field_isptr(st,i)) {
size_t offs = jl_field_offset(st, i);
if (jl_field_isptr(st, i)) {
return *(jl_value_t**)((char*)v + offs) != NULL;
}
return 1;
(The remaining 4 changed files are not shown.)

2 comments on commit 8d5da6a

@quinnj (Member) commented on 8d5da6a, Jun 4, 2017

@vtjnash, I think I tracked down the extra allocation I'm seeing in my Union performance tests (mentioned here). It seems to come down to the jl_builtin_arrayset codegen here, because I get a merge_own block in my @code_llvm:

merge_own49:                                      ; preds = %array_owned48, %L539
  %119 = phi i8** [ %95, %L539 ], [ %118, %array_owned48 ]
  %120 = bitcast i8** %95 to i8****
  %121 = load i8***, i8**** %120, align 8
  %122 = call i8** @jl_box_int64(i64 signext %val.sroa.0.2)
  %123 = getelementptr i8*, i8** %119, i64 -1
  %124 = bitcast i8** %123 to i64*
  %125 = load i64, i64* %124, align 8
  %126 = and i64 %125, 3
  %127 = icmp eq i64 %126, 3
  br i1 %127, label %wb_may_trigger50, label %cont52

It looks like, to get there, it's going down the isboxed == true code path; is this another place that can be optimized for Unions with isbits types?
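
For context, here is a minimal, hypothetical sketch (0.6-era syntax; the actual benchmark from the Union performance tests is not shown in this thread) of the kind of code that reaches this arrayset path: writing isbits values into a Vector whose element type is a small Union. Per the IR above, the store still goes through the boxed branch, which is where the jl_box_int64 call comes from.

    # Hypothetical reproduction of the pattern described above; names and sizes are
    # illustrative, not taken from the original benchmark.
    function store_all!(A::Vector{Union{Int64, Float64}}, val::Int64)
        @inbounds for i in eachindex(A)
            A[i] = val   # lowers to arrayset; per the comment above, the value is boxed before the store
        end
        return A
    end

    A = Vector{Union{Int64, Float64}}(100)   # 0.6-era uninitialized-vector constructor
    store_all!(A, 42)
    # @code_llvm store_all!(A, 42)   # exposes the merge_own / jl_box_int64 sequence quoted above
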

@quinnj (Member) commented on 8d5da6a, Jun 4, 2017

And in looking into the Vector allocation performance, it seems like we'd want to update this code to allow isbits Union types. Though I just realized, looking back over all the code here, that this commit is optimizing field layout, not array layout; I think that's been mentioned as doable, though?
