From 630a551f426798bf02a1786f9470a262a0aca4af Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 17 Dec 2019 12:45:20 -0500 Subject: [PATCH] layout: support pointers inlining into parents [NFCI] (#33886) Includes codegen support for immutable objects that contain pointers appearing on stack (well, in registers, since LLVM support of non-integral addrspace pointers inside aggregates in memory is poor), and includes layout support, so that most (non-self-referential) immutable objects can be stored inline inside their parent allocation. Currently fully disabled, aside from some optimizations and improvements to object_id / isa tests. Co-Authored-By: Oscar Blumberg --- base/array.jl | 79 +++--- base/reflection.jl | 1 + base/refpointer.jl | 2 +- base/refvalue.jl | 2 +- src/array.c | 70 ++++-- src/builtins.c | 79 +++--- src/ccall.cpp | 15 +- src/cgutils.cpp | 113 +++++++-- src/codegen.cpp | 278 +++++++++++++-------- src/codegen_shared.h | 2 + src/datatype.c | 66 ++++- src/dump.c | 27 +- src/gc-debug.c | 6 +- src/gc.c | 166 ++++++++++-- src/gc.h | 10 + src/intrinsics.cpp | 12 +- src/jltypes.c | 6 + src/julia.h | 51 +++- src/julia_internal.h | 5 +- src/llvm-late-gc-lowering.cpp | 202 +++++++++++++-- src/staticdata.c | 87 +++++-- stdlib/Serialization/src/Serialization.jl | 2 +- stdlib/SparseArrays/test/higherorderfns.jl | 37 ++- test/core.jl | 155 ++++++------ test/threads_exec.jl | 11 +- 25 files changed, 1072 insertions(+), 412 deletions(-) diff --git a/base/array.jl b/base/array.jl index 34e0c052546b9..6a8ad66c47057 100644 --- a/base/array.jl +++ b/base/array.jl @@ -158,7 +158,7 @@ size(a::Array{<:Any,N}) where {N} = (@_inline_meta; ntuple(M -> size(a, M), Val( asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...) -allocatedinline(::Type{T}) where {T} = (@_pure_meta; ccall(:jl_array_store_unboxed, Cint, (Any,), T) != Cint(0)) +allocatedinline(::Type{T}) where {T} = (@_pure_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0)) """ Base.isbitsunion(::Type{T}) @@ -177,14 +177,20 @@ false isbitsunion(u::Union) = allocatedinline(u) isbitsunion(x) = false -function _unsetindex!(A::Array{T}, i::Int) where {T} +@inbounds function _unsetindex!(A::Array{T}, i::Int) where {T} @boundscheck checkbounds(A, i) + t = @_gc_preserve_begin A + p = Ptr{Ptr{Cvoid}}(pointer(A, i)) if !allocatedinline(T) - t = @_gc_preserve_begin A - p = Ptr{Ptr{Cvoid}}(pointer(A)) - unsafe_store!(p, C_NULL, i) - @_gc_preserve_end t + unsafe_store!(p, C_NULL) + elseif T isa DataType + if !datatype_pointerfree(T) + for j = 1:(Core.sizeof(T) ÷ Core.sizeof(Ptr{Cvoid})) + unsafe_store!(p, C_NULL, j) + end + end end + @_gc_preserve_end t return A end @@ -255,19 +261,41 @@ the same manner as C. 
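[editor's note] The rename above (`jl_array_store_unboxed` → `jl_stored_inline`) reflects that the query now also answers true for immutables that carry GC pointers, and the reworked `_unsetindex!` zeroes every pointer-sized word of such an inline element instead of a single array slot. A minimal sketch of the Julia-level query, assuming a build that includes this patch; per the commit message the feature is still fully disabled, so pointer-carrying immutables keep answering `false`:

```julia
struct TwoRefs
    a::String   # would be a tracked pointer once stored inline
    b::String
end

Base.allocatedinline(Int)      # true: plain isbits data lives in the array buffer
Base.allocatedinline(String)   # false: mutable, stored as a boxed pointer
Base.allocatedinline(TwoRefs)  # false here: inline allocation of pointer-carrying
                               # immutables is switched off by this commit
```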
function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T t1 = @_gc_preserve_begin dest t2 = @_gc_preserve_begin src - if isbitsunion(T) + destp = pointer(dest, doffs) + srcp = pointer(src, soffs) + if !allocatedinline(T) + ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), + dest, destp, src, srcp, n) + elseif isbitstype(T) ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), - pointer(dest, doffs), pointer(src, soffs), n * aligned_sizeof(T)) + destp, srcp, n * aligned_sizeof(T)) + elseif isbitsunion(T) + ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), + destp, srcp, n * aligned_sizeof(T)) # copy selector bytes ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1, ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1, n) - elseif allocatedinline(T) - unsafe_copyto!(pointer(dest, doffs), pointer(src, soffs), n) else - ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), - dest, pointer(dest, doffs), src, pointer(src, soffs), n) + # handle base-case: everything else above was just optimizations + @inbounds if destp < srcp || destp > srcp + n + for i = 1:n + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + end + end + else + for i = n:-1:1 + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + end + end + end end @_gc_preserve_end t2 @_gc_preserve_end t1 @@ -1566,32 +1594,13 @@ function vcat(arrays::Vector{T}...) where T n += length(a) end arr = Vector{T}(undef, n) - ptr = pointer(arr) - if isbitsunion(T) - selptr = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), arr) - end - elsz = aligned_sizeof(T) - t = @_gc_preserve_begin arr + nd = 1 for a in arrays na = length(a) - nba = na * elsz - if isbitsunion(T) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), - ptr, a, nba) - # copy selector bytes - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), - selptr, ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a), na) - selptr += na - elseif allocatedinline(T) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), - ptr, a, nba) - else - ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), - arr, ptr, a, pointer(a), na) - end - ptr += nba + @assert nd + na <= 1 + length(arr) # Concurrent modification of arrays? 
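[editor's note] The rewritten `unsafe_copyto!` above adds an explicit elementwise base case that copies `#undef` slots as `#undef` (via `isassigned` and `_unsetindex!`) and iterates backwards when the destination overlaps the source; the `vcat` body continuing below simply delegates to it. A small sketch of the observable semantics, runnable even on a stock build since pointer arrays take the `jl_array_ptr_copy` fast path with the same behavior:

```julia
src = Vector{Vector{Int}}(undef, 3)
src[1] = [1, 2]                         # src[2] and src[3] stay #undef
dst = Vector{Vector{Int}}(undef, 3)
unsafe_copyto!(dst, 1, src, 1, 3)
isassigned(dst, 1), isassigned(dst, 2)  # (true, false): undef slots copy as undef
```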
+ unsafe_copyto!(arr, nd, a, 1, na) + nd += na end - @_gc_preserve_end t return arr end diff --git a/base/reflection.jl b/base/reflection.jl index a276f98d2bb36..ccb00baa2e667 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -318,6 +318,7 @@ datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Any, (Any,), x) struct DataTypeLayout nfields::UInt32 npointers::UInt32 + firstptr::Int32 alignment::UInt32 # alignment : 9; # haspadding : 1; diff --git a/base/refpointer.jl b/base/refpointer.jl index cbacfc96c0886..b12886b6a5373 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -74,7 +74,7 @@ RefArray(x::AbstractArray{T}, i::Int=1, roots::Nothing=nothing) where {T} = RefA convert(::Type{Ref{T}}, x::AbstractArray{T}) where {T} = RefArray(x, 1) function unsafe_convert(P::Type{Ptr{T}}, b::RefArray{T}) where T - if datatype_pointerfree(RefValue{T}) + if allocatedinline(T) p = pointer(b.x, b.i) elseif isconcretetype(T) && T.mutable p = pointer_from_objref(b.x[b.i]) diff --git a/base/refvalue.jl b/base/refvalue.jl index f1f22bc389309..6803ef8314355 100644 --- a/base/refvalue.jl +++ b/base/refvalue.jl @@ -11,7 +11,7 @@ RefValue(x::T) where {T} = RefValue{T}(x) isassigned(x::RefValue) = isdefined(x, :x) function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T - if datatype_pointerfree(RefValue{T}) + if allocatedinline(T) p = pointer_from_objref(b) elseif isconcretetype(T) && T.mutable p = pointer_from_objref(b.x) diff --git a/src/array.c b/src/array.c index 3ec965a7ca5c5..11db69bdfea61 100644 --- a/src/array.c +++ b/src/array.c @@ -27,12 +27,6 @@ char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT return ((char*)jl_array_data(a)) + ((jl_array_ndims(a) == 1 ? (a->maxsize - a->offset) : jl_array_len(a)) * a->elsize) + a->offset; } -JL_DLLEXPORT int jl_array_store_unboxed(jl_value_t *eltype) JL_NOTSAFEPOINT -{ - size_t fsz = 0, al = 0; - return jl_islayout_inline(eltype, &fsz, &al); -} - STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { if (a->flags.how == 3) { @@ -53,7 +47,7 @@ size_t jl_arr_xtralloc_limit = 0; #define MAXINTVAL (((size_t)-1)>>1) static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int isunion, int elsz) + int isunboxed, int hasptr, int isunion, int elsz) { jl_ptls_t ptls = jl_get_ptls_states(); size_t i, tot, nel=1; @@ -101,7 +95,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, // No allocation or safepoint allowed after this a->flags.how = 0; data = (char*)a + doffs; - if ((tot > 0 && !isunboxed) || isunion) + if (tot > 0 && (!isunboxed || hasptr || isunion)) // TODO: check for zeroinit memset(data, 0, tot); } else { @@ -113,7 +107,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, // No allocation or safepoint allowed after this a->flags.how = 2; jl_gc_track_malloced_array(ptls, a); - if (!isunboxed || isunion) + if (tot > 0 && (!isunboxed || hasptr || isunion)) // TODO: check for zeroinit // need to zero out isbits union array selector bytes to ensure a valid type index memset(data, 0, tot); } @@ -127,6 +121,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, #endif a->flags.ndims = ndims; a->flags.ptrarray = !isunboxed; + a->flags.hasptr = hasptr; a->elsize = elsz; a->flags.isshared = 0; a->flags.isaligned = 1; @@ -135,9 +130,12 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, a->nrows = nel; a->maxsize = nel; } + else if 
(a->flags.ndims != ndims) { + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); + } else { size_t *adims = &a->nrows; - for(i=0; i < ndims; i++) + for (i = 0; i < ndims; i++) adims[i] = dims[i]; } @@ -152,6 +150,7 @@ static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t * jl_type_error_rt("Array", "element type", (jl_value_t*)jl_type_type, eltype); int isunboxed = jl_islayout_inline(eltype, &elsz, &al); int isunion = jl_is_uniontype(eltype); + int hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); if (!isunboxed) { elsz = sizeof(void*); al = elsz; @@ -160,13 +159,13 @@ static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t * elsz = LLT_ALIGN(elsz, al); } - return _new_array_(atype, ndims, dims, isunboxed, isunion, elsz); + return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz); } jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int isunion, int elsz) + int isunboxed, int hasptr, int isunion, int elsz) { - return _new_array_(atype, ndims, dims, isunboxed, isunion, elsz); + return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz); } #ifndef JL_NDEBUG @@ -224,10 +223,12 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, "reinterpret from alignment %d bytes to alignment %d bytes not allowed", (int) oldalign, (int) align); a->flags.ptrarray = 0; + a->flags.hasptr = data->flags.hasptr; } else { a->elsize = sizeof(void*); a->flags.ptrarray = 1; + a->flags.hasptr = 0; } // if data is itself a shared wrapper, @@ -247,6 +248,9 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, a->nrows = l; a->maxsize = l; } + else if (a->flags.ndims != ndims) { + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); + } else { size_t *adims = &a->nrows; size_t l = 1; @@ -281,6 +285,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) a->flags.isaligned = 0; a->elsize = 1; a->flags.ptrarray = 0; + a->flags.hasptr = 0; jl_array_data_owner(a) = str; a->flags.how = 3; a->flags.isshared = 1; @@ -300,12 +305,12 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, jl_array_t *a; jl_value_t *eltype = jl_tparam0(atype); - int isunboxed = jl_array_store_unboxed(eltype); - size_t elsz; - unsigned align; + int isunboxed = jl_stored_inline(eltype); if (isunboxed && jl_is_uniontype(eltype)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: unspecified layout for union element type"); + size_t elsz; + unsigned align; if (isunboxed) { elsz = jl_datatype_size(eltype); align = jl_datatype_align(eltype); @@ -328,6 +333,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, #endif a->elsize = LLT_ALIGN(elsz, align); a->flags.ptrarray = !isunboxed; + a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); a->flags.ndims = 1; a->flags.isshared = 1; a->flags.isaligned = 0; // TODO: allow passing memalign'd buffers @@ -366,12 +372,12 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, return jl_ptr_to_array_1d(atype, data, nel, own_buffer); jl_value_t *eltype = jl_tparam0(atype); - int isunboxed = jl_array_store_unboxed(eltype); - size_t elsz; - unsigned align; + int isunboxed = jl_stored_inline(eltype); if (isunboxed && jl_is_uniontype(eltype)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: unspecified layout for union 
element type"); + size_t elsz; + unsigned align; if (isunboxed) { elsz = jl_datatype_size(eltype); align = jl_datatype_align(eltype); @@ -394,6 +400,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, #endif a->elsize = LLT_ALIGN(elsz, align); a->flags.ptrarray = !isunboxed; + a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); a->flags.ndims = ndims; a->offset = 0; a->flags.isshared = 1; @@ -408,6 +415,8 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, } assert(ndims != 1); // handled above + if (a->flags.ndims != ndims) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); memcpy(&a->nrows, dims, ndims * sizeof(size_t)); return a; } @@ -559,8 +568,16 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i) JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i) { - if (a->flags.ptrarray) + if (a->flags.ptrarray) { return ((jl_value_t**)jl_array_data(a))[i] != NULL; + } + else if (a->flags.hasptr) { + jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); + assert(eltype->layout->first_ptr >= 0); + jl_value_t **slot = + (jl_value_t**)(&((char*)a->data)[i*a->elsize] + eltype->layout->first_ptr); + return *slot != NULL; + } return 1; } @@ -585,6 +602,8 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs return; } jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs); + if (a->flags.hasptr) + jl_gc_multi_wb(jl_array_owner(a), rhs); } else { ((jl_value_t**)a->data)[i] = rhs; @@ -598,6 +617,11 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) jl_bounds_error_int((jl_value_t*)a, i + 1); if (a->flags.ptrarray) ((jl_value_t**)a->data)[i] = NULL; + else if (a->flags.hasptr) { + size_t elsize = a->elsize; + jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); + memset(&((jl_value_t**)a->data)[i], 0, elsize); + } } // at this size and bigger, allocate resized array data with malloc directly @@ -809,7 +833,7 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc, #endif a->nrows = newnrows; a->data = newdata; - if (a->flags.ptrarray) { + if (a->flags.ptrarray || a->flags.hasptr) { // TODO: check for zeroinit memset(newdata + idx * elsz, 0, nbinc); } else if (isbitsunion) { @@ -890,7 +914,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx, a->length = newnrows; #endif a->nrows = newnrows; - if (a->flags.ptrarray) { + if (a->flags.ptrarray || a->flags.hasptr) { // TODO: check for zeroinit memset(data + idx * elsz, 0, inc * elsz); } } @@ -1129,7 +1153,7 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary))); jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary), &ary->nrows, !ary->flags.ptrarray, - isunion, elsz); + ary->flags.hasptr, isunion, elsz); memcpy(new_ary->data, ary->data, len * elsz); // ensure isbits union arrays copy their selector bytes correctly if (jl_array_isbitsunion(ary)) diff --git a/src/builtins.c b/src/builtins.c index 328be6aefd543..4beea1e7aab25 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -81,8 +81,8 @@ static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t * size_t f, nf = jl_datatype_nfields(dt); for (f = 0; f < nf; f++) { size_t offs = jl_field_offset(dt, f); - char *ao = (char*)jl_data_ptr(a) + offs; - char *bo = (char*)jl_data_ptr(b) + offs; + char *ao = (char*)a + offs; + char *bo = (char*)b + offs; if (jl_field_isptr(dt, 
f)) { jl_value_t *af = *(jl_value_t**)ao; jl_value_t *bf = *(jl_value_t**)bo; @@ -190,8 +190,8 @@ JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE if (sz == 0) return 1; size_t nf = jl_datatype_nfields(dt); - if (nf == 0) - return bits_equal(jl_data_ptr(a), jl_data_ptr(b), sz); + if (nf == 0 || !dt->layout->haspadding) + return bits_equal(a, b, sz); if (dt == jl_unionall_type) return egal_types(a, b, NULL); return compare_fields(a, b, dt); @@ -277,6 +277,44 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN return jl_object_id_((jl_value_t*)tv, v); } +static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOTSAFEPOINT +{ + size_t sz = jl_datatype_size(dt); + if (sz == 0) + return ~h; + size_t f, nf = jl_datatype_nfields(dt); + if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) { + // operate element-wise if there are unused bits inside, + // otherwise just take the whole data block at once + // a few select pointers (notably symbol) also have special hash values + // which may affect the stability of the objectid hash, even though + // they don't affect egal comparison + return bits_hash(v, sz) ^ h; + } + if (dt == jl_unionall_type) + return type_object_id_(v, NULL); + for (f = 0; f < nf; f++) { + size_t offs = jl_field_offset(dt, f); + char *vo = (char*)v + offs; + uintptr_t u; + if (jl_field_isptr(dt, f)) { + jl_value_t *f = *(jl_value_t**)vo; + u = (f == NULL) ? 0 : jl_object_id(f); + } + else { + jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type_concrete(dt, f); + if (jl_is_uniontype(fieldtype)) { + uint8_t sel = ((uint8_t*)vo)[jl_field_size(dt, f) - 1]; + fieldtype = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)fieldtype, sel); + } + assert(jl_is_datatype(fieldtype) && !fieldtype->abstract && !fieldtype->mutabl); + u = immut_id_(fieldtype, (jl_value_t*)vo, 0); + } + h = bitmix(h, u); + } + return h; +} + JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT { if (tv == (jl_value_t*)jl_symbol_type) @@ -303,38 +341,7 @@ JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPO } if (dt->mutabl) return inthash((uintptr_t)v); - size_t sz = jl_datatype_size(tv); - uintptr_t h = jl_object_id(tv); - if (sz == 0) - return ~h; - size_t f, nf = jl_datatype_nfields(dt); - if (nf == 0) - return bits_hash(jl_data_ptr(v), sz) ^ h; - if (dt == jl_unionall_type) - return type_object_id_(v, NULL); - for (f = 0; f < nf; f++) { - size_t offs = jl_field_offset(dt, f); - char *vo = (char*)jl_data_ptr(v) + offs; - uintptr_t u; - if (jl_field_isptr(dt, f)) { - jl_value_t *f = *(jl_value_t**)vo; - u = (f == NULL) ? 
0 : jl_object_id(f); - } - else { - jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type_concrete(dt, f); - if (jl_is_uniontype(fieldtype)) { - uint8_t sel = ((uint8_t*)vo)[jl_field_size(dt, f) - 1]; - fieldtype = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)fieldtype, sel); - } - assert(jl_is_datatype(fieldtype) && !fieldtype->abstract && !fieldtype->mutabl); - if (fieldtype->layout->haspadding) - u = jl_object_id_((jl_value_t*)fieldtype, (jl_value_t*)vo); - else - u = bits_hash(vo, fieldtype->size); - } - h = bitmix(h, u); - } - return h; + return immut_id_(dt, v, jl_object_id(tv)); } JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT diff --git a/src/ccall.cpp b/src/ccall.cpp index f186e06945600..a94c62d1b515d 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1625,7 +1625,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) jl_datatype_t *arydt = (jl_datatype_t*)jl_unwrap_unionall(aryv.typ); if (jl_is_array_type(arydt)) { jl_value_t *ety = jl_tparam0(arydt); - if (jl_array_store_unboxed(ety)) { + bool ptrarray = !jl_stored_inline(ety); + if (!ptrarray && !jl_type_hasptr(ety)) { JL_GC_POP(); return mark_or_box_ccall_result(ctx, ConstantInt::get(T_int32, 1), false, rt, unionall, static_rt); @@ -1633,6 +1634,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) else if (!jl_has_free_typevars(ety)) { Value *idx = emit_unbox(ctx, T_size, idxv, (jl_value_t*)jl_ulong_type); Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), T_pprjlvalue); + if (!ptrarray) { + size_t elsz = jl_datatype_size(ety); + unsigned align = jl_datatype_align(ety); + size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*); + if (stride != 1) + idx = ctx.builder.CreateMul(idx, ConstantInt::get(T_size, stride)); + idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ((jl_datatype_t*)ety)->layout->first_ptr)); + } Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, arrayptr, idx); Value *load = tbaa_decorate(tbaa_ptrarraybuf, ctx.builder.CreateLoad(T_prjlvalue, slot_addr)); Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(T_prjlvalue)), T_int32); @@ -1782,12 +1791,14 @@ jl_cgval_t function_sig_t::emit_a_ccall( bool sretboxed = false; if (sret) { assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); - if (jl_justbits(rt)) { + if (jl_justbits(rt, true)) { result = emit_static_alloca(ctx, lrt); argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0)); } else { // XXX: result needs to be zero'd and given a GC root here + // and has incorrect write barriers. 
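[editor's note] Backing up to the builtins.c change: `jl_object_id_` now defers to the recursive `immut_id_` above, which hashes pointer fields by `jl_object_id` of the referenced value rather than by the raw pointer bits, keeping `objectid` consistent with `===` once pointers may live inside inline immutables. A sketch of the invariant this preserves, using only documented `objectid`/`===` semantics:

```julia
struct Wrap
    s::String               # a pointer field inside an immutable
end
a = Wrap("hello")
b = Wrap("hello")           # distinct String objects with equal contents
a === b                     # true: egal compares immutables fieldwise
objectid(a) == objectid(b)  # true: the hash follows the pointer, not its bits
```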
+ // instead this code path should behave like `unsafe_load` assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance"); result = emit_allocobj(ctx, jl_datatype_size(rt), literal_pointer_val(ctx, (jl_value_t*)rt)); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 3284fa1a1ff36..52a3af170cbf2 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -565,6 +565,11 @@ static Type *bitstype_to_llvm(jl_value_t *bt, bool llvmcall = false) return Type::getIntNTy(jl_LLVMContext, nb * 8); } +static bool jl_type_hasptr(jl_value_t* typ) +{ // assumes that jl_stored_inline(typ) is true + return jl_is_datatype(typ) && ((jl_datatype_t*)typ)->layout->npointers > 0; +} + // compute whether all concrete subtypes of this type have the same layout // (which is conservatively approximated here by asking whether the types of any of the // fields depend on any of the parameters of the containing type) @@ -751,12 +756,6 @@ static bool is_tupletype_homogeneous(jl_svec_t *t, bool allow_va = false) return true; } -static bool deserves_sret(jl_value_t *dt, Type *T) -{ - assert(jl_is_datatype(dt)); - return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy(); -} - static bool for_each_uniontype_small( std::function f, jl_value_t *ty, @@ -769,7 +768,7 @@ static bool for_each_uniontype_small( allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter); return allunbox; } - else if (jl_justbits(ty)) { + else if (jl_justbits(ty, true)) { f(++counter, (jl_datatype_t*)ty); return true; } @@ -1281,6 +1280,38 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v } static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, MDNode *tbaa_dest = nullptr, bool isVolatile = false); +static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef); +static void emit_write_barrier(jl_codectx_t&, Value*, Value*); +static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*); + +std::vector first_ptr(Type *T) +{ + if (isa(T)) { + if (!isa(T) && cast(T)->getNumElements() == 0) + return {}; + unsigned i = 0; + for (Type *ElTy : T->subtypes()) { + if (isa(ElTy) && ElTy->getPointerAddressSpace() == AddressSpace::Tracked) { + return std::move(std::vector{i}); + } + auto path = first_ptr(ElTy); + if (!path.empty()) { + path.push_back(i); + return std::move(path); + } + i++; + } + } + return {}; +} +Value *extract_first_ptr(jl_codectx_t &ctx, Value *V) +{ + auto path = first_ptr(V->getType()); + if (path.empty()) + return NULL; + std::reverse(std::begin(path), std::end(path)); + return ctx.builder.CreateExtractValue(V, path); +} static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope, @@ -1313,8 +1344,11 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j load = maybe_mark_load_dereferenceable(load, true, jltype); if (tbaa) load = tbaa_decorate(tbaa, load); - if (maybe_null_if_boxed && isboxed) - null_pointer_check(ctx, load); + if (maybe_null_if_boxed) { + Value *first_ptr = isboxed ? 
load : extract_first_ptr(ctx, load); + if (first_ptr) + null_pointer_check(ctx, first_ptr); + } //} if (jltype == (jl_value_t*)jl_bool_type) { // "freeze" undef memory to a valid value // NOTE: if we zero-initialize arrays, this optimization should become valid @@ -1339,6 +1373,8 @@ static void typed_store(jl_codectx_t &ctx, Value *r; if (!isboxed) { r = emit_unbox(ctx, elty, rhs, jltype); + if (parent != NULL) + emit_write_multibarrier(ctx, parent, r); } else { r = maybe_decay_untracked(boxed(ctx, rhs)); @@ -1509,7 +1545,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, if (sizeof(void*) != sizeof(int)) idx = ctx.builder.CreateTrunc(idx, T_int32); // llvm3.3 requires this, harmless elsewhere Value *fld = ctx.builder.CreateExtractElement(strct.V, idx); - *ret = mark_julia_type(ctx, fld, false, jft); + *ret = mark_julia_type(ctx, fld, isboxed, jft); return true; } else if (isa(strct.V->getType())) { @@ -1737,9 +1773,12 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st llvm_unreachable("encountered incompatible type for a struct"); fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx)); } - if (maybe_null && jl_field_isptr(jt, idx)) - null_pointer_check(ctx, fldv); - return mark_julia_type(ctx, fldv, false, jfty); + if (maybe_null) { + Value *first_ptr = jl_field_isptr(jt, idx) ? fldv : extract_first_ptr(ctx, fldv); + if (first_ptr) + null_pointer_check(ctx, first_ptr); + } + return mark_julia_type(ctx, fldv, jl_field_isptr(jt, idx), jfty); } } @@ -1893,7 +1932,7 @@ static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) Value *flags = emit_arrayflags(ctx, ary); cast(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(jl_LLVMContext, None)); flags = ctx.builder.CreateLShr(flags, 2); - flags = ctx.builder.CreateAnd(flags, 0x3FF); // (1<<10) - 1 + flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1 return flags; } @@ -2399,8 +2438,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con if (jl_is_concrete_type(src.typ) || src.constant) { jl_value_t *typ = src.constant ? 
jl_typeof(src.constant) : src.typ; Type *store_ty = julia_type_to_llvm(typ); - assert(skip || jl_justbits(typ)); - if (jl_justbits(typ)) { + assert(skip || jl_justbits(typ, true)); + if (jl_justbits(typ, true)) { if (!src.ispointer() || src.constant) { emit_unbox(ctx, store_ty, src, typ, dest, tbaa_dst, isVolatile); } @@ -2523,11 +2562,26 @@ static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval) // if ptr is NULL this emits a write barrier _back_ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr) { - parent = maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue)); - ptr = maybe_decay_untracked(emit_bitcast(ctx, ptr, T_prjlvalue)); - ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), {parent, ptr}); + emit_write_barrier(ctx, parent, makeArrayRef(ptr)); +} + +static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef ptrs) +{ + SmallVector decay_ptrs; + decay_ptrs.push_back(maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue))); + for (auto ptr : ptrs) { + decay_ptrs.push_back(maybe_decay_untracked(emit_bitcast(ctx, ptr, T_prjlvalue))); + } + ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs); +} + +static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg) +{ + auto ptrs = ExtractTrackedValues(agg, agg->getType(), false, ctx.builder); + emit_write_barrier(ctx, parent, ptrs); } + static void emit_setfield(jl_codectx_t &ctx, jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0, const jl_cgval_t &rhs, bool checked, bool wb) @@ -2587,26 +2641,35 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg jl_datatype_t *sty = (jl_datatype_t*)ty; size_t nf = jl_datatype_nfields(sty); if (nf > 0 || sty->mutabl) { - if (jl_justbits(ty)) { + if (deserves_stack(ty)) { Type *lt = julia_type_to_llvm(ty); unsigned na = nargs < nf ? nargs : nf; // whether we should perform the initialization with the struct as a IR value // or instead initialize the stack buffer with stores + auto tracked = CountTrackedPointers(lt); bool init_as_value = false; if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ? 
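[editor's note] The `emit_write_barrier`/`emit_write_multibarrier` overloads above feed the now-variadic `julia.write_barrier` intrinsic: storing one inline immutable can write several GC references at once, and the parent must be queued if any of them is young. A hedged Julia illustration of the situation the barrier guards; with this commit the feature is off, so `h.p` below is still a boxed field with an ordinary single-pointer barrier:

```julia
struct PairRefs
    a::Vector{Int}    # two tracked pointers inside one immutable value
    b::Vector{Int}
end
mutable struct Holder
    p::PairRefs
end
h = Holder(PairRefs([1], [2]))
h.p = PairRefs([3], [4])  # stored inline, this single assignment writes two
                          # pointer slots; the barrier must cover both
```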
init_as_value = true; } + else if (tracked.count) { + init_as_value = true; + } Value *strct; if (type_is_ghost(lt)) { strct = NULL; } else if (init_as_value) { - strct = UndefValue::get(lt); + if (tracked.count) + strct = Constant::getNullValue(lt); + else + strct = UndefValue::get(lt); } else { strct = emit_static_alloca(ctx, lt); + if (tracked.count) + undef_derived_strct(ctx.builder, strct, sty, tbaa_stack); } for (unsigned i = 0; i < na; i++) { @@ -2628,8 +2691,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg dest = ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx); } Value *fval = NULL; - assert(!jl_field_isptr(sty, i)); - if (jl_is_uniontype(jtype)) { + if (jl_field_isptr(sty, i)) { + fval = boxed(ctx, fval_info); + if (!init_as_value) + tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(fval, dest)); + } + else if (jl_is_uniontype(jtype)) { // compute tindex from rhs jl_cgval_t rhs_union = convert_julia_type(ctx, fval_info, jtype); if (rhs_union.typ == jl_bottom_type) diff --git a/src/codegen.cpp b/src/codegen.cpp index 94aaf233f8a8c..90ca5ea950957 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -365,12 +365,42 @@ static MDNode *best_tbaa(jl_value_t *jt) { } // tracks whether codegen is currently able to simply stack-allocate this type -// note that this is guaranteed to include jl_isbits -static bool jl_justbits(jl_value_t* t) +// note that this includes jl_isbits, although codegen should work regardless +static bool jl_justbits(jl_value_t* t, bool pointerfree=false) { - return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->layout && ((jl_datatype_t*)t)->layout->npointers == 0; + if (!jl_is_immutable_datatype(t)) + return 0; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)t)->layout; + if (!layout) + return 0; + if (pointerfree && layout->npointers != 0) + return 0; + return 1; } +// these queries are usually related, but we split them out here +// for convenience and clarity (and because it changes the calling convention) +static bool deserves_stack(jl_value_t* t, bool pointerfree=false) +{ + if (!jl_justbits(t)) + return false; + return ((jl_datatype_t*)t)->isinlinealloc; +} +static bool deserves_argbox(jl_value_t* t) +{ + return !deserves_stack(t); +} +static bool deserves_retbox(jl_value_t* t) +{ + return deserves_argbox(t); +} +static bool deserves_sret(jl_value_t *dt, Type *T) +{ + assert(jl_is_datatype(dt)); + return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy(); +} + + // metadata tracking for a llvm Value* during codegen struct jl_cgval_t { Value *V; // may be of type T* or T, or set to NULL if ghost (or if the value has not been initialized yet, for a variable definition) @@ -492,6 +522,7 @@ struct jl_returninfo_t { size_t union_bytes; size_t union_align; size_t union_minalign; + unsigned return_roots; }; static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, jl_value_t *sig, jl_value_t *jlrettype); @@ -581,17 +612,14 @@ static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) { assert(ptr->getType()->getPointerAddressSpace() != AddressSpace::Tracked); - if (sty->layout->npointers == 0) + size_t i, np = sty->layout->npointers; + if (np == 0) return; ptr = irbuilder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace())); Value *V_null = ConstantPointerNull::get(cast(T_prjlvalue)); - size_t 
i, nf = jl_datatype_nfields(sty); - for (i = 0; i < nf; i++) { - if (jl_field_isptr(sty, i)) { - tbaa_decorate(tbaa, irbuilder.CreateStore(V_null, - irbuilder.CreateInBoundsGEP(T_prjlvalue, ptr, - ConstantInt::get(T_size, jl_field_offset(sty, i) / sizeof(void*))))); - } + for (i = 0; i < np; i++) { + Value *fld = irbuilder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i)); + tbaa_decorate(tbaa, irbuilder.CreateStore(V_null, fld)); } } @@ -694,7 +722,7 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox if (type_is_ghost(T)) { return ghostValue(typ); } - if (v && !isboxed && v->getType()->isAggregateType()) { + if (v && !isboxed && v->getType()->isAggregateType() && CountTrackedPointers(v->getType()).count == 0) { // eagerly put this back onto the stack // llvm mem2reg pass will remove this if unneeded return value_to_pointer(ctx, v, typ, NULL); @@ -725,7 +753,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t & if (jl_is_datatype(utyp)) { bool alwaysboxed; if (jl_is_concrete_type(utyp)) - alwaysboxed = !jl_justbits(utyp); + alwaysboxed = !jl_justbits(utyp, true); else alwaysboxed = !((jl_datatype_t*)utyp)->abstract && ((jl_datatype_t*)utyp)->mutabl; if (alwaysboxed) { @@ -808,8 +836,6 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test) #endif #endif -static void emit_write_barrier(jl_codectx_t&, Value*, Value*); - #include "cgutils.cpp" static void jl_rethrow_with_add(const char *fmt, ...) @@ -986,7 +1012,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ return ghostValue(typ); Value *new_tindex = NULL; if (jl_is_concrete_type(typ)) { - if (v.TIndex && !jl_justbits(typ)) { + if (v.TIndex && !jl_justbits(typ, true)) { // discovered that this union-split type must actually be isboxed if (v.Vboxed) { return jl_cgval_t(v.Vboxed, nullptr, true, typ, NULL); @@ -2252,31 +2278,7 @@ static jl_cgval_t emit_getfield(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_s static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2) { - jl_value_t *rt1 = arg1.typ; - jl_value_t *rt2 = arg2.typ; - - int ptr_comparable = 0; // whether this type is unique'd by pointer - if (rt1 == (jl_value_t*)jl_symbol_type || rt2 == (jl_value_t*)jl_symbol_type) - ptr_comparable = 1; - if (jl_is_mutable_datatype(rt1) && // excludes abstract types - rt1 != (jl_value_t*)jl_string_type && // technically mutable, but compared by contents - rt1 != (jl_value_t*)jl_simplevector_type) - ptr_comparable = 1; - if (jl_is_mutable_datatype(rt2) && // excludes abstract types - rt2 != (jl_value_t*)jl_string_type && // technically mutable, but compared by contents - rt2 != (jl_value_t*)jl_simplevector_type) - ptr_comparable = 1; - if (jl_subtype(rt1, (jl_value_t*)jl_type_type) || - jl_subtype(rt2, (jl_value_t*)jl_type_type)) { - // need to use typeseq for most types - ptr_comparable = 0; - if ((jl_is_type_type(rt1) && jl_is_concrete_type(jl_tparam0(rt1))) || - (jl_is_type_type(rt2) && jl_is_concrete_type(jl_tparam0(rt2)))) { - // but can compare some types by pointer - ptr_comparable = 1; - } - } - if (ptr_comparable) { + if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) { Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.V; Value *varg2 = arg2.constant ? 
literal_pointer_val(ctx, arg2.constant) : arg2.V; assert(varg1 && varg2 && (arg1.isboxed || arg1.TIndex) && (arg2.isboxed || arg2.TIndex) && @@ -2369,7 +2371,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a size_t sz = jl_datatype_size(sty); Value *varg1 = arg1.ispointer() ? maybe_decay_tracked(data_pointer(ctx, arg1)) : arg1.V; Value *varg2 = arg2.ispointer() ? maybe_decay_tracked(data_pointer(ctx, arg2)) : arg2.V; - if (sz > 512 && !sty->layout->haspadding && sty->layout->npointers == 0) { + if (sz > 512 && !sty->layout->haspadding) { varg1 = decay_derived(arg1.ispointer() ? varg1 : value_to_pointer(ctx, arg1).V); varg2 = decay_derived(arg2.ispointer() ? varg2 : value_to_pointer(ctx, arg2).V); Value *answer = ctx.builder.CreateCall(prepare_call(memcmp_derived_func), { @@ -2404,8 +2406,16 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a fld2 = ctx.builder.CreateConstInBoundsGEP2_32(at, varg2, 0, llvm_idx); else fld2 = ctx.builder.CreateExtractValue(varg2, llvm_idx); - assert(!jl_field_isptr(sty, i)); - if (jl_is_uniontype(fldty)) { + if (jl_field_isptr(sty, i)) { + if (arg1.ispointer()) + fld1 = ctx.builder.CreateLoad(T_prjlvalue, fld1); + if (arg2.ispointer()) + fld2 = ctx.builder.CreateLoad(T_prjlvalue, fld2); + subAns = emit_box_compare(ctx, + mark_julia_type(ctx, fld1, true, fldty), + mark_julia_type(ctx, fld2, true, fldty)); + } + else if (jl_is_uniontype(fldty)) { unsigned tindex_offset = byte_offset + jl_field_size(sty, i) - 1; jl_cgval_t fld1_info; jl_cgval_t fld2_info; @@ -2747,8 +2757,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else { PHINode *data_owner = NULL; // owner object against which the write barrier must check - if (isboxed) { // if not boxed we don't need a write barrier - assert(ary.isboxed); + if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier Value *aryv = maybe_decay_untracked(boxed(ctx, ary)); Value *flags = emit_arrayflags(ctx, ary); // the owner of the data is ary itself except if ary->how == 3 @@ -3087,20 +3096,34 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (fieldidx < 0 || fieldidx >= jl_datatype_nfields(stt)) { *ret = mark_julia_const(jl_false); } - else if (!jl_field_isptr(stt, fieldidx) || fieldidx < stt->ninitialized) { + else if (fieldidx < stt->ninitialized) { *ret = mark_julia_const(jl_true); } - else { - size_t offs = jl_field_offset(stt, fieldidx); - Value *ptr = emit_bitcast(ctx, maybe_decay_tracked(data_pointer(ctx, obj)), T_pprjlvalue); - Value *llvm_idx = ConstantInt::get(T_size, offs / sizeof(void*)); - Value *addr = ctx.builder.CreateInBoundsGEP(ptr, llvm_idx); - // emit this using the same type as emit_getfield_knownidx - // so that LLVM may be able to load-load forward them and fold the result - Value *fldv = tbaa_decorate(obj.tbaa, ctx.builder.CreateLoad(T_prjlvalue, addr)); + else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) { + Value *fldv; + size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*); + if (obj.ispointer()) { + if (!jl_field_isptr(stt, fieldidx)) + offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; + Value *ptr = emit_bitcast(ctx, maybe_decay_tracked(data_pointer(ctx, obj)), T_pprjlvalue); + Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, offs); + // emit this using the same type as 
emit_getfield_knownidx + // so that LLVM may be able to load-load forward them and fold the result + fldv = tbaa_decorate(obj.tbaa, ctx.builder.CreateLoad(T_prjlvalue, addr)); + } + else { + fldv = ctx.builder.CreateExtractValue(obj.V, offs); + if (!jl_field_isptr(stt, fieldidx)) { + fldv = extract_first_ptr(ctx, fldv); + assert(fldv); + } + } Value *isdef = ctx.builder.CreateIsNotNull(fldv); *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } + else { + *ret = mark_julia_const(jl_true); + } return true; } return false; @@ -3162,10 +3185,17 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_code_instance_t break; } + if (returninfo.return_roots) { + AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue); + return_roots->setOperand(0, ConstantInt::get(T_int32, returninfo.return_roots)); + argvals[idx] = return_roots; + idx++; + } + for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(codeinst->def->specTypes, i); - bool isboxed; - Type *et = julia_type_to_llvm(jt, &isboxed); + bool isboxed = deserves_argbox(jt); + Type *et = isboxed ? T_prjlvalue : julia_type_to_llvm(jt); if (type_is_ghost(et)) continue; assert(idx < nfargs); @@ -3750,8 +3780,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) return; } } - bool isboxed; - Type *vtype = julia_type_to_llvm(phiType, &isboxed); + bool isboxed = !deserves_stack(phiType); + Type *vtype = isboxed ? T_prjlvalue : julia_type_to_llvm(phiType); // The frontend should really not emit this, but we allow it // for convenience. if (type_is_ghost(vtype)) { @@ -3763,7 +3793,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) } jl_cgval_t slot; PHINode *value_phi = NULL; - if (vtype->isAggregateType()) { + if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) { // the value will be moved into dest in the predecessor critical block. // here it's moved into phi in the successor (from dest) dest = emit_static_alloca(ctx, vtype); @@ -4358,7 +4388,7 @@ static void emit_last_age_field(jl_codectx_t &ctx) } static void emit_cfunc_invalidate( - Function *gf_thunk, jl_returninfo_t::CallingConv cc, + Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_code_instance_t *codeinst, size_t nargs, size_t world) { jl_method_instance_t *lam = codeinst->def; @@ -4377,10 +4407,12 @@ static void emit_cfunc_invalidate( jl_cgval_t *myargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; + if (return_roots) + ++AI; for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(lam->specTypes, i); - bool isboxed; - Type *et = julia_type_to_llvm(jt, &isboxed); + bool isboxed = deserves_argbox(jt); + Type *et = isboxed ? 
T_prjlvalue : julia_type_to_llvm(jt); if (type_is_ghost(et)) { assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->instance); myargs[i] = mark_julia_const(((jl_datatype_t*)jt)->instance); @@ -4423,6 +4455,8 @@ static void emit_cfunc_invalidate( break; } case jl_returninfo_t::SRet: { + if (return_roots) + ctx.builder.CreateStore(gf_ret, gf_thunk->arg_begin() + 1); emit_memcpy(ctx, &*gf_thunk->arg_begin(), nullptr, gf_ret, nullptr, jl_datatype_size(astrt), julia_alignment(astrt)); ctx.builder.CreateRetVoid(); break; @@ -4627,7 +4661,7 @@ static Function* gen_cfun_wrapper( ctx.builder.CreateLoad(emit_bitcast(ctx, val, T_pprjlvalue)), true, jl_any_type); } - else if (static_at && jl_justbits(jargty)) { // anything that can be stored unboxed + else if (static_at && jl_justbits(jargty)) { // anything that could be stored unboxed bool isboxed; Type *T = julia_type_to_llvm(jargty, &isboxed); assert(!isboxed); @@ -4804,13 +4838,18 @@ static Function* gen_cfun_wrapper( } args.push_back(result); } + if (returninfo.return_roots) { + AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue); + return_roots->setOperand(0, ConstantInt::get(T_int32, returninfo.return_roots)); + args.push_back(return_roots); + } for (size_t i = 0; i < nargs + 1; i++) { // figure out how to repack the arguments jl_cgval_t &inputarg = inputargs[i]; Value *arg; jl_value_t *spect = jl_nth_slot_type(lam->specTypes, i); - bool isboxed; - Type *T = julia_type_to_llvm(spect, &isboxed); + bool isboxed = deserves_argbox(spect); + Type *T = isboxed ? T_prjlvalue : julia_type_to_llvm(spect); if (isboxed) { arg = boxed(ctx, inputarg); } @@ -4843,7 +4882,7 @@ static Function* gen_cfun_wrapper( // build a specsig -> jl_apply_generic converter thunk // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, codeinst, nargs + 1, world); + emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, codeinst, nargs + 1, world); theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); } CallInst *call = ctx.builder.CreateCall(theFptr, ArrayRef(args)); @@ -5256,12 +5295,18 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret idx++; break; } + if (f.return_roots) { + AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue); + return_roots->setOperand(0, ConstantInt::get(T_int32, f.return_roots)); + args[idx] = return_roots; + idx++; + } for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); - bool isboxed; - Type *lty = julia_type_to_llvm(ty, &isboxed); - if (lty != NULL && type_is_ghost(lty)) + bool isboxed = deserves_argbox(ty); + Type *lty = isboxed ? 
T_prjlvalue : julia_type_to_llvm(ty); + if (type_is_ghost(lty)) continue; Value *theArg; if (i == 0) { @@ -5341,7 +5386,7 @@ static bool uses_specsig(jl_value_t *sig, size_t nreq, jl_value_t *rettype, bool // not invalid, consider if specialized signature is worthwhile if (prefer_specsig) return true; - if (jl_justbits(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype)) + if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype)) return true; if (jl_is_uniontype(rettype)) { bool allunbox; @@ -5355,7 +5400,7 @@ static bool uses_specsig(jl_value_t *sig, size_t nreq, jl_value_t *rettype, bool jl_value_t *sigt = jl_tparam(sig, i); bool issing = jl_is_datatype_singleton((jl_datatype_t*)sigt); allSingleton &= issing; - if (jl_justbits(sigt) && !issing) { + if (!deserves_argbox(sigt) && !issing) { return true; } } @@ -5391,20 +5436,26 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, rt = T_prjlvalue; } } - else { + else if (!deserves_retbox(jlrettype)) { bool retboxed; rt = julia_type_to_llvm(jlrettype, &retboxed); - if (!retboxed) { - if (rt != T_void && deserves_sret(jlrettype, rt)) { - props.cc = jl_returninfo_t::SRet; - fsig.push_back(rt->getPointerTo()); - rt = T_void; - } - else { - props.cc = jl_returninfo_t::Register; - } + assert(!retboxed); + if (rt != T_void && deserves_sret(jlrettype, rt)) { + auto tracked = CountTrackedPointers(rt); + assert(!tracked.derived); + if (tracked.count && !tracked.all) + props.return_roots = tracked.count; + props.cc = jl_returninfo_t::SRet; + fsig.push_back(rt->getPointerTo()); + rt = T_void; + } + else { + props.cc = jl_returninfo_t::Register; } } + else { + rt = T_prjlvalue; + } AttributeList attributes; // function declaration attributes if (props.cc == jl_returninfo_t::SRet) { @@ -5419,10 +5470,16 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoCapture); } + if (props.return_roots) { + fsig.push_back(T_pprjlvalue); + unsigned argno = fsig.size(); + attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoAlias); + attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoCapture); + } + for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); - bool isboxed; - Type *ty = julia_type_to_llvm(jt, &isboxed); + Type *ty = deserves_argbox(jt) ? 
T_prjlvalue : julia_type_to_llvm(jt); if (type_is_ghost(ty)) continue; unsigned argno = fsig.size(); @@ -5449,20 +5506,23 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, return props; } +static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, unsigned count) +{ + if (isptr) + Src = maybe_decay_tracked(Src); + if (isptr && Src->getType()->getPointerElementType() != T) + Src = ctx.builder.CreateBitCast(Src, T->getPointerTo(Src->getType()->getPointerAddressSpace())); + unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ctx.builder); + assert(emitted == count); (void)emitted; (void)count; +} + static DISubroutineType * get_specsig_di(jl_value_t *rt, jl_value_t *sig, DIFile *topfile, DIBuilder &dbuilder) { std::vector ditypes(0); - Type *ty = julia_type_to_llvm(rt); - if (type_is_ghost(ty)) - ditypes.push_back(nullptr); - else - ditypes.push_back(julia_type_to_di(rt, &dbuilder, false)); + ditypes.push_back(julia_type_to_di(rt, &dbuilder, false)); for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); - Type *ty = julia_type_to_llvm(jt); - if (type_is_ghost(ty)) - continue; ditypes.push_back(julia_type_to_di(jt, &dbuilder, false)); } return dbuilder.createSubroutineType(dbuilder.getOrCreateTypeArray(ditypes)); @@ -5913,7 +5973,7 @@ static std::unique_ptr emit_function( if (allunbox) return; } - else if (jl_justbits(jt)) { + else if (deserves_stack(jt, true)) { bool isboxed; Type *vtype = julia_type_to_llvm(jt, &isboxed); assert(!isboxed); @@ -6020,11 +6080,13 @@ static std::unique_ptr emit_function( if (has_sret) AI++; // skip sret slot + if (returninfo.return_roots) + AI++; // skip return_roots slot for (i = 0; i < nreq; i++) { jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i); jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - bool isboxed; - Type *llvmArgType = julia_type_to_llvm(argType, &isboxed); + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = isboxed ? T_prjlvalue : julia_type_to_llvm(argType); if (s == unused_sym) { if (specsig && !type_is_ghost(llvmArgType)) ++AI; @@ -6105,8 +6167,8 @@ static std::unique_ptr emit_function( jl_cgval_t *vargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * ctx.nvargs); for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) { jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - bool isboxed; - Type *llvmArgType = julia_type_to_llvm(argType, &isboxed); + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = isboxed ? 
T_prjlvalue : julia_type_to_llvm(argType); vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed); } if (jl_is_concrete_type(vi.value.typ)) { @@ -6476,6 +6538,10 @@ static std::unique_ptr emit_function( } if (sret) { if (retvalinfo.ispointer()) { + if (returninfo.return_roots) { + Type *store_ty = julia_type_to_llvm(retvalinfo.typ); + emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, returninfo.return_roots); + } if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); emit_memcpy(ctx, sret, nullptr, retvalinfo, jl_datatype_size(jlrettype), @@ -6488,9 +6554,14 @@ static std::unique_ptr emit_function( else { Type *store_ty = julia_type_to_llvm(retvalinfo.typ); Type *dest_ty = store_ty->getPointerTo(); + Value *Val = emit_unbox(ctx, store_ty, retvalinfo, retvalinfo.typ); + if (returninfo.return_roots) { + assert(store_ty == Val->getType()); + emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, returninfo.return_roots); + } if (dest_ty != sret->getType()) sret = emit_bitcast(ctx, sret, dest_ty); - ctx.builder.CreateStore(emit_unbox(ctx, store_ty, retvalinfo, retvalinfo.typ), sret); + ctx.builder.CreateStore(Val, sret); } } @@ -6589,8 +6660,9 @@ static std::unique_ptr emit_function( ctx.builder.ClearInsertionPoint(); auto undef_value_for_type = [&](Type *T) { + auto tracked = CountTrackedPointers(T); Value *undef; - if (T == T_prjlvalue) + if (tracked.count) // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL undef = Constant::getNullValue(T); else @@ -6790,8 +6862,11 @@ static std::unique_ptr emit_function( } if (TindexN) TindexN->addIncoming(RTindex, FromBB); - if (dest) + if (dest) { ctx.builder.CreateLifetimeStart(dest); + if (CountTrackedPointers(dest->getAllocatedType()).count) + ctx.builder.CreateStore(Constant::getNullValue(dest->getAllocatedType()), dest); + } ctx.builder.ClearInsertionPoint(); } } @@ -7561,8 +7636,7 @@ static void init_julia_llvm_env(Module *m) add_return_attr(jl_typeof_func, Attribute::NonNull); add_named_global(jl_typeof_func, (void*)NULL, /*dllimport*/false); - jl_write_barrier_func = Function::Create(FunctionType::get(T_void, - {T_prjlvalue, T_prjlvalue}, false), + jl_write_barrier_func = Function::Create(FunctionType::get(T_void, {T_prjlvalue,}, true), Function::ExternalLinkage, "julia.write_barrier"); jl_write_barrier_func->addFnAttr(Attribute::InaccessibleMemOnly); diff --git a/src/codegen_shared.h b/src/codegen_shared.h index 6ecdfd3afc1b4..f5759f794554f 100644 --- a/src/codegen_shared.h +++ b/src/codegen_shared.h @@ -27,6 +27,8 @@ struct CountTrackedPointers { bool derived = false; CountTrackedPointers(llvm::Type *T); }; +unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> irbuilder); +std::vector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> irbuilder); static inline void llvm_dump(llvm::Value *v) { diff --git a/src/datatype.c b/src/datatype.c index 17a780f59d7c8..aaace4afa5161 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -141,6 +141,7 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, flddesc->haspadding = haspadding; flddesc->fielddesc_type = fielddesc_type; flddesc->npointers = npointers; + flddesc->first_ptr = (npointers > 0 ? 
pointers[0] : -1); // fill out the fields of the new descriptor jl_fielddesc8_t* desc8 = (jl_fielddesc8_t*)jl_dt_layout_fields(flddesc); @@ -265,6 +266,32 @@ JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al) return (countbits > 0 && countbits < 127) ? countbits : 0; } +JL_DLLEXPORT int jl_stored_inline(jl_value_t *eltype) JL_NOTSAFEPOINT +{ + size_t fsz = 0, al = 0; + return jl_islayout_inline(eltype, &fsz, &al); +} + +// whether this type is unique'd by pointer +int jl_pointer_egal(jl_value_t *t) +{ + if (t == (jl_value_t*)jl_any_type) + return 0; // when setting up the initial types, jl_is_type_type gets confused about this + if (t == (jl_value_t*)jl_symbol_type) + return 1; + if (jl_is_mutable_datatype(t) && // excludes abstract types + t != (jl_value_t*)jl_string_type && // technically mutable, but compared by contents + t != (jl_value_t*)jl_simplevector_type && + !jl_is_kind(t)) + return 1; + if (jl_is_type_type(t) && jl_is_concrete_type(jl_tparam0(t))) { + // need to use typeseq for most types + // but can compare some types by pointer + return 1; + } + return 0; +} + static int references_name(jl_value_t *p, jl_typename_t *name) JL_NOTSAFEPOINT { if (jl_is_uniontype(p)) @@ -331,18 +358,18 @@ void jl_compute_field_offsets(jl_datatype_t *st) // if we have no fields, we can trivially skip the rest if (st == jl_symbol_type || st == jl_string_type) { // opaque layout - heap-allocated blob - static const jl_datatype_layout_t opaque_byte_layout = {0, 1, 1, 0, 0}; + static const jl_datatype_layout_t opaque_byte_layout = {0, 1, -1, 1, 0, 0}; st->layout = &opaque_byte_layout; return; } else if (st == jl_simplevector_type || st->name == jl_array_typename) { - static const jl_datatype_layout_t opaque_ptr_layout = {0, 1, sizeof(void*), 0, 0}; + static const jl_datatype_layout_t opaque_ptr_layout = {0, 1, -1, sizeof(void*), 0, 0}; st->layout = &opaque_ptr_layout; return; } else { // reuse the same layout for all singletons - static const jl_datatype_layout_t singleton_layout = {0, 0, 1, 0, 0}; + static const jl_datatype_layout_t singleton_layout = {0, 0, -1, 1, 0, 0}; st->layout = &singleton_layout; } } @@ -418,6 +445,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) haspadding = 1; if (!zeroinit) zeroinit = ((jl_datatype_t*)fld)->zeroinit; + npointers += ((jl_datatype_t*)fld)->layout->npointers; } } else { @@ -428,6 +456,10 @@ void jl_compute_field_offsets(jl_datatype_t *st) desc[i].isptr = 1; zeroinit = 1; npointers++; + if (!jl_pointer_egal(fld)) { + // this somewhat poorly named flag says whether some of the bits can be non-unique + haspadding = 1; + } } assert(al <= JL_HEAP_ALIGNMENT && (JL_HEAP_ALIGNMENT % al) == 0); if (al != 0) { @@ -464,8 +496,16 @@ void jl_compute_field_offsets(jl_datatype_t *st) pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t)); size_t ptr_i = 0; for (i = 0; i < nfields; i++) { + jl_value_t *fld = jl_field_type(st, i); + uint32_t offset = desc[i].offset / sizeof(jl_value_t**); if (desc[i].isptr) - pointers[ptr_i++] = desc[i].offset / sizeof(jl_value_t**); + pointers[ptr_i++] = offset; + else if (jl_is_datatype(fld)) { + int j, npointers = ((jl_datatype_t*)fld)->layout->npointers; + for (j = 0; j < npointers; j++) { + pointers[ptr_i++] = offset + jl_ptr_offset((jl_datatype_t*)fld, j); + } + } } assert(ptr_i == npointers); st->layout = jl_get_layout(nfields, npointers, alignm, haspadding, desc, pointers); @@ -477,8 +517,13 @@ void jl_compute_field_offsets(jl_datatype_t *st) } // now finish deciding if this instantiation 
qualifies for special properties assert(!isbitstype || st->layout->npointers == 0); // the definition of isbits - if (st->layout->npointers != 0) + if (isinlinealloc && st->layout->npointers > 0) { + //if (st->ninitialized != nfields) + // isinlinealloc = 0; + //else if (st->layout->fielddesc_type != 0) // GC only implements support for this + // isinlinealloc = 0; isinlinealloc = 0; + } st->isbitstype = isbitstype; st->isinlinealloc = isinlinealloc; jl_maybe_allocate_singleton_instance(st); @@ -1035,7 +1080,8 @@ JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) JL_ size_t offs = jl_field_offset(st, i); if (jl_field_isptr(st, i)) { *(jl_value_t**)((char*)v + offs) = rhs; - if (rhs != NULL) jl_gc_wb(v, rhs); + if (rhs != NULL) + jl_gc_wb(v, rhs); } else { jl_value_t *ty = jl_field_type_concrete(st, i); @@ -1049,6 +1095,7 @@ JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) JL_ return; } jl_assign_bits((char*)v + offs, rhs); + jl_gc_multi_wb(v, rhs); } } @@ -1056,8 +1103,13 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) { jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v); size_t offs = jl_field_offset(st, i); + char *fld = (char*)v + offs; if (jl_field_isptr(st, i)) { - return *(jl_value_t**)((char*)v + offs) != NULL; + return *(jl_value_t**)fld != NULL; + } + jl_datatype_t *ft = (jl_datatype_t*)jl_field_type(st, i); + if (jl_is_datatype(ft) && ft->layout->first_ptr >= 0) { + return ((jl_value_t**)fld)[ft->layout->first_ptr] != NULL; } return 1; } diff --git a/src/dump.c b/src/dump.c index 9fd00e5f521c3..11bb57e3cc80e 100644 --- a/src/dump.c +++ b/src/dump.c @@ -666,14 +666,14 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li else if (jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ar))); - if (ar->flags.ndims == 1 && ar->elsize <= 0x3f) { + if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray<<7) | (isunion << 6) | (ar->elsize & 0x3f)); + write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); } else { write_uint8(s->s, TAG_ARRAY); write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (isunion << 14) | (ar->elsize & 0x3fff)); + write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); } for (i = 0; i < ar->flags.ndims; i++) jl_serialize_value(s, jl_box_long(jl_array_dim(ar,i))); @@ -1523,19 +1523,21 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta { int usetable = (s->mode != MODE_IR); int16_t i, ndims; - int isunboxed, isunion, elsize; + int isptr, isunion, hasptr, elsize; if (tag == TAG_ARRAY1D) { ndims = 1; elsize = read_uint8(s->s); - isunboxed = !(elsize >> 7); - isunion = elsize >> 6; - elsize = elsize & 0x3f; + isptr = (elsize >> 7) & 1; + hasptr = (elsize >> 6) & 1; + isunion = (elsize >> 5) & 1; + elsize = elsize & 0x1f; } else { ndims = read_uint16(s->s); elsize = read_uint16(s->s); - isunboxed = !(elsize >> 15); - isunion = elsize >> 14; + isptr = (elsize >> 15) & 1; + hasptr = (elsize >> 14) & 1; + isunion = (elsize >> 13) & 1; elsize = elsize & 0x3fff; } uintptr_t pos = backref_list.len; @@ -1546,7 +1548,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta dims[i] = jl_unbox_long(jl_deserialize_value(s, NULL)); } jl_array_t *a = 
jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, isunboxed, isunion, elsize); + (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); if (usetable) backref_list.items[pos] = a; jl_value_t *aty = jl_deserialize_value(s, &jl_astaggedvalue(a)->type); @@ -1555,6 +1557,11 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; size_t tot = jl_array_len(a) * a->elsize + extra; ios_read(s->s, (char*)jl_array_data(a), tot); + assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled + //if (a->flags.hasptr) { + // for (i = 0; i < numel; i++) { + // jl_gc_wb_multi(a, data[i]); + //} } else { jl_value_t **data = (jl_value_t**)jl_array_data(a); diff --git a/src/gc-debug.c b/src/gc-debug.c index d841236d22e7c..675639cfc603c 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1218,6 +1218,7 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); char *start = NULL; size_t len = 0; + size_t elsize = sizeof(void*); if (vt == jl_module_type) { jl_module_t *m = (jl_module_t*)obj; start = (char*)m->usings.items; @@ -1233,10 +1234,11 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) return -1; start = (char*)a->data; len = jl_array_len(a); + elsize = a->elsize; } - if (slot < start || slot >= start + sizeof(void*) * len) + if (slot < start || slot >= start + elsize * len) return -1; - return (slot - start) / sizeof(void*); + return (slot - start) / elsize; } // Print a backtrace from the bottom (start) of the mark stack up to `sp` diff --git a/src/gc.c b/src/gc.c index e101a5fa8d76d..76d076d7cad48 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1499,6 +1499,45 @@ JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *ptr) ptls->heap.remset_nptr++; // conservative } +void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT +{ + // first check if this is really necessary + // TODO: should we store this info in one of the extra gc bits? 
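+    // Concretely (hypothetical layout): for an inline-stored immutable
+    //     struct { jl_value_t *a; double x; jl_value_t *b; }
+    // the layout's ptrs table records the pointer-word offsets {0, 2};
+    // the checks below visit exactly those slots and re-queue the parent
+    // as soon as any of them holds a young object.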
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
+    const jl_datatype_layout_t *ly = dt->layout;
+    uint32_t npointers = ly->npointers;
+    //if (npointers == 0) // this was checked by the caller
+    //    return;
+    jl_value_t *ptrf = ((jl_value_t**)ptr)[ly->first_ptr];
+    if (ptrf && (jl_astaggedvalue(ptrf)->bits.gc & 1) == 0) {
+        // this pointer was young, move the barrier back now
+        jl_gc_wb_back(parent);
+        return;
+    }
+    const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly);
+    const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly);
+    const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly);
+    for (size_t i = 1; i < npointers; i++) {
+        uint32_t fld;
+        if (ly->fielddesc_type == 0) {
+            fld = ptrs8[i];
+        }
+        else if (ly->fielddesc_type == 1) {
+            fld = ptrs16[i];
+        }
+        else {
+            assert(ly->fielddesc_type == 2);
+            fld = ptrs32[i];
+        }
+        jl_value_t *ptrf = ((jl_value_t**)ptr)[fld];
+        if (ptrf && (jl_astaggedvalue(ptrf)->bits.gc & 1) == 0) {
+            // this pointer was young, move the barrier back now
+            jl_gc_wb_back(parent);
+            return;
+        }
+    }
+}
+
 void gc_queue_binding(jl_binding_t *bnd)
 {
     jl_ptls_t ptls = jl_get_ptls_states();
@@ -1707,14 +1746,14 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp,
                                         jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits)
 {
     (void)jl_assume(objary == (gc_mark_objarray_t*)sp->data);
-    for (; begin < end; begin++) {
+    for (; begin < end; begin += objary->step) {
         *pnew_obj = *begin;
         if (*pnew_obj)
             verify_parent2("obj array", objary->parent, begin, "elem(%d)",
                            gc_slot_to_arrayidx(objary->parent, begin));
         if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits))
             continue;
-        begin++;
+        begin += objary->step;
         // Found an object to mark
         if (begin < end) {
             // Haven't done with this one yet. Update the content and push it back
@@ -1732,6 +1771,54 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp,
     return 0;
 }
 
+// Scan a sparse array of object references, see `gc_mark_array8_t`
+STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp,
+                                      gc_mark_array8_t *ary8,
+                                      jl_value_t **begin, jl_value_t **end,
+                                      uint8_t *elem_begin, uint8_t *elem_end,
+                                      jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits)
+{
+    (void)jl_assume(ary8 == (gc_mark_array8_t*)sp->data);
+    size_t elsize = ((jl_array_t*)ary8->elem.parent)->elsize / sizeof(jl_value_t*);
+    for (; begin < end; begin += elsize) {
+        for (; elem_begin < elem_end; elem_begin++) {
+            jl_value_t **slot = &begin[*elem_begin];
+            *pnew_obj = *slot;
+            if (*pnew_obj)
+                verify_parent2("array", ary8->elem.parent, slot, "elem(%d)",
+                               gc_slot_to_arrayidx(ary8->elem.parent, begin));
+            if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits))
+                continue;
+            elem_begin++;
+            // Found an object to mark
+            if (elem_begin < elem_end) {
+                // Not done with this element yet. Update the content and push it back
+                ary8->elem.begin = elem_begin;
+                gc_repush_markdata(sp, gc_mark_array8_t);
+            }
+            else {
+                begin += elsize;
+                if (begin < end) {
+                    // Not done with this array yet. Reset the content and push it back
+                    ary8->elem.begin = ary8->rebegin;
+                    ary8->begin = begin;
+                    gc_repush_markdata(sp, gc_mark_array8_t);
+                }
+                else {
+                    // Finished scanning this one; finish up by checking the GC invariants
+                    // and let the next item replace the current one directly.
+ gc_mark_push_remset(ptls, ary8->elem.parent, ary8->elem.nptr); + } + } + return 1; + } + ary8->elem.begin = elem_begin = ary8->rebegin; + } + gc_mark_push_remset(ptls, ary8->elem.parent, ary8->elem.nptr); + return 0; +} + + // Scan an object with 8bits field descriptors. see `gc_mark_obj8_t` STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark_obj8_t *obj8, char *parent, uint8_t *begin, uint8_t *end, @@ -1846,6 +1933,8 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar goto finlist; \ case GC_MARK_L_objarray: \ goto objarray; \ + case GC_MARK_L_array8: \ + goto array8; \ case GC_MARK_L_obj8: \ goto obj8; \ case GC_MARK_L_obj16: \ @@ -1937,6 +2026,7 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp) gc_mark_label_addrs[GC_MARK_L_scan_only] = gc_mark_laddr(scan_only); gc_mark_label_addrs[GC_MARK_L_finlist] = gc_mark_laddr(finlist); gc_mark_label_addrs[GC_MARK_L_objarray] = gc_mark_laddr(objarray); + gc_mark_label_addrs[GC_MARK_L_array8] = gc_mark_laddr(array8); gc_mark_label_addrs[GC_MARK_L_obj8] = gc_mark_laddr(obj8); gc_mark_label_addrs[GC_MARK_L_obj16] = gc_mark_laddr(obj16); gc_mark_label_addrs[GC_MARK_L_obj32] = gc_mark_laddr(obj32); @@ -1955,6 +2045,8 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp) jl_value_t **objary_begin; jl_value_t **objary_end; + gc_mark_array8_t *ary8; + gc_mark_obj8_t *obj8; char *obj8_parent; uint8_t *obj8_begin; @@ -2002,6 +2094,19 @@ scan_only: { goto mark; goto pop; +array8: + ary8 = gc_pop_markdata(&sp, gc_mark_array8_t); + objary_begin = ary8->begin; + objary_end = ary8->end; + obj8_begin = ary8->elem.begin; + obj8_end = ary8->elem.end; +array8_loaded: + if (gc_mark_scan_array8(ptls, &sp, ary8, objary_begin, objary_end, obj8_begin, obj8_end, + &new_obj, &tag, &bits)) + goto mark; + goto pop; + + obj8: obj8 = gc_pop_markdata(&sp, gc_mark_obj8_t); obj8_parent = (char*)obj8->parent; @@ -2203,7 +2308,7 @@ module_binding: { // contain the only reference. 
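        // (the usings list is a dense array of object pointers, so it can be
        // scanned with the plain objarray scheme at stride 1)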
        objary_begin = (jl_value_t**)m->usings.items;
        objary_end = objary_begin + nusings;
-        gc_mark_objarray_t data = {(jl_value_t*)m, objary_begin, objary_end, binding->nptr};
+        gc_mark_objarray_t data = {(jl_value_t*)m, objary_begin, objary_end, 1, binding->nptr};
         gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray),
                            &data, sizeof(data), 0);
         if (!scanparent) {
@@ -2281,7 +2386,7 @@ mark: {
             uintptr_t nptr = (l << 2) | (bits & GC_OLD);
             objary_begin = data;
             objary_end = data + l;
-            gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, nptr};
+            gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, 1, nptr};
             gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray),
                                &markdata, sizeof(markdata), 0);
             objary = (gc_mark_objarray_t*)sp.data;
@@ -2328,17 +2433,50 @@ mark: {
             }
             goto pop;
         }
-        if (!flags.ptrarray || a->data == NULL || jl_array_len(a) == 0)
+        if (a->data == NULL || jl_array_len(a) == 0)
             goto pop;
-        size_t l = jl_array_len(a);
-        uintptr_t nptr = (l << 2) | (bits & GC_OLD);
-        objary_begin = (jl_value_t**)a->data;
-        objary_end = objary_begin + l;
-        gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, nptr};
-        gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray),
-                           &markdata, sizeof(markdata), 0);
-        objary = (gc_mark_objarray_t*)sp.data;
-        goto objarray_loaded;
+        if (flags.ptrarray) {
+            size_t l = jl_array_len(a);
+            uintptr_t nptr = (l << 2) | (bits & GC_OLD);
+            objary_begin = (jl_value_t**)a->data;
+            objary_end = objary_begin + l;
+            gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, 1, nptr};
+            gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray),
+                               &markdata, sizeof(markdata), 0);
+            objary = (gc_mark_objarray_t*)sp.data;
+            goto objarray_loaded;
+        }
+        else if (flags.hasptr) {
+            jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(vt);
+            const jl_datatype_layout_t *layout = et->layout;
+            unsigned npointers = layout->npointers;
+            unsigned elsize = a->elsize / sizeof(jl_value_t*);
+            size_t l = jl_array_len(a);
+            uintptr_t nptr = ((l * npointers) << 2) | (bits & GC_OLD);
+            objary_begin = (jl_value_t**)a->data;
+            objary_end = objary_begin + l * elsize;
+            if (npointers == 1) { // TODO: detect whenever the stride is uniform?
+                objary_begin += layout->first_ptr;
+                gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, elsize, nptr};
+                gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray),
+                                   &markdata, sizeof(markdata), 0);
+                objary = (gc_mark_objarray_t*)sp.data;
+                goto objarray_loaded;
+            }
+            else if (layout->fielddesc_type == 0) {
+                obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout);
+                obj8_end = obj8_begin + npointers;
+                gc_mark_array8_t markdata = {objary_begin, objary_end, obj8_begin, {new_obj, obj8_begin, obj8_end, nptr}};
+                gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(array8),
+                                   &markdata, sizeof(markdata), 0);
+                ary8 = (gc_mark_array8_t*)sp.data;
+                goto array8_loaded;
+            }
+            else {
+                assert(0 && "unimplemented");
+            }
+        }
+        goto pop;
     }
     else if (vt == jl_module_type) {
         if (update_meta)
diff --git a/src/gc.h b/src/gc.h
index 3e81193b67b5c..270f1101debee 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -80,6 +80,7 @@ enum {
     GC_MARK_L_scan_only,
     GC_MARK_L_finlist,
     GC_MARK_L_objarray,
+    GC_MARK_L_array8,
     GC_MARK_L_obj8,
     GC_MARK_L_obj16,
     GC_MARK_L_obj32,
@@ -113,6 +114,7 @@ typedef struct {
     jl_value_t *parent; // The parent object to trigger write barrier on.
     jl_value_t **begin; // The first slot to be scanned.
     jl_value_t **end; // The end address (after the last slot to be scanned)
+    uint32_t step; // Number of pointer slots to jump between marks
     uintptr_t nptr; // See notes about `nptr` above.
 } gc_mark_objarray_t;
 
@@ -140,6 +142,13 @@ typedef struct {
     uintptr_t nptr; // See notes about `nptr` above.
 } gc_mark_obj32_t;
 
+typedef struct {
+    jl_value_t **begin; // The first slot to be scanned.
+    jl_value_t **end; // The end address (after the last slot to be scanned)
+    uint8_t *rebegin; // Start of the element offset table (used to reset `elem.begin` for each array element)
+    gc_mark_obj8_t elem;
+} gc_mark_array8_t;
+
 // Stack frame
 typedef struct {
     jl_gcframe_t *s; // The current stack frame
@@ -182,6 +191,7 @@ typedef struct {
 union _jl_gc_mark_data {
     gc_mark_marked_obj_t marked;
     gc_mark_objarray_t objarray;
+    gc_mark_array8_t array8;
     gc_mark_obj8_t obj8;
     gc_mark_obj16_t obj16;
     gc_mark_obj32_t obj32;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 281e1661a8d42..f9a9223e1bc41 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -149,7 +149,7 @@ static Value *uint_cnvt(jl_codectx_t &ctx, Type *to, Value *x)
 
 static Constant *julia_const_to_llvm(const void *ptr, jl_datatype_t *bt)
 {
-    // assumes `jl_justbits(bt)`.
+    // assumes `jl_justbits(bt, true)`.
     // `ptr` can point to an inline field, do not read the tag from it.
     // make sure to return exactly the type specified by
     // julia_type_to_llvm as this will be assumed by the callee.
@@ -193,11 +193,11 @@ static Constant *julia_const_to_llvm(const void *ptr, jl_datatype_t *bt)
     std::vector<Constant*> fields(0);
     for (size_t i = 0; i < nf; i++) {
         size_t offs = jl_field_offset(bt, i);
-        assert(!jl_field_isptr(bt, i));
         jl_value_t *ft = jl_field_type(bt, i);
         Type *lft = julia_type_to_llvm(ft);
         if (type_is_ghost(lft))
             continue;
+        assert(!jl_field_isptr(bt, i));
         unsigned llvm_idx = isa<StructType>(lt) ? convert_struct_offset(lt, offs) : i;
         while (fields.size() < llvm_idx)
             fields.push_back(UndefValue::get(lct->getTypeAtIndex(fields.size())));
@@ -270,7 +270,7 @@ static Constant *julia_const_to_llvm(jl_value_t *e)
     if (e == jl_false)
         return ConstantInt::get(T_int8, 0);
     jl_value_t *bt = jl_typeof(e);
-    if (!jl_justbits(bt))
+    if (!jl_justbits(bt, true))
         return NULL;
     return julia_const_to_llvm(e, (jl_datatype_t*)bt);
 }
@@ -765,10 +765,8 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
     }
     Value *ifelse_result;
-    bool isboxed;
-    Type *llt1 = julia_type_to_llvm(t1, &isboxed);
-    if (t1 != t2)
-        isboxed = true;
+    bool isboxed = t1 != t2 || !deserves_stack(t1);
+    Type *llt1 = isboxed ? T_prjlvalue : julia_type_to_llvm(t1);
     if (!isboxed) {
         if (type_is_ghost(llt1))
             return x;
diff --git a/src/jltypes.c b/src/jltypes.c
index 29e552f457600..3873a9e4ede72 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -2347,6 +2347,12 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_compute_field_offsets(jl_unionall_type);
     jl_compute_field_offsets(jl_simplevector_type);
     jl_compute_field_offsets(jl_symbol_type);
+
+    // override the preferred layout for a couple types
+    jl_lineinfonode_type->isinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
+    // It seems like we probably usually end up needing the box for kinds (used in an Any context)--but is that true?
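+    // (a kind value is nearly always stored into an `Any`-typed slot, where an
+    // inlined representation would just have to be re-boxed at each use)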
+ jl_uniontype_type->isinlinealloc = 0; + jl_unionall_type->isinlinealloc = 0; } #ifdef __cplusplus diff --git a/src/julia.h b/src/julia.h index af694760cac31..647ee82be932f 100644 --- a/src/julia.h +++ b/src/julia.h @@ -159,10 +159,11 @@ typedef struct { 3 = has a pointer to the object that owns the data */ uint16_t how:2; - uint16_t ndims:10; + uint16_t ndims:9; uint16_t pooled:1; - uint16_t ptrarray:1; // representation is pointer array - uint16_t isshared:1; // data is shared by multiple Arrays + uint16_t ptrarray:1; // representation is pointer array + uint16_t hasptr:1; // representation has embedded pointers + uint16_t isshared:1; // data is shared by multiple Arrays uint16_t isaligned:1; // data allocated with memalign } jl_array_flags_t; @@ -425,7 +426,8 @@ typedef struct { typedef struct { uint32_t nfields; - uint32_t npointers; // number of pointer + uint32_t npointers; // number of pointers embedded inside + int32_t first_ptr; // index of the first pointer (or -1) uint32_t alignment : 9; // strictest alignment over all fields uint32_t haspadding : 1; // has internal undefined bytes uint32_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32 @@ -781,13 +783,14 @@ JL_DLLEXPORT void jl_gc_use(jl_value_t *a); JL_DLLEXPORT void jl_clear_malloc_data(void); // GC write barriers -JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *root) JL_NOTSAFEPOINT; // root isa jl_value_t* +JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *root) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_gc_queue_multiroot(jl_value_t *root, jl_value_t *stored) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) JL_NOTSAFEPOINT { // parent and ptr isa jl_value_t* - if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && - (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) + if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset + (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young jl_gc_queue_root((jl_value_t*)parent); } @@ -799,6 +802,19 @@ STATIC_INLINE void jl_gc_wb_back(void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_ } } +STATIC_INLINE void jl_gc_multi_wb(void *parent, jl_value_t *ptr) JL_NOTSAFEPOINT +{ + // ptr is an immutable object + if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) + return; // parent is young or in remset + if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3)) + return; // ptr is old and not in remset (thus it does not point to young) + jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); + const jl_datatype_layout_t *ly = dt->layout; + if (ly->npointers) + jl_gc_queue_multiroot((jl_value_t*)parent, ptr); +} + JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner); @@ -859,6 +875,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( #define jl_array_ptr_data(a) ((jl_value_t**)((jl_array_t*)(a))->data) STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { + assert(((jl_array_t*)a)->flags.ptrarray); assert(i < jl_array_len(a)); return ((jl_value_t**)(jl_array_data(a)))[i]; } @@ -866,6 +883,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT { + assert(((jl_array_t*)a)->flags.ptrarray); assert(i < jl_array_len(a)); ((jl_value_t**)(jl_array_data(a)))[i] = (jl_value_t*)x; if (x) { @@ -999,6 +1017,8 @@ static inline const char *jl_dt_layout_ptrs(const jl_datatype_layout_t *l) JL_NO DEFINE_FIELD_ACCESSORS(offset) 
DEFINE_FIELD_ACCESSORS(size) +#undef DEFINE_FIELD_ACCESSORS + static inline int jl_field_isptr(jl_datatype_t *st, int i) JL_NOTSAFEPOINT { const jl_datatype_layout_t *ly = st->layout; @@ -1006,7 +1026,22 @@ static inline int jl_field_isptr(jl_datatype_t *st, int i) JL_NOTSAFEPOINT return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->fielddesc_type) * i))->isptr; } -#undef DEFINE_FIELD_ACCESSORS +static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *ly = st->layout; + assert(i >= 0 && (size_t)i < ly->npointers); + const void *ptrs = jl_dt_layout_ptrs(ly); + if (ly->fielddesc_type == 0) { + return ((const uint8_t*)ptrs)[i]; + } + else if (ly->fielddesc_type == 1) { + return ((const uint16_t*)ptrs)[i]; + } + else { + assert(ly->fielddesc_type == 2); + return ((const uint32_t*)ptrs)[i]; + } +} static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT { diff --git a/src/julia_internal.h b/src/julia_internal.h index 9ee9f7914a175..cbc43e817dc60 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -462,10 +462,11 @@ JL_DLLEXPORT jl_methtable_t *jl_method_table_for( jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT); +int jl_pointer_egal(jl_value_t *t); jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; void jl_compute_field_offsets(jl_datatype_t *st); jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int isunion, int elsz); + int isunboxed, int hasptr, int isunion, int elsz); void jl_module_run_initializer(jl_module_t *m); jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT; extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED; @@ -960,7 +961,7 @@ JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a); -int jl_array_store_unboxed(jl_value_t *el_type); +JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type); JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a); JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 9c3a2011bae6c..5f511e0265de3 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -299,6 +299,9 @@ struct State { // We don't bother doing liveness on Allocas that were not mem2reg'ed. // they just get directly sunk into the root array. 
     std::vector<AllocaInst*> Allocas;
+    DenseMap<AllocaInst*, unsigned> ArrayAllocas; // allocas of tracked pointers, with the number of GC-frame slots they need
+    DenseMap<AllocaInst*, AllocaInst*> ShadowAllocas; // original alloca => shadow alloca holding only its tracked pointers
+    std::vector<std::pair<StoreInst*, unsigned>> TrackedStores; // stores of aggregates containing tracked pointers, with their pointer counts
     State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
 };
 
@@ -342,6 +345,8 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
     std::vector<int> NumberAllBase(State &S, Value *Base);
     void NoteOperandUses(State &S, BBState &BBS, User &UI);
+    void MaybeTrackDst(State &S, MemTransferInst *MI);
+    void MaybeTrackStore(State &S, StoreInst *I);
     State LocalScan(Function &F);
     void ComputeLiveness(State &S);
     void ComputeLiveSets(State &S);
@@ -820,7 +825,7 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
         auto Idxs = EVI->getIndices();
         for (unsigned i = 0; i < Tracked.size(); ++i) {
             auto Elem = makeArrayRef(Tracked[i]);
-            if (Elem.size() > Idxs.size())
+            if (Elem.size() < Idxs.size())
                 continue;
             if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
                 Numbers.push_back(BaseNumbers[i]);
@@ -1302,6 +1307,28 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 else {
                     MaybeNoteDef(S, BBS, CI, BBS.Safepoints);
                 }
+                if (CI->hasStructRetAttr()) {
+                    AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
+                    if (SRet) {
+                        Type *ElT = SRet->getAllocatedType();
+                        if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
+                            auto tracked = CountTrackedPointers(ElT);
+                            if (tracked.count) {
+                                assert(!tracked.derived);
+                                if (tracked.all) {
+                                    S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
+                                }
+                                else {
+                                    AllocaInst *SRet_gc = dyn_cast<AllocaInst>((CI->arg_begin()[1])->stripInBoundsOffsets());
+                                    Type *ElT = SRet_gc->getAllocatedType();
+                                    if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
+                                        S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
                 NoteOperandUses(S, BBS, I);
                 if (CI->canReturnTwice()) {
                     S.ReturnsTwice.push_back(CI);
@@ -1332,6 +1359,9 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                         callee->hasFnAttribute(Attribute::ArgMemOnly)) {
                         continue;
                     }
+                    if (MemTransferInst *MI = dyn_cast<MemTransferInst>(CI)) {
+                        MaybeTrackDst(S, MI);
+                    }
                 }
                 if (isa<IntrinsicInst>(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) ||
                     CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) {
@@ -1410,8 +1440,9 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     // We need to insert extra phis for the GC roots
                     LiftPhi(S, Phi);
                 }
-            } else if (isa<StoreInst>(&I)) {
+            } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
                 NoteOperandUses(S, BBS, I);
+                MaybeTrackStore(S, SI);
             } else if (isa<ReturnInst>(&I)) {
                 NoteOperandUses(S, BBS, I);
             } else if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(&I)) {
@@ -1440,6 +1471,118 @@ State LateLowerGCFrame::LocalScan(Function &F) {
     return S;
 }
 
+static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> irbuilder) {
+    Type *T_int32 = Type::getInt32Ty(V->getContext());
+    if (isptr) {
+        std::vector<Value*> IdxList{Idxs.size() + 1};
+        IdxList[0] = ConstantInt::get(T_int32, 0);
+        for (unsigned j = 0; j < Idxs.size(); ++j) {
+            IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
+        }
+        Value *GEP = irbuilder.CreateGEP(VTy, V, IdxList);
+        V = irbuilder.CreateLoad(GEP);
+    } else if (isa<PointerType>(V->getType())) {
+        assert(Idxs.empty());
+    }
+    else if (!Idxs.empty()) {
+        auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
+        Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
+        bool IsVector = isa<VectorType>(FinalT);
+        if (Idxs.size() > IsVector)
+            V = irbuilder.Insert(ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs));
+        if (IsVector)
+            V = irbuilder.Insert(ExtractElementInst::Create(V,
+                    ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())));
+    }
+    return V;
+}
+
+std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> irbuilder) {
+    auto Tracked = TrackCompositeType(STy);
+    std::vector<Value*> Ptrs;
+    for (unsigned i = 0; i < Tracked.size(); ++i) {
+        auto Idxs = makeArrayRef(Tracked[i]);
+        Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder);
+        Ptrs.push_back(Elem);
+    }
+    return std::move(Ptrs);
+}
+
+unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> irbuilder) {
+    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
+    for (unsigned i = 0; i < Ptrs.size(); ++i) {
+        Value *Elem = Ptrs[i];
+        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(Elem->getType(), Dst, i);
+        Value *shadowStore = irbuilder.CreateStore(Elem, Slot);
+        (void)shadowStore;
+        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+    }
+    return Ptrs.size();
+}
+
+
+// turn a memcpy into a set of loads
+void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
+    //Value *Dst = MI->getRawDest()->stripInBoundsOffsets();
+    //if (AllocaInst *AI = dyn_cast<AllocaInst>(Dst)) {
+    //    Type *STy = AI->getAllocatedType();
+    //    if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
+    //        return; // already numbered this
+    //    auto tracked = CountTrackedPointers(STy);
+    //    unsigned nroots = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+    //    if (nroots) {
+    //        assert(!tracked.derived);
+    //        if (!tracked.all) {
+    //            // materialize shadow LoadInst and StoreInst ops to make a copy of just the tracked values inside
+    //            //assert(MI->getLength() == DL.getTypeAllocSize(AI->getAllocatedType()) && !AI->isArrayAllocation()); // XXX: handle partial copy
+    //            Value *Src = MI->getSource();
+    //            Src = new BitCastInst(Src, STy->getPointerTo(MI->getSourceAddressSpace()), "", MI);
+    //            auto &Shadow = S.ShadowAllocas[AI];
+    //            if (!Shadow)
+    //                Shadow = new AllocaInst(T_prjlvalue, 0, ConstantInt::get(T_int32, nroots), "", MI);
+    //            AI = Shadow;
+    //            unsigned count = TrackWithShadow(Src, STy, true, AI, IRBuilder<>(MI));
+    //            assert(count == tracked.count); (void)count;
+    //        }
+    //        S.ArrayAllocas[AI] = nroots;
+    //    }
+    //}
+    //// TODO: else???
+}
+
+void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) {
+    Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets();
+    auto tracked = CountTrackedPointers(I->getValueOperand()->getType());
+    if (!tracked.count)
+        return; // nothing to track is being stored
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(PtrBase)) {
+        Type *STy = AI->getAllocatedType();
+        if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
+            return; // already numbered this
+        auto tracked = CountTrackedPointers(STy);
+        if (tracked.count) {
+            assert(!tracked.derived);
+            if (tracked.all) {
+                // track the Alloca directly
+                S.ArrayAllocas[AI] = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+                return;
+            }
+        }
+    }
+    else {
+        return; // assume it is rooted--TODO: should we be more conservative?
+ } + // track the Store with a Shadow + //auto &Shadow = S.ShadowAllocas[AI]; + //if (!Shadow) + // Shadow = new AllocaInst(T_prjlvalue, 0, ConstantInt::get(T_int32, tracked.count), "", MI); + //AI = Shadow; + //Value *Src = I->getValueOperand(); + //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); + //assert(count == tracked.count); (void)count; + S.TrackedStores.push_back(std::make_pair(I, tracked.count)); +} + /* * DataFlow equations: * LiveIn[BB] = UpExposedUses[BB] ∪ (LiveOut[BB] - Defs[BB]) @@ -1927,7 +2070,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { } else if (write_barrier_func && callee == write_barrier_func) { // The replacement for this requires creating new BasicBlocks // which messes up the loop. Queue all of them to be replaced later. - assert(CI->getNumArgOperands() == 2); + assert(CI->getNumArgOperands() >= 1); write_barriers.push_back(CI); ChangesMade = true; ++it; @@ -2000,10 +2143,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { ChangesMade = true; } } - for (auto CI: write_barriers) { + for (auto CI : write_barriers) { auto parent = CI->getArgOperand(0); - auto child = CI->getArgOperand(1); - if (parent == child || IsPermRooted(child, S)) { + if (std::all_of(CI->op_begin() + 1, CI->op_end(), + [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { CI->eraseFromParent(); continue; } @@ -2013,11 +2156,17 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); builder.SetInsertPoint(mayTrigTerm); - auto chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); - auto chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); + Value *anyChldNotMarked = NULL; + for (unsigned i = 1; i < CI->getNumArgOperands(); i++) { + Value *child = CI->getArgOperand(i); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); + Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); + anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; + } + assert(anyChldNotMarked); // handled by all_of test above MDBuilder MDB(parent->getContext()); SmallVector Weights{1, 9}; - auto trigTerm = SplitBlockAndInsertIfThen(chldNotMarked, mayTrigTerm, false, + auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, MDB.createBranchWeights(Weights)); builder.SetInsertPoint(trigTerm); builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); @@ -2118,7 +2267,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State MaxColor = C; // Insert instructions for the actual gc frame - if (MaxColor != -1 || !S.Allocas.empty()) { + if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) { // Create and push a GC frame. auto gcframe = CallInst::Create( getOrDeclare(jl_intrinsics::newGCFrame), @@ -2132,16 +2281,16 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State pushGcframe->insertAfter(ptlsStates); // Replace Allocas - unsigned AllocaSlot = 0; + unsigned AllocaSlot = 2; // first two words are metadata auto replace_alloca = [this, gcframe, &AllocaSlot](AllocaInst *&AI) { // Pick a slot for the alloca. 
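        // Words 0 and 1 of the frame hold the encoded root count and the link
        // to the previous frame; getGCFrameSlot takes an index relative to the
        // first root slot, hence the `AllocaSlot - 2` below.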
unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size - assert(align <= 16 && "Alignment exceeds llvm-final-gc-lowering abilities"); + assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities"); if (align > 1) AllocaSlot = LLT_ALIGN(AllocaSlot, align); Instruction *slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), - {gcframe, ConstantInt::get(T_int32, AllocaSlot)}); + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); slotAddress->insertAfter(gcframe); slotAddress->takeName(AI); @@ -2173,12 +2322,35 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State replace_alloca(AI); AllocaSlot += ns; } - auto NRoots = ConstantInt::get(T_int32, MaxColor + 1 + AllocaSlot); + for (auto AI : S.ArrayAllocas) { + replace_alloca(AI.first); + AllocaSlot += AI.second; + } + for (auto Store : S.TrackedStores) { + auto SI = Store.first; + auto Base = SI->getValueOperand(); + //auto Tracked = TrackCompositeType(Base->getType()); + for (unsigned i = 0; i < Store.second; ++i) { + auto slotAddress = CallInst::Create( + getOrDeclare(jl_intrinsics::getGCFrameSlot), + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); + slotAddress->insertAfter(gcframe); + auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? -1 : i); + auto Elem = MaybeExtractScalar(S, ValExpr, SI); + //auto Idxs = makeArrayRef(Tracked[i]); + //Value *Elem = ExtractScalar(Base, true, Idxs, SI); + Value *shadowStore = new StoreInst(Elem, slotAddress, SI); + (void)shadowStore; + // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + AllocaSlot++; + } + } + auto NRoots = ConstantInt::get(T_int32, MaxColor + 1 + AllocaSlot - 2); gcframe->setArgOperand(0, NRoots); pushGcframe->setArgOperand(1, NRoots); // Insert GC frame stores - PlaceGCFrameStores(S, AllocaSlot, Colors, gcframe); + PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe); // Insert GCFrame pops for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { if (isa(I->getTerminator())) { diff --git a/src/staticdata.c b/src/staticdata.c index f9e0171c61405..555086def2505 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -340,19 +340,33 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v) jl_serialize_value(s, jl_array_ptr_ref(ar, i)); } } + else if (ar->flags.hasptr) { + const char *data = (const char*)jl_array_data(ar); + uint16_t elsz = ar->elsize; + size_t i, l = jl_array_len(ar); + jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar)); + size_t j, np = et->layout->npointers; + for (i = 0; i < l; i++) { + for (j = 0; j < np; j++) { + uint32_t ptr = jl_ptr_offset(et, j); + jl_value_t *fld = ((jl_value_t**)data)[ptr]; + JL_GC_PROMISE_ROOTED(fld); + jl_serialize_value(s, fld); + } + data += elsz; + } + } } else if (jl_typeis(v, jl_module_type)) { jl_serialize_module(s, (jl_module_t*)v); } - else { + else if (t->layout->nfields > 0) { char *data = (char*)jl_data_ptr(v); - size_t i, nf = jl_datatype_nfields(t); - for (i = 0; i < nf; i++) { - if (jl_field_isptr(t, i)) { - char *slot = data + jl_field_offset(t, i); - jl_value_t *fld = *(jl_value_t**)slot; - jl_serialize_value(s, fld); - } + size_t i, np = t->layout->npointers; + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(t, i); + jl_value_t *fld = ((jl_value_t* const*)data)[ptr]; + jl_serialize_value(s, fld); } } } @@ -644,7 +658,7 @@ static void jl_write_values(jl_serializer_state *s) newa->flags.isshared = 0; // write data - if 
(!ar->flags.ptrarray) {
+        if (!ar->flags.ptrarray && !ar->flags.hasptr) {
             uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16);
             // realign stream to max(data-align(array), sizeof(void*))
             write_padding(s->const_data, data - ios_pos(s->const_data));
@@ -665,9 +679,35 @@ static void jl_write_values(jl_serializer_state *s)
             newa->data = (void*)tsz; // relocation offset
             arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
             arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
-            size_t i;
-            for (i = 0; i < alen; i++) {
-                write_pointerfield(s, jl_array_ptr_ref(v, i));
+            if (ar->flags.hasptr) {
+                // copy all of the data first
+                const char *data = (const char*)jl_array_data(ar);
+                ios_write(s->s, data, tot);
+                // then rewrite all of the embedded pointers to null+relocation
+                uint16_t elsz = ar->elsize;
+                jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(t);
+                size_t j, np = et->layout->npointers;
+                size_t i;
+                for (i = 0; i < alen; i++) {
+                    for (j = 0; j < np; j++) {
+                        size_t offset = i * elsz + jl_ptr_offset(et, j) * sizeof(jl_value_t*);
+                        jl_value_t *fld = *(jl_value_t**)&data[offset];
+                        if (fld != NULL) {
+                            arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location
+                            arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
+                            memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none)
+                        }
+                        else {
+                            assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL);
+                        }
+                    }
+                }
+            }
+            else {
+                size_t i;
+                for (i = 0; i < alen; i++) {
+                    write_pointerfield(s, jl_array_ptr_ref(v, i));
+                }
             }
         }
     }
@@ -689,8 +729,7 @@ static void jl_write_values(jl_serializer_state *s)
         }
     }
     else if (jl_is_string(v)) {
-        ios_write(s->s, (char*)v, sizeof(void*));
-        ios_write(s->s, jl_string_data(v), jl_string_len(v));
+        ios_write(s->s, (char*)v, sizeof(void*) + jl_string_len(v));
         write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility
     }
     else if (jl_datatype_nfields(t) == 0) {
@@ -716,18 +755,17 @@ static void jl_write_values(jl_serializer_state *s)
             write_pointer(s->s);
         }
         else {
+            const char *data = (const char*)v;
             size_t i, nf = jl_datatype_nfields(t);
             size_t tot = 0;
             for (i = 0; i < nf; i++) {
                 size_t offset = jl_field_offset(t, i);
-                char *slot = (char*)v + offset;
+                const char *slot = data + offset;
                 write_padding(s->s, offset - tot);
                 tot = offset;
                 size_t fsz = jl_field_size(t, i);
-                if (jl_field_isptr(t, i) > 0) {
-                    write_pointerfield(s, *(jl_value_t**)slot);
-                }
-                else if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
+                if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
+                    assert(!jl_field_isptr(t, i));
                     write_pointer(s->s);
                 }
                 else if (fsz > 0) {
@@ -736,6 +774,17 @@ static void jl_write_values(jl_serializer_state *s)
                 tot += fsz;
             }
 
+            size_t np = t->layout->npointers;
+            for (i = 0; i < np; i++) {
+                size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
+                jl_value_t *fld = *(jl_value_t**)&data[offset];
+                if (fld != NULL) {
+                    arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
+                    memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none)
+                }
+            }
+
             if (jl_is_method(v)) {
                 write_padding(s->s, sizeof(jl_method_t) - tot);
             }
diff --git a/stdlib/Serialization/src/Serialization.jl
b/stdlib/Serialization/src/Serialization.jl index 9f860a08a045f..dc6225eb4acd9 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -104,7 +104,7 @@ const TUPLE_TAG = sertag(Tuple) const SIMPLEVECTOR_TAG = sertag(SimpleVector) const SYMBOL_TAG = sertag(Symbol) const INT8_TAG = sertag(Int8) -const ARRAY_TAG = sertag(Array) +const ARRAY_TAG = findfirst(==(Array), TAGS)%Int32 const EXPR_TAG = sertag(Expr) const MODULE_TAG = sertag(Module) const METHODINSTANCE_TAG = sertag(Core.MethodInstance) diff --git a/stdlib/SparseArrays/test/higherorderfns.jl b/stdlib/SparseArrays/test/higherorderfns.jl index d6f31f3120642..cd77fca6951a5 100644 --- a/stdlib/SparseArrays/test/higherorderfns.jl +++ b/stdlib/SparseArrays/test/higherorderfns.jl @@ -45,11 +45,11 @@ end # --> test map! entry point fX = map(+, fA, fB); X = sparse(fX) map!(+, X, A, B); X = sparse(fX) # warmup for @allocated - @test (@allocated map!(+, X, A, B)) == 0 + @test (@allocated map!(+, X, A, B)) < 300 @test map!(+, X, A, B) == sparse(map!(+, fX, fA, fB)) fX = map(*, fA, fB); X = sparse(fX) map!(*, X, A, B); X = sparse(fX) # warmup for @allocated - @test (@allocated map!(*, X, A, B)) == 0 + @test (@allocated map!(*, X, A, B)) < 300 @test map!(*, X, A, B) == sparse(map!(*, fX, fA, fB)) @test map!(f, X, A, B) == sparse(map!(f, fX, fA, fB)) @test_throws DimensionMismatch map!(f, X, A, spzeros((shapeA .- 1)...)) @@ -72,11 +72,11 @@ end # --> test map! entry point fX = map(+, fA, fB, fC); X = sparse(fX) map!(+, X, A, B, C); X = sparse(fX) # warmup for @allocated - @test (@allocated map!(+, X, A, B, C)) == 0 + @test (@allocated map!(+, X, A, B, C)) < 300 @test map!(+, X, A, B, C) == sparse(map!(+, fX, fA, fB, fC)) fX = map(*, fA, fB, fC); X = sparse(fX) map!(*, X, A, B, C); X = sparse(fX) # warmup for @allocated - @test (@allocated map!(*, X, A, B, C)) == 0 + @test (@allocated map!(*, X, A, B, C)) < 300 @test map!(*, X, A, B, C) == sparse(map!(*, fX, fA, fB, fC)) @test map!(f, X, A, B, C) == sparse(map!(f, fX, fA, fB, fC)) @test_throws DimensionMismatch map!(f, X, A, B, spzeros((shapeA .- 1)...)) @@ -119,12 +119,12 @@ end # --> test broadcast! entry point / zero-preserving op broadcast!(sin, fZ, fX); Z = sparse(fZ) broadcast!(sin, Z, X); Z = sparse(fZ) # warmup for @allocated - @test (@allocated broadcast!(sin, Z, X)) == 0 + @test (@allocated broadcast!(sin, Z, X)) < 300 @test broadcast!(sin, Z, X) == sparse(broadcast!(sin, fZ, fX)) # --> test broadcast! entry point / not-zero-preserving op broadcast!(cos, fZ, fX); Z = sparse(fZ) broadcast!(cos, Z, X); Z = sparse(fZ) # warmup for @allocated - @test (@allocated broadcast!(cos, Z, X)) == 0 + @test (@allocated broadcast!(cos, Z, X)) < 300 @test broadcast!(cos, Z, X) == sparse(broadcast!(cos, fZ, fX)) # --> test shape checks for broadcast! entry point # TODO strengthen this test, avoiding dependence on checking whether @@ -143,12 +143,12 @@ end # --> test broadcast! entry point / zero-preserving op broadcast!(sin, fV, fX); V = sparse(fV) broadcast!(sin, V, X); V = sparse(fV) # warmup for @allocated - @test (@allocated broadcast!(sin, V, X)) == 0 + @test (@allocated broadcast!(sin, V, X)) < 300 @test broadcast!(sin, V, X) == sparse(broadcast!(sin, fV, fX)) # --> test broadcast! 
entry point / not-zero-preserving broadcast!(cos, fV, fX); V = sparse(fV) broadcast!(cos, V, X); V = sparse(fV) # warmup for @allocated - @test (@allocated broadcast!(cos, V, X)) == 0 + @test (@allocated broadcast!(cos, V, X)) < 300 @test broadcast!(cos, V, X) == sparse(broadcast!(cos, fV, fX)) # --> test shape checks for broadcast! entry point # TODO strengthen this test, avoiding dependence on checking whether @@ -196,17 +196,17 @@ end # --> test broadcast! entry point / +-like zero-preserving op broadcast!(+, fZ, fX, fY); Z = sparse(fZ) broadcast!(+, Z, X, Y); Z = sparse(fZ) # warmup for @allocated - @test (@allocated broadcast!(+, Z, X, Y)) == 0 + @test (@allocated broadcast!(+, Z, X, Y)) < 300 @test broadcast!(+, Z, X, Y) == sparse(broadcast!(+, fZ, fX, fY)) # --> test broadcast! entry point / *-like zero-preserving op broadcast!(*, fZ, fX, fY); Z = sparse(fZ) broadcast!(*, Z, X, Y); Z = sparse(fZ) # warmup for @allocated - @test (@allocated broadcast!(*, Z, X, Y)) == 0 + @test (@allocated broadcast!(*, Z, X, Y)) < 300 @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY)) # --> test broadcast! entry point / not zero-preserving op broadcast!(f, fZ, fX, fY); Z = sparse(fZ) broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated - @test (@allocated broadcast!(f, Z, X, Y)) == 0 + @test (@allocated broadcast!(f, Z, X, Y)) < 300 @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY)) # --> test shape checks for both broadcast and broadcast! entry points # TODO strengthen this test, avoiding dependence on checking whether @@ -259,17 +259,17 @@ end # --> test broadcast! entry point / +-like zero-preserving op fQ = broadcast(+, fX, fY, fZ); Q = sparse(fQ) broadcast!(+, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated - @test (@allocated broadcast!(+, Q, X, Y, Z)) == 0 + @test (@allocated broadcast!(+, Q, X, Y, Z)) < 300 @test broadcast!(+, Q, X, Y, Z) == sparse(broadcast!(+, fQ, fX, fY, fZ)) # --> test broadcast! entry point / *-like zero-preserving op fQ = broadcast(*, fX, fY, fZ); Q = sparse(fQ) broadcast!(*, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated - @test (@allocated broadcast!(*, Q, X, Y, Z)) == 0 + @test (@allocated broadcast!(*, Q, X, Y, Z)) < 300 @test broadcast!(*, Q, X, Y, Z) == sparse(broadcast!(*, fQ, fX, fY, fZ)) # --> test broadcast! entry point / not zero-preserving op fQ = broadcast(f, fX, fY, fZ); Q = sparse(fQ) broadcast!(f, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated - @test (@allocated broadcast!(f, Q, X, Y, Z)) == 0 + @test (@allocated broadcast!(f, Q, X, Y, Z)) < 300 @test broadcast!(f, Q, X, Y, Z) == sparse(broadcast!(f, fQ, fX, fY, fZ)) # --> test shape checks for both broadcast and broadcast! entry points # TODO strengthen this test, avoiding dependence on checking whether @@ -343,11 +343,8 @@ end @test broadcast!(*, X, sparseargs...) == sparse(broadcast!(*, fX, denseargs...)) @test isa(@inferred(broadcast!(*, X, sparseargs...)), SparseMatrixCSC{elT}) X = sparse(fX) # reset / warmup for @allocated test - # It'd be nice for this to be zero, but there's currently some constant overhead - @test_broken (@allocated broadcast!(*, X, sparseargs...)) == 0 - X = sparse(fX) # reset / warmup for @allocated test # And broadcasting over Transposes currently requires making a CSC copy, so we must account for that in the bounds - @test (@allocated broadcast!(*, X, sparseargs...)) <= (sum(x->isa(x, Transpose) ? 
@allocated(SparseMatrixCSC(x))+128 : 0, sparseargs) + 128) + @test (@allocated broadcast!(*, X, sparseargs...)) <= (sum(x->isa(x, Transpose) ? @allocated(SparseMatrixCSC(x)) + 128 : 0, sparseargs) + 128 + 900) # about zero to 3k bytes end end # test combinations at the limit of inference (eight arguments net) @@ -367,9 +364,7 @@ end @test broadcast!(*, X, sparseargs...) == sparse(broadcast!(*, fX, denseargs...)) @test isa(@inferred(broadcast!(*, X, sparseargs...)), SparseMatrixCSC{elT}) X = sparse(fX) # reset / warmup for @allocated test - @test_broken (@allocated broadcast!(*, X, sparseargs...)) == 0 - X = sparse(fX) # reset / warmup for @allocated test - @test (@allocated broadcast!(*, X, sparseargs...)) <= 128 + @test (@allocated broadcast!(*, X, sparseargs...)) <= 900 end end diff --git a/test/core.jl b/test/core.jl index e92cb2b96eb60..703ae70cea7b2 100644 --- a/test/core.jl +++ b/test/core.jl @@ -4116,82 +4116,90 @@ function f15180(x::T) where T end @test map(f15180(1), [1,2]) == [(Int,1),(Int,1)] -let ary = Vector{Any}(undef, 10) - check_undef_and_fill(ary, rng) = for i in rng - @test !isassigned(ary, i) - ary[i] = (Float64(i), i) # some non-cached content - @test isassigned(ary, i) - end - # Check if the memory is initially zerod and fill it with value - # to check if these values are not reused later. - check_undef_and_fill(ary, 1:10) - # Check if the memory grown at the end are zerod - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 11:20) - # Make sure the content of the memory deleted at the end are not reused - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 5) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 5) - check_undef_and_fill(ary, 16:20) - - # Now check grow/del_end - ary = Vector{Any}(undef, 1010) - check_undef_and_fill(ary, 1:1010) - # This del_beg should move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 1000) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 1000) - check_undef_and_fill(ary, 1:1000) - ary = Vector{Any}(undef, 1010) - check_undef_and_fill(ary, 1:1010) - # This del_beg should not move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) - - ary = Vector{Any}(undef, 1010) - check_undef_and_fill(ary, 1:1010) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1011:1020) - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) - - # Make sure newly malloc'd buffers are filled with 0 - # test this for a few different sizes since we need to make sure - # we are malloc'ing the buffer after the grow_end and malloc is not using - # mmap directly (which may return a zero'd new page). 
- for n in [50, 51, 100, 101, 200, 201, 300, 301] - ary = Vector{Any}(undef, n) - # Try to free the previous buffer that was filled with random content - # and to increase the chance of getting a non-zero'd buffer next time - GC.gc() - GC.gc() - GC.gc() - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, n) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - check_undef_and_fill(ary, 1:(2n + 4)) - end - - ary = Vector{Any}(undef, 100) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10000) - ary[:] = 1:length(ary) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10000) - # grow on the back until a buffer reallocation happens - cur_ptr = pointer(ary) - while cur_ptr == pointer(ary) - len = length(ary) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - for i in (len + 1):(len + 10) +struct ValueWrapper + vpadding::NTuple{2,VecElement{UInt}} + value + ValueWrapper(value) = new((typemax(UInt), typemax(UInt)), value) +end +Base.convert(::Type{ValueWrapper}, x) = ValueWrapper(x) +for T in (Any, ValueWrapper) + let ary = Vector{T}(undef, 10) + check_undef_and_fill(ary, rng) = for i in rng @test !isassigned(ary, i) + ary[i] = (Float64(i), i) # some non-cached content + @test isassigned(ary, i) + end + # Check if the memory is initially zerod and fill it with value + # to check if these values are not reused later. + check_undef_and_fill(ary, 1:10) + # Check if the memory grown at the end are zerod + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) + check_undef_and_fill(ary, 11:20) + # Make sure the content of the memory deleted at the end are not reused + ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 5) + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 5) + check_undef_and_fill(ary, 16:20) + + # Now check grow/del_end + ary = Vector{T}(undef, 1010) + check_undef_and_fill(ary, 1:1010) + # This del_beg should move the buffer + ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 1000) + ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 1000) + check_undef_and_fill(ary, 1:1000) + ary = Vector{T}(undef, 1010) + check_undef_and_fill(ary, 1:1010) + # This del_beg should not move the buffer + ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10) + ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) + check_undef_and_fill(ary, 1:10) + + ary = Vector{T}(undef, 1010) + check_undef_and_fill(ary, 1:1010) + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) + check_undef_and_fill(ary, 1011:1020) + ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 10) + ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) + check_undef_and_fill(ary, 1:10) + + # Make sure newly malloc'd buffers are filled with 0 + # test this for a few different sizes since we need to make sure + # we are malloc'ing the buffer after the grow_end and malloc is not using + # mmap directly (which may return a zero'd new page). 
+ for n in [50, 51, 100, 101, 200, 201, 300, 301] + ary = Vector{T}(undef, n) + # Try to free the previous buffer that was filled with random content + # and to increase the chance of getting a non-zero'd buffer next time + GC.gc() + GC.gc() + GC.gc() + ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) + ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 4) + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, n) + ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) + check_undef_and_fill(ary, 1:(2n + 4)) + end + + ary = Vector{T}(undef, 100) + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10000) + ary[:] = 1:length(ary) + ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10000) + # grow on the back until a buffer reallocation happens + cur_ptr = pointer(ary) + while cur_ptr == pointer(ary) + len = length(ary) + ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) + for i in (len + 1):(len + 10) + @test !isassigned(ary, i) + end end - end - ary = Vector{Any}(undef, 100) - ary[:] = 1:length(ary) - ccall(:jl_array_grow_at, Cvoid, (Any, Csize_t, Csize_t), ary, 50, 10) - for i in 51:60 - @test !isassigned(ary, i) + ary = Vector{T}(undef, 100) + ary[:] = 1:length(ary) + ccall(:jl_array_grow_at, Cvoid, (Any, Csize_t, Csize_t), ary, 50, 10) + for i in 51:60 + @test !isassigned(ary, i) + end end end @@ -4355,6 +4363,7 @@ function test_copy_alias(::Type{T}) where T end test_copy_alias(Int) test_copy_alias(Any) +test_copy_alias(Union{Int,Nothing}) # issue #15370 @test isdefined(Core, :Box) diff --git a/test/threads_exec.jl b/test/threads_exec.jl index 30a2dacda17a5..22f9a64569d2c 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -439,16 +439,7 @@ end function test_thread_cfunction() # ensure a runtime call to `get_trampoline` will be created # TODO: get_trampoline is not thread-safe (as this test shows) - function complex_cfunction(a) - s = zero(eltype(a)) - @inbounds @simd for i in a - s += muladd(a[i], a[i], -2) - end - return s - end - fs = [ let a = zeros(10) - () -> complex_cfunction(a) - end for i in 1:1000 ] + fs = [ Core.Box() for i in 1:1000 ] @noinline cf(f) = @cfunction $f Float64 () cfs = Vector{Base.CFunction}(undef, length(fs)) cf1 = cf(fs[1])