Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8341757: Field layout computation allowing atomic and nullable flattening #1275

Draft
wants to merge 2 commits into
base: lworld
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,19 @@ void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorato
}
}

// Emits the code that copies a flat field from 'src' to 'dst', with the field
// described by the layout info held in 'inline_layout_info'.
//
// The copy itself is not open-coded: it is involved enough that assembly offers
// no real short-cut over the C++ implementation, so a leaf call into
// BarrierSetRuntime is emitted instead of maintaining hundreds of hand-written
// instructions.
void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                          Register src, Register dst, Register inline_layout_info) {
  // The only decorator-dependent choice is which runtime entry point to call.
  const bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  const address runtime_entry = dest_uninitialized
      ? CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized2)
      : CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy2);
  __ call_VM_leaf(runtime_entry, src, dst, inline_layout_info);
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {

virtual void value_copy(MacroAssembler* masm, DecoratorSet decorators,
Register src, Register dst, Register value_klass);
// Emits a copy of a flat field from 'src' to 'dst'. Unlike value_copy, the
// third register carries the field's inline layout info rather than a value
// klass. The base implementation emits a leaf call into BarrierSetRuntime,
// selecting the "dest uninitialized" entry when IS_DEST_UNINITIALIZED is set
// in 'decorators'.
virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
Register src, Register dst, Register inline_layout_info);

virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
Expand Down
37 changes: 17 additions & 20 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,53 +229,50 @@ void InterpreterMacroAssembler::allocate_instance(Register klass, Register new_o
}
}

void InterpreterMacroAssembler::read_flat_field(Register holder_klass,
void InterpreterMacroAssembler::read_flat_field(Register entry,
Register field_index, Register field_offset,
Register temp, Register obj) {
Label alloc_failed, empty_value, done;
const Register src = field_offset;
const Register alloc_temp = rscratch1;
const Register dst_temp = temp;
assert_different_registers(obj, holder_klass, field_index, field_offset, dst_temp);
const Register alloc_temp = r10;
const Register dst_temp = field_index;
const Register layout_info = temp;
assert_different_registers(obj, entry, field_index, field_offset, temp, alloc_temp);

// Grab the inline field klass
push(holder_klass);
const Register field_klass = holder_klass;
get_inline_type_field_klass(holder_klass, field_index, field_klass);
ldr(rscratch1, Address(entry, in_bytes(ResolvedFieldEntry::field_holder_offset())));
inline_layout_info(rscratch1, field_index, layout_info);

//check for empty value klass
test_klass_is_empty_inline_type(field_klass, dst_temp, empty_value);
const Register field_klass = dst_temp;
ldr(field_klass, Address(layout_info, in_bytes(InlineLayoutInfo::klass_offset())));

// check for empty value klass
test_klass_is_empty_inline_type(field_klass, rscratch1, empty_value);

// allocate buffer
push(obj); // save holder
allocate_instance(field_klass, obj, alloc_temp, dst_temp, false, alloc_failed);
allocate_instance(field_klass, obj, alloc_temp, rscratch2, false, alloc_failed);

// Have an oop instance buffer, copy into it
data_for_oop(obj, dst_temp, field_klass);
data_for_oop(obj, dst_temp, field_klass); // danger, uses rscratch1
pop(alloc_temp); // restore holder
lea(src, Address(alloc_temp, field_offset));
// call_VM_leaf, clobbers a few regs, save restore new obj
push(obj);
access_value_copy(IS_DEST_UNINITIALIZED, src, dst_temp, field_klass);
flat_field_copy(IS_DEST_UNINITIALIZED, src, dst_temp, layout_info);
pop(obj);
pop(holder_klass);
b(done);

bind(empty_value);
get_empty_inline_type_oop(field_klass, dst_temp, obj);
pop(holder_klass);
get_empty_inline_type_oop(field_klass, alloc_temp, obj);
b(done);

bind(alloc_failed);
pop(obj);
pop(holder_klass);
call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field),
obj, field_index, holder_klass);
obj, entry);

bind(done);

// Ensure the stores to copy the inline field contents are visible
// before any subsequent store that publishes this reference.
membar(Assembler::StoreStore);
}

Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ class InterpreterMacroAssembler: public MacroAssembler {
// - input holder object via "obj", which must be r0,
// will return new instance via the same reg
// - assumes holder_klass and valueKlass field klass have both been resolved
void read_flat_field(Register holder_klass,
void read_flat_field(Register entry,
Register field_index, Register field_offset,
Register temp, Register obj = r0);
Register temp, Register obj = r0);

// Allocate value buffer in "obj" and read in flat element at the given index
// NOTES:
Expand Down
34 changes: 23 additions & 11 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5052,6 +5052,12 @@ void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Re
bs->value_copy(this, decorators, src, dst, inline_klass);
}

// Copies a flat field from 'src' to 'dst' by handing the request, unchanged,
// to the BarrierSetAssembler of the barrier set currently in use.
// 'inline_layout_info' is forwarded as the register describing the field layout.
void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst,
                                     Register inline_layout_info) {
  BarrierSet::barrier_set()->barrier_set_assembler()
      ->flat_field_copy(this, decorators, src, dst, inline_layout_info);
}

void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
ldr(offset, Address(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset()));
ldrw(offset, Address(offset, InlineKlass::first_field_offset_offset()));
Expand Down Expand Up @@ -5297,18 +5303,24 @@ void MacroAssembler::verify_tlab() {
#endif
}

void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
ldr(inline_klass, Address(klass, InstanceKlass::inline_type_field_klasses_offset()));
#ifdef ASSERT
{
Label done;
cbnz(inline_klass, done);
stop("get_inline_type_field_klass contains no inline klass");
bind(done);
void MacroAssembler::get_inline_type_field_klass(Register holder_klass, Register index, Register inline_klass) {
inline_layout_info(holder_klass, index, inline_klass);
ldr(inline_klass, Address(inline_klass, InlineLayoutInfo::klass_offset()));
}

void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) {
assert_different_registers(holder_klass, index, layout_info);
InlineLayoutInfo array[2];
int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements
if (is_power_of_2(size)) {
lsl(index, index, log2i_exact(size)); // Scale index by power of 2
} else {
mov(layout_info, size);
mul(index, index, layout_info); // Scale the index to be the entry index * array_element_size
}
#endif
lea(inline_klass, Address(inline_klass, Array<InlineKlass*>::base_offset_in_bytes()));
ldr(inline_klass, Address(inline_klass, index, Address::lsl(3)));
ldr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset()));
add(layout_info, layout_info, Array<InlineLayoutInfo>::base_offset_in_bytes());
lea(layout_info, Address(layout_info, index));
}

// Writes to stack successive pages until offset reached to check for
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,7 @@ class MacroAssembler: public Assembler {
Register tmp1, Register tmp2, Register tmp3);

void access_value_copy(DecoratorSet decorators, Register src, Register dst, Register inline_klass);
// Field-specific counterpart of access_value_copy: the third register holds the
// field's inline layout info instead of an inline klass. Delegates to the
// BarrierSetAssembler of the current barrier set.
void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info);

// inline type data payload offsets...
void first_field_offset(Register inline_klass, Register offset);
Expand Down Expand Up @@ -1007,6 +1008,8 @@ class MacroAssembler: public Assembler {

// For field "index" within "klass", return inline_klass ...
void get_inline_type_field_klass(Register klass, Register index, Register inline_klass);
// For field "index" within "holder_klass", leave in "layout_info" the address
// of the matching element of the holder's inline layout info array.
// NOTE: "index" is scaled in place by the element size, so its original value
// is destroyed; the three registers must be distinct.
void inline_layout_info(Register holder_klass, Register index, Register layout_info);


// interface method calling
void lookup_interface_method(Register recv_klass,
Expand Down
20 changes: 13 additions & 7 deletions src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2841,7 +2841,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
__ bind(is_flat);
// field is flat
__ mov(r0, obj);
__ read_flat_field(klass, field_index, off, inline_klass /* temp */, r0);
__ read_flat_field(cache, field_index, off, inline_klass /* temp */, r0);
__ verify_oop(r0);
__ push(atos);
__ b(rewrite_inline);
Expand Down Expand Up @@ -3117,13 +3117,16 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
do_oop_store(_masm, field, r0, IN_HEAP);
__ b(rewrite_inline);
__ bind(is_flat);
// field is flat
__ load_field_entry(cache, index); // reload field entry (cache) because it was erased by tos_state
__ load_unsigned_short(index, Address(cache, in_bytes(ResolvedFieldEntry::field_index_offset())));
__ ldr(r2, Address(cache, in_bytes(ResolvedFieldEntry::field_holder_offset())));
__ inline_layout_info(r2, index, r6);
pop_and_check_object(obj);
assert_different_registers(r0, inline_klass, obj, off);
__ load_klass(inline_klass, r0);
__ data_for_oop(r0, r0, inline_klass);
__ add(obj, obj, off);
__ access_value_copy(IN_HEAP, r0, obj, inline_klass);
// because we use InlineLayoutInfo, we need special value access code specialized for fields (arrays will need a different API)
__ flat_field_copy(IN_HEAP, r0, obj, r6);
__ b(rewrite_inline);
__ bind(has_null_marker);
assert_different_registers(r0, cache, r19);
Expand Down Expand Up @@ -3363,10 +3366,14 @@ void TemplateTable::fast_storefield(TosState state)
__ b(done);
__ bind(is_flat);
// field is flat
__ load_field_entry(r4, r3);
__ load_unsigned_short(r3, Address(r4, in_bytes(ResolvedFieldEntry::field_index_offset())));
__ ldr(r4, Address(r4, in_bytes(ResolvedFieldEntry::field_holder_offset())));
__ inline_layout_info(r4, r3, r5);
__ load_klass(r4, r0);
__ data_for_oop(r0, r0, r4);
__ lea(rscratch1, field);
__ access_value_copy(IN_HEAP, r0, rscratch1, r4);
__ flat_field_copy(IN_HEAP, r0, rscratch1, r5);
__ b(done);
__ bind(has_null_marker);
__ load_field_entry(r4, r1);
Expand Down Expand Up @@ -3489,8 +3496,7 @@ void TemplateTable::fast_accessfield(TosState state)
__ bind(is_flat);
// field is flat
__ load_unsigned_short(index, Address(r2, in_bytes(ResolvedFieldEntry::field_index_offset())));
__ ldr(klass, Address(r2, in_bytes(ResolvedFieldEntry::field_holder_offset())));
__ read_flat_field(klass, index, r1, tmp /* temp */, r0);
__ read_flat_field(r2, index, r1, tmp /* temp */, r0);
__ verify_oop(r0);
__ b(Done);
__ bind(has_null_marker);
Expand Down
13 changes: 13 additions & 0 deletions src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,19 @@ void BarrierSetAssembler::value_copy(MacroAssembler* masm, DecoratorSet decorato
}
}

// Emits the code that copies a flat field from 'src' to 'dst', with the field
// described by the layout info held in 'inline_layout_info'.
//
// The copy is deliberately delegated to the runtime: the logic is complex,
// assembly provides no meaningful short-cut over the C++ version, and a leaf
// call avoids maintaining hundreds of hand-rolled instructions.
void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                          Register src, Register dst, Register inline_layout_info) {
  // Choose the runtime routine first, then emit one leaf call to it.
  address copy_routine = CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy2);
  if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
    copy_routine = CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized2);
  }
  __ call_VM_leaf(copy_routine, src, dst, inline_layout_info);
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {

virtual void value_copy(MacroAssembler* masm, DecoratorSet decorators,
Register src, Register dst, Register value_klass);
// Emits a copy of a flat field from 'src' to 'dst'. Unlike value_copy, the
// third register carries the field's inline layout info rather than a value
// klass. The base implementation emits a leaf call into BarrierSetRuntime,
// selecting the "dest uninitialized" entry when IS_DEST_UNINITIALIZED is set
// in 'decorators'.
virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
Register src, Register dst, Register inline_layout_info);

// The copy_[load/store]_at functions are used by arraycopy stubs. Be careful to only use
// r10 (aka rscratch1) in a context where restore_arg_regs_using_thread has been used instead
Expand Down
43 changes: 23 additions & 20 deletions src/hotspot/cpu/x86/interp_masm_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1202,50 +1202,53 @@ void InterpreterMacroAssembler::allocate_instance(Register klass, Register new_o
}
}


void InterpreterMacroAssembler::read_flat_field(Register holder_klass,
Register field_index, Register field_offset,
Register obj) {
void InterpreterMacroAssembler::read_flat_field(Register entry, Register tmp1, Register tmp2, Register obj) {
Label alloc_failed, empty_value, done;
const Register src = field_offset;
const Register alloc_temp = LP64_ONLY(rscratch1) NOT_LP64(rsi);
const Register dst_temp = LP64_ONLY(rscratch2) NOT_LP64(rdi);
assert_different_registers(obj, holder_klass, field_index, field_offset, dst_temp);
assert_different_registers(obj, entry, tmp1, tmp2, dst_temp, r8, r9);

// FIXME: code below could be re-written to better use InlineLayoutInfo data structure
// see aarch64 version

// Grab the inline field klass
push(holder_klass);
const Register field_klass = holder_klass;
get_inline_type_field_klass(holder_klass, field_index, field_klass);
const Register field_klass = tmp1;
load_unsigned_short(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::field_index_offset())));
movptr(tmp1, Address(entry, ResolvedFieldEntry::field_holder_offset()));
get_inline_type_field_klass(tmp1, tmp2, field_klass);

//check for empty value klass
//check for empty value klass
test_klass_is_empty_inline_type(field_klass, dst_temp, empty_value);

// allocate buffer
push(obj); // save holder
push(obj); // push object being read from // FIXME spilling on stack could probably be avoided by using tmp2
allocate_instance(field_klass, obj, alloc_temp, dst_temp, false, alloc_failed);

// Have an oop instance buffer, copy into it
load_unsigned_short(r9, Address(entry, in_bytes(ResolvedFieldEntry::field_index_offset())));
movptr(r8, Address(entry, in_bytes(ResolvedFieldEntry::field_holder_offset())));
inline_layout_info(r8, r9, r8); // holder, index, info => InlineLayoutInfo into r8

data_for_oop(obj, dst_temp, field_klass);
pop(alloc_temp); // restore holder
lea(src, Address(alloc_temp, field_offset));
pop(alloc_temp); // restore object being read from
load_sized_value(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::field_offset_offset())), sizeof(int), true /*is_signed*/);
lea(tmp2, Address(alloc_temp, tmp2));
// call_VM_leaf, clobbers a few regs, save restore new obj
push(obj);
access_value_copy(IS_DEST_UNINITIALIZED, src, dst_temp, field_klass);
// access_value_copy(IS_DEST_UNINITIALIZED, tmp2, dst_temp, field_klass);
flat_field_copy(IS_DEST_UNINITIALIZED, tmp2, dst_temp, r8);
pop(obj);
pop(holder_klass);
jmp(done);

bind(empty_value);
get_empty_inline_type_oop(field_klass, dst_temp, obj);
pop(holder_klass);
jmp(done);

bind(alloc_failed);
pop(obj);
pop(holder_klass);
call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field),
obj, field_index, holder_klass);

call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field),
obj, entry);
get_vm_result(obj, r15_thread);
bind(done);
}

Expand Down
8 changes: 4 additions & 4 deletions src/hotspot/cpu/x86/interp_masm_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,18 +212,18 @@ class InterpreterMacroAssembler: public MacroAssembler {
bool notify_jvmdi = true);
void get_method_counters(Register method, Register mcs, Label& skip);

// Kills t1 and t2, perserves klass, return allocation in new_obj
// Kills t1 and t2, preserves klass, return allocation in new_obj
void allocate_instance(Register klass, Register new_obj,
Register t1, Register t2,
bool clear_fields, Label& alloc_failed);

// Allocate instance in "obj" and read in the content of the inline field
// NOTES:
// - input holder object via "obj", which must be rax,
// will return new instance via the same reg
// - assumes holder_klass and valueKlass field klass have both been resolved
// - 32 bits: kills rdi and rsi
void read_flat_field(Register holder_klass,
Register field_index, Register field_offset,
void read_flat_field(Register entry,
Register tmp1, Register tmp2,
Register obj = rax);

// Allocate value buffer in "obj" and read in flat element at the given index
Expand Down
Loading
Loading