[VM] Reduce size of monomorphic entries in AOT on ARM/ARM64 & use smaller tail-calls in various places

Decreases Flutter gallery RX size by 1.5% on ARM
Decreases Flutter gallery RX size by 1.3% on ARM64

Improves ARM performance significantly, e.g. DeltaBlue +6.8% and many typed data benchmarks +xx%
Improves ARM64 performance, e.g. DeltaBlue +3.1%
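
On ARM, much of the per-call-site saving comes from a new single-instruction tail-call helper: a load whose destination register is PC transfers control, so the previous two-instruction pattern (load the entry point into IP, then bx IP) collapses into one ldr into PC, exposed as Assembler::Branch(const Address&, Condition). A minimal sketch, taken from the ARM hunks below (the call-site lines are illustrative only):

// Before: two instructions at every tail-call site.
//   __ ldr(IP, FieldAddress(CODE_REG, Code::entry_point_offset()));
//   __ bx(IP);
//
// After: a single, condition-capable instruction.
//   __ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));

void Assembler::Branch(const Address& address, Condition cond) {
  // On ARM, writing to PC is itself the jump, so no scratch register or
  // separate bx is needed.
  ldr(PC, address, cond);
}

Because Branch takes a condition, the monomorphic entry's miss path becomes a single conditional Branch to Thread::monomorphic_miss_entry_offset() instead of an out-of-line miss block emitted before the checked entry, which is what lets the checked entry start at offset 0 on ARM.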


Issue #33274

Change-Id: Iaa6cc31597e0bcde471ec2e966730c3f2c9fdd05
Reviewed-on: https://dart-review.googlesource.com/73180
Reviewed-by: Vyacheslav Egorov <[email protected]>
Reviewed-by: Ryan Macnak <[email protected]>
Commit-Queue: Martin Kustermann <[email protected]>
mkustermann authored and [email protected] committed Sep 11, 2018
1 parent 805c92c commit 05ccfa8
Showing 7 changed files with 42 additions and 76 deletions.
17 changes: 7 additions & 10 deletions runtime/vm/compiler/assembler/assembler_arm.cc
@@ -2456,8 +2456,11 @@ void Assembler::Branch(const StubEntry& stub_entry,
const int32_t offset = ObjectPool::element_offset(
object_pool_wrapper().FindObject(target_code, patchable));
LoadWordFromPoolOffset(CODE_REG, offset - kHeapObjectTag, pp, cond);
ldr(IP, FieldAddress(CODE_REG, Code::entry_point_offset()), cond);
bx(IP, cond);
Branch(FieldAddress(CODE_REG, Code::entry_point_offset()), cond);
}

void Assembler::Branch(const Address& address, Condition cond) {
ldr(PC, address, cond);
}

void Assembler::BranchLink(const Code& target,
@@ -3160,17 +3163,11 @@ void Assembler::MonomorphicCheckedEntry() {
set_use_far_branches(false);
#endif

Label miss;
Bind(&miss);
ldr(IP, Address(THR, Thread::monomorphic_miss_entry_offset()));
bx(IP);

Comment("MonomorphicCheckedEntry");
ASSERT(CodeSize() == Instructions::kCheckedEntryOffset);
LoadClassIdMayBeSmi(IP, R0);
SmiUntag(R9);
cmp(IP, Operand(R9));
b(&miss, NE);
cmp(R9, Operand(IP, LSL, 1));
Branch(Address(THR, Thread::monomorphic_miss_entry_offset()), NE);

// Fall through to unchecked entry.
ASSERT(CodeSize() == Instructions::kUncheckedEntryOffset);
2 changes: 2 additions & 0 deletions runtime/vm/compiler/assembler/assembler_arm.h
@@ -696,6 +696,8 @@ class Assembler : public ValueObject {
Register pp = PP,
Condition cond = AL);

void Branch(const Address& address, Condition cond = AL);

void BranchLink(
const StubEntry& stub_entry,
ObjectPool::Patchability patchable = ObjectPool::kNotPatchable);
16 changes: 3 additions & 13 deletions runtime/vm/compiler/assembler/assembler_arm64.cc
@@ -1244,25 +1244,15 @@ void Assembler::MonomorphicCheckedEntry() {
bool saved_use_far_branches = use_far_branches();
set_use_far_branches(false);

Label immediate, have_cid, miss;
Label immediate, miss;
Bind(&miss);
ldr(IP0, Address(THR, Thread::monomorphic_miss_entry_offset()));
br(IP0);

Bind(&immediate);
movz(IP0, Immediate(kSmiCid), 0);
b(&have_cid);

Comment("MonomorphicCheckedEntry");
ASSERT(CodeSize() == Instructions::kCheckedEntryOffset);
tsti(R0, Immediate(kSmiTagMask));
SmiUntag(R5);
b(&immediate, EQ);

LoadClassId(IP0, R0);

Bind(&have_cid);
cmp(IP0, Operand(R5));
LoadClassIdMayBeSmi(IP0, R0);
cmp(R5, Operand(IP0, LSL, 1));
b(&miss, NE);

// Fall through to unchecked entry.
3 changes: 1 addition & 2 deletions runtime/vm/compiler/backend/il_arm.cc
@@ -64,8 +64,7 @@ DEFINE_BACKEND(TailCall,
Temp<Register> temp)) {
__ LoadObject(CODE_REG, instr->code());
__ LeaveDartFrame(); // The arguments are still on the stack.
__ ldr(temp, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(temp);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));

// Even though the TailCallInstr will be the last instruction in a basic
// block, the flow graph compiler will emit native code for other blocks after
3 changes: 1 addition & 2 deletions runtime/vm/compiler/intrinsifier_arm.cc
@@ -2249,8 +2249,7 @@ void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,

// Tail-call the function.
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
__ bx(R1);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));
}

// On stack: user tag (+0).
8 changes: 4 additions & 4 deletions runtime/vm/object.h
@@ -4486,11 +4486,11 @@ class Instructions : public Object {
static const intptr_t kCheckedEntryOffset = 15;
static const intptr_t kUncheckedEntryOffset = 34;
#elif defined(TARGET_ARCH_ARM)
static const intptr_t kCheckedEntryOffset = 8;
static const intptr_t kUncheckedEntryOffset = 32;
static const intptr_t kCheckedEntryOffset = 0;
static const intptr_t kUncheckedEntryOffset = 20;
#elif defined(TARGET_ARCH_ARM64)
static const intptr_t kCheckedEntryOffset = 16;
static const intptr_t kUncheckedEntryOffset = 40;
static const intptr_t kCheckedEntryOffset = 8;
static const intptr_t kUncheckedEntryOffset = 28;
#elif defined(TARGET_ARCH_DBC)
static const intptr_t kCheckedEntryOffset = 0;
static const intptr_t kUncheckedEntryOffset = 0;
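
Read against the old values, these offsets say the code emitted before the unchecked entry shrinks from 32 to 20 bytes on ARM and from 40 to 28 bytes on ARM64, i.e. three fewer 4-byte instructions per monomorphic entry. A tiny stand-alone sketch of that arithmetic (the constant names are invented for illustration and do not exist in the tree):

// Hypothetical constants, for illustration only; values are the old and new
// kUncheckedEntryOffset from the hunk above.
constexpr int kArmEntryBytesBefore = 32, kArmEntryBytesAfter = 20;
constexpr int kArm64EntryBytesBefore = 40, kArm64EntryBytesAfter = 28;
static_assert(kArmEntryBytesBefore - kArmEntryBytesAfter == 12,
              "ARM: 3 instructions (12 bytes) saved per monomorphic entry");
static_assert(kArm64EntryBytesBefore - kArm64EntryBytesAfter == 12,
              "ARM64: 3 instructions (12 bytes) saved per monomorphic entry");
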
69 changes: 24 additions & 45 deletions runtime/vm/stub_code_arm.cc
@@ -399,8 +399,7 @@ void StubCode::GenerateCallStaticFunctionStub(Assembler* assembler) {
__ LeaveStubFrame();
// Jump to the dart function.
__ mov(CODE_REG, Operand(R0));
__ ldr(R0, FieldAddress(R0, Code::entry_point_offset()));
__ bx(R0);
__ Branch(FieldAddress(R0, Code::entry_point_offset()));
}

// Called from a static call only when an invalid code has been entered
@@ -424,8 +423,7 @@ void StubCode::GenerateFixCallersTargetStub(Assembler* assembler) {
__ LeaveStubFrame();
// Jump to the dart function.
__ mov(CODE_REG, Operand(R0));
__ ldr(R0, FieldAddress(R0, Code::entry_point_offset()));
__ bx(R0);
__ Branch(FieldAddress(R0, Code::entry_point_offset()));
}

// Called from object allocate instruction when the allocation stub has been
@@ -446,8 +444,7 @@ void StubCode::GenerateFixAllocationStubTargetStub(Assembler* assembler) {
__ LeaveStubFrame();
// Jump to the dart function.
__ mov(CODE_REG, Operand(R0));
__ ldr(R0, FieldAddress(R0, Code::entry_point_offset()));
__ bx(R0);
__ Branch(FieldAddress(R0, Code::entry_point_offset()));
}

// Input parameters:
@@ -739,8 +736,7 @@ void StubCode::GenerateMegamorphicMissStub(Assembler* assembler) {

// Tail-call to target function.
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ ldr(R2, FieldAddress(R0, Function::entry_point_offset()));
__ bx(R2);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));
}

// Called for inline allocation of arrays.
@@ -1639,9 +1635,8 @@ void StubCode::GenerateNArgsCheckInlineCacheStub(
__ Comment("Call target");
__ Bind(&call_target_function);
// R0: target function.
__ ldr(R2, FieldAddress(R0, Function::entry_point_offset()));
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ bx(R2);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));

#if !defined(PRODUCT)
if (!optimized) {
@@ -1775,8 +1770,7 @@ void StubCode::GenerateZeroArgsUnoptimizedStaticCallStub(Assembler* assembler) {
// Get function and call it, if possible.
__ LoadFromOffset(kWord, R0, R8, target_offset);
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ ldr(R2, FieldAddress(R0, Function::entry_point_offset()));
__ bx(R2);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));

#if !defined(PRODUCT)
__ Bind(&stepping);
@@ -1815,8 +1809,7 @@ void StubCode::GenerateLazyCompileStub(Assembler* assembler) {
// When using the interpreter, the function's code may now point to the
// InterpretCall stub. Make sure R0, R4, and R9 are preserved.
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ ldr(R2, FieldAddress(R0, Function::entry_point_offset()));
__ bx(R2);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));
}

void StubCode::GenerateInterpretCallStub(Assembler* assembler) {
@@ -1833,8 +1826,7 @@ void StubCode::GenerateICCallBreakpointStub(Assembler* assembler) {
__ PopList((1 << R0) | (1 << R9));
__ LeaveStubFrame();
__ mov(CODE_REG, Operand(R0));
__ ldr(R0, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(R0);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));
}

void StubCode::GenerateRuntimeCallBreakpointStub(Assembler* assembler) {
@@ -1845,8 +1837,7 @@ void StubCode::GenerateRuntimeCallBreakpointStub(Assembler* assembler) {
__ CallRuntime(kBreakpointRuntimeHandlerRuntimeEntry, 0);
__ PopList((1 << CODE_REG));
__ LeaveStubFrame();
__ ldr(R0, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(R0);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));
}

// Called only from unoptimized code. All relevant registers have been saved.
@@ -2074,8 +2065,7 @@ void StubCode::GenerateDefaultTypeTestStub(Assembler* assembler) {
__ BranchIf(EQUAL, &done);

__ ldr(CODE_REG, Address(THR, Thread::slow_type_test_stub_offset()));
__ ldr(R9, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(R9);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));

__ Bind(&done);
__ Ret();
@@ -2107,8 +2097,7 @@ void TypeTestingStubGenerator::BuildOptimizedTypeTestStub(
kInstanceReg, kClassIdReg);

__ ldr(CODE_REG, Address(THR, Thread::slow_type_test_stub_offset()));
__ ldr(TMP, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(TMP);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));
}

void TypeTestingStubGenerator::
@@ -2374,8 +2363,7 @@ void StubCode::GenerateOptimizeFunctionStub(Assembler* assembler) {
__ Pop(R4); // Restore argument descriptor.
__ LeaveStubFrame();
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
__ bx(R1);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));
__ bkpt(0);
}

@@ -2520,9 +2508,8 @@ void StubCode::GenerateMegamorphicCallStub(Assembler* assembler) {
// be invoked as a normal Dart function.
__ ldr(R0, FieldAddress(IP, base + kWordSize));
__ ldr(R4, FieldAddress(R9, MegamorphicCache::arguments_descriptor_offset()));
__ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ bx(R1);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));

// Probe failed, check if it is a miss.
__ Bind(&probe_failed);
@@ -2565,15 +2552,13 @@ void StubCode::GenerateICCallThroughFunctionStub(Assembler* assembler) {
__ Bind(&found);
const intptr_t target_offset = ICData::TargetIndexFor(1) * kWordSize;
__ LoadFromOffset(kWord, R0, R8, target_offset);
__ ldr(R1, FieldAddress(R0, Function::entry_point_offset()));
__ ldr(CODE_REG, FieldAddress(R0, Function::code_offset()));
__ bx(R1);
__ Branch(FieldAddress(R0, Function::entry_point_offset()));

__ Bind(&miss);
__ LoadIsolate(R2);
__ ldr(CODE_REG, Address(R2, Isolate::ic_miss_code_offset()));
__ ldr(R1, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(R1);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));
}

void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
@@ -2600,15 +2585,13 @@ void StubCode::GenerateICCallThroughCodeStub(Assembler* assembler) {
__ Bind(&found);
const intptr_t code_offset = ICData::CodeIndexFor(1) * kWordSize;
const intptr_t entry_offset = ICData::EntryPointIndexFor(1) * kWordSize;
__ ldr(R1, Address(R8, entry_offset));
__ ldr(CODE_REG, Address(R8, code_offset));
__ bx(R1);
__ Branch(Address(R8, entry_offset));

__ Bind(&miss);
__ LoadIsolate(R2);
__ ldr(CODE_REG, Address(R2, Isolate::ic_miss_code_offset()));
__ ldr(R1, FieldAddress(CODE_REG, Code::entry_point_offset()));
__ bx(R1);
__ Branch(FieldAddress(CODE_REG, Code::entry_point_offset()));
}

// Called from switchable IC calls.
@@ -2630,9 +2613,8 @@ void StubCode::GenerateUnlinkedCallStub(Assembler* assembler) {
__ LeaveStubFrame();

__ ldr(CODE_REG, Address(THR, Thread::ic_lookup_through_code_stub_offset()));
__ ldr(R1, FieldAddress(CODE_REG, Code::entry_point_offset(
Code::EntryKind::kMonomorphic)));
__ bx(R1);
__ Branch(FieldAddress(
CODE_REG, Code::entry_point_offset(Code::EntryKind::kMonomorphic)));
}

// Called from switchable IC calls.
@@ -2651,9 +2633,8 @@ void StubCode::GenerateSingleTargetCallStub(Assembler* assembler) {
__ cmp(R1, Operand(R3));
__ b(&miss, GT);

__ ldr(R1, FieldAddress(R9, SingleTargetCache::entry_point_offset()));
__ ldr(CODE_REG, FieldAddress(R9, SingleTargetCache::target_offset()));
__ bx(R1);
__ Branch(FieldAddress(R9, SingleTargetCache::entry_point_offset()));

__ Bind(&miss);
__ EnterStubFrame();
@@ -2670,9 +2651,8 @@
__ LeaveStubFrame();

__ ldr(CODE_REG, Address(THR, Thread::ic_lookup_through_code_stub_offset()));
__ ldr(R1, FieldAddress(CODE_REG, Code::entry_point_offset(
Code::EntryKind::kMonomorphic)));
__ bx(R1);
__ Branch(FieldAddress(
CODE_REG, Code::entry_point_offset(Code::EntryKind::kMonomorphic)));
}

// Called from the monomorphic checked entry.
@@ -2693,9 +2673,8 @@ void StubCode::GenerateMonomorphicMissStub(Assembler* assembler) {
__ LeaveStubFrame();

__ ldr(CODE_REG, Address(THR, Thread::ic_lookup_through_code_stub_offset()));
__ ldr(R1, FieldAddress(CODE_REG, Code::entry_point_offset(
Code::EntryKind::kMonomorphic)));
__ bx(R1);
__ Branch(FieldAddress(
CODE_REG, Code::entry_point_offset(Code::EntryKind::kMonomorphic)));
}

void StubCode::GenerateFrameAwaitingMaterializationStub(Assembler* assembler) {