From 0170b8d529a60a18c175dfe187896388621f5768 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Wed, 24 Oct 2018 22:49:10 +0000 Subject: [PATCH] Revert "[vm/compiler] Use loop framework for AOT inline heuristics" This reverts commit daae20d32e828fad298d7d12327aa30ac2927a47. Reason for revert: The kernel-precomp bots are seeing this error: ../../runtime/vm/object.h: 3134: error: Handle check failed: saw 2249186640 expected Function Not sure what that is yet, but reverting to get bots green again while I investigate. Original change's description: > [vm/compiler] Use loop framework for AOT inline heuristics > > Rationale: > Without proper execution counters, the inline AOT inliner > marks every call site "cold", effectively disabling inlining > altogether. This change introduces loop-based static heuristic > that assumes statements nested inside loops are executed more > frequently. This results in more inlining. > > Note: > Conservative version is used for now which yields > more performance without increasing code size too much. > There is still a lot of performance left at the table > which we could exploit if we fine tune heuristics > regarding code size. > > Bug: > https://github.com/dart-lang/sdk/issues/34473 > https://github.com/dart-lang/sdk/issues/32167 > > > Change-Id: I86ba60f93bdab363cd22ab6bdbcf6688f2042fea > Reviewed-on: https://dart-review.googlesource.com/c/81187 > Commit-Queue: Aart Bik > Reviewed-by: Alexander Markov TBR=vegorov@google.com,alexmarkov@google.com,ajcbik@google.com Change-Id: If5ca82966966ebef4ec0b4e921515d23f6bd492b No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://dart-review.googlesource.com/c/81335 Reviewed-by: Aart Bik Commit-Queue: Aart Bik --- runtime/vm/compiler/backend/il.cc | 4 -- runtime/vm/compiler/backend/il.h | 1 - runtime/vm/compiler/backend/inliner.cc | 78 +++++--------------------- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc index 2b5574f258e6d..9688cae944b3a 100644 --- a/runtime/vm/compiler/backend/il.cc +++ b/runtime/vm/compiler/backend/il.cc @@ -1600,10 +1600,6 @@ bool BlockEntryInstr::IsLoopHeader() const { return loop_info_ != nullptr && loop_info_->header() == this; } -intptr_t BlockEntryInstr::NestingDepth() const { - return loop_info_ == nullptr ? 0 : loop_info_->NestingDepth(); -} - // Helper to mutate the graph during inlining. This block should be // replaced with new_block as a predecessor of all of this block's // successors. For each successor, the predecessors will be reordered diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h index 0574f079c28c1..33d143484e9d3 100644 --- a/runtime/vm/compiler/backend/il.h +++ b/runtime/vm/compiler/backend/il.h @@ -1360,7 +1360,6 @@ class BlockEntryInstr : public Instruction { LoopInfo* loop_info() const { return loop_info_; } void set_loop_info(LoopInfo* loop_info) { loop_info_ = loop_info; } bool IsLoopHeader() const; - intptr_t NestingDepth() const; virtual BlockEntryInstr* GetBlock() { return this; } diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc index f3f4a52f8125a..bdbf35ab20f1a 100644 --- a/runtime/vm/compiler/backend/inliner.cc +++ b/runtime/vm/compiler/backend/inliner.cc @@ -254,7 +254,7 @@ struct InlinedInfo { // A collection of call sites to consider for inlining. class CallSites : public ValueObject { public: - explicit CallSites(intptr_t threshold) + explicit CallSites(FlowGraph* flow_graph, intptr_t threshold) : inlining_depth_threshold_(threshold), static_calls_(), closure_calls_(), @@ -264,14 +264,9 @@ class CallSites : public ValueObject { PolymorphicInstanceCallInstr* call; double ratio; const FlowGraph* caller_graph; - intptr_t nesting_depth; InstanceCallInfo(PolymorphicInstanceCallInstr* call_arg, - FlowGraph* flow_graph, - intptr_t depth) - : call(call_arg), - ratio(0.0), - caller_graph(flow_graph), - nesting_depth(depth) {} + FlowGraph* flow_graph) + : call(call_arg), ratio(0.0), caller_graph(flow_graph) {} const Function& caller() const { return caller_graph->function(); } }; @@ -279,14 +274,8 @@ class CallSites : public ValueObject { StaticCallInstr* call; double ratio; FlowGraph* caller_graph; - intptr_t nesting_depth; - StaticCallInfo(StaticCallInstr* value, - FlowGraph* flow_graph, - intptr_t depth) - : call(value), - ratio(0.0), - caller_graph(flow_graph), - nesting_depth(depth) {} + StaticCallInfo(StaticCallInstr* value, FlowGraph* flow_graph) + : call(value), ratio(0.0), caller_graph(flow_graph) {} const Function& caller() const { return caller_graph->function(); } }; @@ -326,32 +315,6 @@ class CallSites : public ValueObject { instance_calls_.Clear(); } - // Heuristic that maps the loop nesting depth to a static estimate of number - // of times code at that depth is executed (code at each higher nesting - // depth is assumed to execute 10x more often up to depth 3). - static intptr_t AotCallCountApproximation(intptr_t nesting_depth) { - switch (nesting_depth) { - case 0: - // Note that we use value 0, and not 1, i.e. any straightline code - // outside a loop is assumed to be very cold. With value 1, inlining - // inside loops is still favored over inlining inside straightline - // code, but for a method without loops, *all* call sites are inlined - // (potentially more performance, at the expense of larger code size). - // TODO(ajcbik): use 1 and fine tune other heuristics - return 0; - case 1: - return 10; - case 2: - return 100; - default: - return 1000; - } - } - - // Computes the ratio for each call site in a method, defined as the - // number of times a call site is executed over the maximum number of - // times any call site is executed in the method. JIT uses actual call - // counts whereas AOT uses a static estimate based on nesting depth. void ComputeCallSiteRatio(intptr_t static_call_start_ix, intptr_t instance_call_start_ix) { const intptr_t num_static_calls = @@ -362,26 +325,21 @@ class CallSites : public ValueObject { intptr_t max_count = 0; GrowableArray instance_call_counts(num_instance_calls); for (intptr_t i = 0; i < num_instance_calls; ++i) { - const InstanceCallInfo& info = - instance_calls_[i + instance_call_start_ix]; - intptr_t aggregate_count = - FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth) - : info.call->CallCount(); + const intptr_t aggregate_count = + instance_calls_[i + instance_call_start_ix].call->CallCount(); instance_call_counts.Add(aggregate_count); if (aggregate_count > max_count) max_count = aggregate_count; } GrowableArray static_call_counts(num_static_calls); for (intptr_t i = 0; i < num_static_calls; ++i) { - const StaticCallInfo& info = static_calls_[i + static_call_start_ix]; intptr_t aggregate_count = - FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth) - : info.call->CallCount(); + static_calls_[i + static_call_start_ix].call->CallCount(); static_call_counts.Add(aggregate_count); if (aggregate_count > max_count) max_count = aggregate_count; } - // Note that max_count can be 0 if none of the calls was executed. + // max_count can be 0 if none of the calls was executed. for (intptr_t i = 0; i < num_instance_calls; ++i) { const double ratio = (max_count == 0) @@ -446,18 +404,12 @@ class CallSites : public ValueObject { const bool inline_only_recognized_methods = (depth == inlining_depth_threshold_); - // In AOT, compute loop hierarchy. - if (FLAG_precompiled_mode) { - graph->GetLoopHierarchy(); - } - const intptr_t instance_call_start_ix = instance_calls_.length(); const intptr_t static_call_start_ix = static_calls_.length(); for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done(); block_it.Advance()) { - BlockEntryInstr* entry = block_it.Current(); - const intptr_t depth = entry->NestingDepth(); - for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) { + for (ForwardInstructionIterator it(block_it.Current()); !it.Done(); + it.Advance()) { Instruction* current = it.Current(); if (current->IsPolymorphicInstanceCall()) { PolymorphicInstanceCallInstr* instance_call = @@ -465,7 +417,7 @@ class CallSites : public ValueObject { if (!inline_only_recognized_methods || instance_call->IsSureToCallSingleRecognizedTarget() || instance_call->HasOnlyDispatcherOrImplicitAccessorTargets()) { - instance_calls_.Add(InstanceCallInfo(instance_call, graph, depth)); + instance_calls_.Add(InstanceCallInfo(instance_call, graph)); } else { // Method not inlined because inlining too deep and method // not recognized. @@ -481,7 +433,7 @@ class CallSites : public ValueObject { if (!inline_only_recognized_methods || static_call->function().IsRecognized() || static_call->function().IsDispatcherOrImplicitAccessor()) { - static_calls_.Add(StaticCallInfo(static_call, graph, depth)); + static_calls_.Add(StaticCallInfo(static_call, graph)); } else { // Method not inlined because inlining too deep and method // not recognized. @@ -799,8 +751,8 @@ class CallSiteInliner : public ValueObject { return; } // Create two call site collections to swap between. - CallSites sites1(inlining_depth_threshold_); - CallSites sites2(inlining_depth_threshold_); + CallSites sites1(caller_graph_, inlining_depth_threshold_); + CallSites sites2(caller_graph_, inlining_depth_threshold_); CallSites* call_sites_temp = NULL; collected_call_sites_ = &sites1; inlining_call_sites_ = &sites2;