From 6339dd46e6036fce3dfa0deef1604cb391ec9255 Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Tue, 7 Jan 2020 13:26:28 -0800 Subject: [PATCH 1/4] update default optimization level + fix gemm_activation fusion --- csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs | 4 ++-- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 2 +- csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs | 4 ++-- docs/ONNX_Runtime_Graph_Optimizations.md | 4 +++- docs/ONNX_Runtime_Perf_Tuning.md | 4 ++-- onnxruntime/core/framework/session_options.h | 2 +- onnxruntime/core/optimizer/gemm_activation_fusion.cc | 5 +++-- onnxruntime/python/onnxruntime_pybind_state.cc | 6 +++--- onnxruntime/test/onnx/main.cc | 2 +- onnxruntime/test/perftest/test_configuration.h | 2 +- onnxruntime/test/python/onnxruntime_test_python.py | 6 +++--- 11 files changed, 22 insertions(+), 19 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index c2a5920741d4d..6f24f49634da0 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -345,7 +345,7 @@ public int InterOpNumThreads private int _interOpNumThreads = 0; // set to what is set in C++ SessionOptions by default; /// - /// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_BASIC. + /// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_ALL. /// public GraphOptimizationLevel GraphOptimizationLevel { @@ -359,7 +359,7 @@ public GraphOptimizationLevel GraphOptimizationLevel _graphOptimizationLevel = value; } } - private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC; + private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL; /// /// Sets the execution mode for the session. Default is set to ORT_SEQUENTIAL. 
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 29d3181568cd3..a18e4faf0846d 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -41,7 +41,7 @@ public void TestSessionOptions() Assert.Equal(LogLevel.Verbose, opt.LogVerbosityLevel); Assert.Equal(0, opt.IntraOpNumThreads); Assert.Equal(0, opt.InterOpNumThreads); - Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_BASIC, opt.GraphOptimizationLevel); + Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_ALL, opt.GraphOptimizationLevel); // try setting options opt.ExecutionMode = ExecutionMode.ORT_PARALLEL; diff --git a/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs b/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs index de589114e7bad..a9af0449bd15c 100644 --- a/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs +++ b/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs @@ -32,8 +32,8 @@ class CommandOptions [Option('p', Required = false, HelpText = "Run with parallel exection. Default is false")] public bool ParallelExecution { get; set; } = false; - [Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. Default is 1, partial optimization.")] - public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_BASIC; + [Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. 
Default is 99, all optimization.")] + public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_ALL; } class Program diff --git a/docs/ONNX_Runtime_Graph_Optimizations.md b/docs/ONNX_Runtime_Graph_Optimizations.md index 0c347f87e477d..2d124420db980 100644 --- a/docs/ONNX_Runtime_Graph_Optimizations.md +++ b/docs/ONNX_Runtime_Graph_Optimizations.md @@ -15,9 +15,11 @@ Graph optimizations are divided in three levels: The optimizations belonging to one level are performed after the optimizations of the previous level have been applied (e.g., extended optimizations are applied after basic optimizations have been applied). +**All optimizations are enabled by default.** + ### Basic Graph Optimizations -These are semantics-preserving graph rewrites which remove redundant nodes and redundant computation. These optimizations are enabled by default. They run before graph partitioning and thus apply to all the execution providers. Available basic graph optimizations are as follows: +These are semantics-preserving graph rewrites which remove redundant nodes and redundant computation. They run before graph partitioning and thus apply to all the execution providers. Available basic graph optimizations are as follows: * Constant Folding: Statically computes parts of the graph that rely only on constant initializers. This eliminates the need to compute them during runtime. diff --git a/docs/ONNX_Runtime_Perf_Tuning.md b/docs/ONNX_Runtime_Perf_Tuning.md index 2788ae041b85e..aaab8f8422fe5 100644 --- a/docs/ONNX_Runtime_Perf_Tuning.md +++ b/docs/ONNX_Runtime_Perf_Tuning.md @@ -86,9 +86,9 @@ sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL * Sequential vs Parallel Execution * `sess_options.execution_mode = rt.ExecutionMode.ORT_SEQUENTIAL` controls whether then operators in the graph should run sequentially or in parallel. 
Usually when a model has many branches, setting this option to false will provide better performance. * When `sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL`, you can set `sess_options.inter_op_num_threads` to control the - number of threads used to parallelize the execution of the graph (across nodes). -* sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is ORT_ENABLE_BASIC(1). Please see [onnxruntime_c_api.h](../include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](../docs/ONNX_Runtime_Graph_Optimizations.md). + +* sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is already ORT_ENABLE_ALL(99). Please see [onnxruntime_c_api.h](../include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](../docs/ONNX_Runtime_Graph_Optimizations.md). ### MKL_DNN/nGraph/MKL_ML Execution Provider MKL_DNN, MKL_ML and nGraph all depends on openmp for parallization. For those execution providers, we need to use the openmp enviroment variable to tune the performance. diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index aaefc50cc6c9c..1c02f9f0ebbc2 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -52,7 +52,7 @@ struct SessionOptions { unsigned max_num_graph_transformation_steps = 10; // TODO choose a good default here? 
// set graph optimization level - TransformerLevel graph_optimization_level = TransformerLevel::Level1; + TransformerLevel graph_optimization_level = TransformerLevel::Level3; // controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops) int intra_op_num_threads = 0; diff --git a/onnxruntime/core/optimizer/gemm_activation_fusion.cc b/onnxruntime/core/optimizer/gemm_activation_fusion.cc index 05d1c864b770b..c27690bd52971 100644 --- a/onnxruntime/core/optimizer/gemm_activation_fusion.cc +++ b/onnxruntime/core/optimizer/gemm_activation_fusion.cc @@ -66,8 +66,9 @@ Status GemmActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l //Add optional attributes for activations if (act_node.OpType() == "LeakyRelu") { - const NodeAttributes& attrs = act_node.GetAttributes(); - for (const auto& attr : attrs) { + NodeAttributes& attrs = const_cast(act_node.GetAttributes()); + for (auto& attr : attrs) { + attr.second.set_name("leaky_relu_" + attr.first); fused_gemm.AddAttribute("leaky_relu_" + attr.first, attr.second); } } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index e48c03323cc55..ba6035ba2da70 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -587,7 +587,7 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc") .def_property( "graph_optimization_level", [](const SessionOptions* options) -> GraphOptimizationLevel { - GraphOptimizationLevel retval = ORT_ENABLE_BASIC; + GraphOptimizationLevel retval = ORT_ENABLE_ALL; switch (options->graph_optimization_level) { case onnxruntime::TransformerLevel::Default: retval = ORT_DISABLE_ALL; @@ -602,8 +602,8 @@ Applies to session load, initialization, etc. 
Default is 0.)pbdoc") retval = ORT_ENABLE_ALL; break; default: - retval = ORT_ENABLE_BASIC; - LOGS_DEFAULT(WARNING) << "Got invalid graph optimization level; defaulting to ORT_ENABLE_BASIC"; + retval = ORT_ENABLE_ALL; + LOGS_DEFAULT(WARNING) << "Got invalid graph optimization level; defaulting to ORT_ENABLE_ALL"; break; } return retval; diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 6fa0b30456d03..915212d818b33 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -41,7 +41,7 @@ void usage() { "Default: 'cpu'.\n" "\t-x: Use parallel executor, default (without -x): sequential executor.\n" "\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n" - "\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n" + "\t-o [optimization level]: Default is 99. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n" "\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. 
" "\n" "\t-h: help\n" diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 9d0bf46774057..36186e129f0f8 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -47,7 +47,7 @@ struct RunConfig { ExecutionMode execution_mode{ExecutionMode::ORT_SEQUENTIAL}; int intra_op_num_threads{0}; int inter_op_num_threads{0}; - GraphOptimizationLevel optimization_level{ORT_ENABLE_EXTENDED}; + GraphOptimizationLevel optimization_level{ORT_ENABLE_ALL}; std::basic_string optimized_model_path; }; diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 0a0060767ebea..1208182ce92f5 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -521,10 +521,10 @@ def test_run_model_mlnet(self): def testGraphOptimizationLevel(self): opt = onnxrt.SessionOptions() - self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_BASIC) - # default should be basic optimization - opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL) + # default should be basic optimization + opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTEDED + self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED) sess = onnxrt.InferenceSession(self.get_name("logicaland.onnx"), sess_options=opt) a = np.array([[True, True], [False, False]], dtype=np.bool) b = np.array([[True, False], [True, False]], dtype=np.bool) From 7f7f392a6502ba1f9da0e4826b4f198751403900 Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Tue, 7 Jan 2020 14:03:10 -0800 Subject: [PATCH 2/4] fix typo --- onnxruntime/test/python/onnxruntime_test_python.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 1208182ce92f5..49b2e6eea2607 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -523,7 +523,7 @@ def testGraphOptimizationLevel(self): opt = onnxrt.SessionOptions() self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL) # default should be basic optimization - opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTEDED + opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED) sess = onnxrt.InferenceSession(self.get_name("logicaland.onnx"), sess_options=opt) a = np.array([[True, True], [False, False]], dtype=np.bool) @@ -622,7 +622,7 @@ def testLoadingSessionOptionsFromModel(self): finally: # Make sure the usage of the feature is disabled after this test - os.environ['ORT_LOAD_CONFIG_FROM_MODEL'] = str(0) + os.environ['ORT_LOAD_CONFIG_FROM_MODEL'] = str(0) if __name__ == '__main__': unittest.main() From e4c4f7c75562d6cf55689453b8c581d64d003e9b Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Wed, 8 Jan 2020 11:26:31 -0800 Subject: [PATCH 3/4] add unit test and incorporate review comments --- .../core/optimizer/gemm_activation_fusion.cc | 9 ++++--- .../test/optimizer/graph_transform_test.cc | 17 +++++++++++++ .../gemm_activation_fusion.onnx | 24 ++++++++++++++++++ .../test_data_set_0/input_0.pb | 1 + .../test_data_set_0/input_1.pb | 1 + .../test_data_set_0/input_2.pb | Bin 0 -> 27 bytes .../test_data_set_0/output_0.pb | 1 + 7 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 onnxruntime/test/testdata/transform/gemm_activation_fusion/gemm_activation_fusion.onnx create mode 100644 onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_0.pb 
create mode 100644 onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/output_0.pb diff --git a/onnxruntime/core/optimizer/gemm_activation_fusion.cc b/onnxruntime/core/optimizer/gemm_activation_fusion.cc index c27690bd52971..c6c5e5316c28a 100644 --- a/onnxruntime/core/optimizer/gemm_activation_fusion.cc +++ b/onnxruntime/core/optimizer/gemm_activation_fusion.cc @@ -66,10 +66,11 @@ Status GemmActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l //Add optional attributes for activations if (act_node.OpType() == "LeakyRelu") { - NodeAttributes& attrs = const_cast(act_node.GetAttributes()); - for (auto& attr : attrs) { - attr.second.set_name("leaky_relu_" + attr.first); - fused_gemm.AddAttribute("leaky_relu_" + attr.first, attr.second); + const NodeAttributes& attrs = act_node.GetAttributes(); + for (const auto& attr : attrs) { + AttributeProto fused_gemm_attr(attr.second); + fused_gemm_attr.set_name("leaky_relu_" + attr.first); + fused_gemm.AddAttribute("leaky_relu_" + attr.first, fused_gemm_attr); } } diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index bbfc1fcc0dd68..46e3682c0c763 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -609,6 +609,23 @@ TEST(GraphTransformationTests, Gemm_Relu_three_input) { std::map op_to_count = CountOpsInGraph(graph); ASSERT_TRUE(op_to_count["Relu"] == 0); } + +TEST(GraphTransformationTests, Gemm_LeakyRelu_Fusion) { + auto model_uri = MODEL_FOLDER "gemm_activation_fusion/gemm_activation_fusion.onnx"; + + std::shared_ptr p_model; + ASSERT_TRUE(Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK()); + Graph& graph = 
p_model->MainGraph(); + std::map op_to_count1 = CountOpsInGraph(graph); + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + ASSERT_TRUE(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()).IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["LeakyRelu"] == 0); + ASSERT_TRUE(op_to_count["Gemm"] == 0); + ASSERT_TRUE(op_to_count["FusedGemm"] == 1); +} #endif TEST(GraphTransformationTests, FuseConvBnAddMulFloat16) { diff --git a/onnxruntime/test/testdata/transform/gemm_activation_fusion/gemm_activation_fusion.onnx b/onnxruntime/test/testdata/transform/gemm_activation_fusion/gemm_activation_fusion.onnx new file mode 100644 index 0000000000000..87dd469343005 --- /dev/null +++ b/onnxruntime/test/testdata/transform/gemm_activation_fusion/gemm_activation_fusion.onnx @@ -0,0 +1,24 @@ + onnx-helper:“ + +a +b +cy"Gemm + +yz" LeakyRelugemm_activation_fusionZ +a +  + +Z +b +  + +Z +c +  + +b +z +  + +B +ai.onnx \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_0.pb b/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000..ee690bab01f44 --- /dev/null +++ b/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_0.pb @@ -0,0 +1 @@ +BaJ<  ?¦7?³N?w} ?HéØ>QY%?n à>~®J?¨e?^k?çól?Z{‘= \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_1.pb b/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000..6945e22c76d0a --- /dev/null +++ b/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_1.pb @@ -0,0 +1 @@ +BbJPÙp²= 
¡¥<“&U?H5G?¹^?»†z?¨•L?Gì>ÃÐG?Ý9ò=ÝÑ#?4Ë>ÒÕq?Ú—?’NÔ>Õs‡>.4F?‰Œé>ã„?í™< \ No newline at end of file diff --git a/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_2.pb b/onnxruntime/test/testdata/transform/gemm_activation_fusion/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..7077e4ffe336e33efce98d917289ebaaea95ee96 GIT binary patch literal 27 Ucmd;J Date: Mon, 13 Jan 2020 10:21:02 -0800 Subject: [PATCH 4/4] fix test comment --- onnxruntime/test/python/onnxruntime_test_python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 49b2e6eea2607..21b0fc8a8c457 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -521,8 +521,8 @@ def test_run_model_mlnet(self): def testGraphOptimizationLevel(self): opt = onnxrt.SessionOptions() + # default should be all optimizations optimization self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL) - # default should be basic optimization opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED) sess = onnxrt.InferenceSession(self.get_name("logicaland.onnx"), sess_options=opt)