update default optimization level + fix gemm_activation fusion #2791

Merged (4 commits) on Jan 13, 2020
4 changes: 2 additions & 2 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -345,7 +345,7 @@ public int InterOpNumThreads
private int _interOpNumThreads = 0; // set to what is set in C++ SessionOptions by default;

/// <summary>
- /// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_BASIC.
+ /// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_ALL.
/// </summary>
public GraphOptimizationLevel GraphOptimizationLevel
{
@@ -359,7 +359,7 @@ public GraphOptimizationLevel GraphOptimizationLevel
_graphOptimizationLevel = value;
}
}
- private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;
+ private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

/// <summary>
/// Sets the execution mode for the session. Default is set to ORT_SEQUENTIAL.
@@ -41,7 +41,7 @@ public void TestSessionOptions()
Assert.Equal(LogLevel.Verbose, opt.LogVerbosityLevel);
Assert.Equal(0, opt.IntraOpNumThreads);
Assert.Equal(0, opt.InterOpNumThreads);
- Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_BASIC, opt.GraphOptimizationLevel);
+ Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_ALL, opt.GraphOptimizationLevel);

// try setting options
opt.ExecutionMode = ExecutionMode.ORT_PARALLEL;
4 changes: 2 additions & 2 deletions csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs
@@ -32,8 +32,8 @@ class CommandOptions
[Option('p', Required = false, HelpText = "Run with parallel exection. Default is false")]
public bool ParallelExecution { get; set; } = false;

- [Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. Default is 1, partial optimization.")]
- public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_BASIC;
+ [Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. Default is 99, all optimization.")]
+ public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_ALL;
}

class Program
4 changes: 3 additions & 1 deletion docs/ONNX_Runtime_Graph_Optimizations.md
@@ -15,9 +15,11 @@ Graph optimizations are divided in three levels:

The optimizations belonging to one level are performed after the optimizations of the previous level have been applied (e.g., extended optimizations are applied after basic optimizations have been applied).

+ **All optimizations are enabled by default.**
+
### Basic Graph Optimizations

- These are semantics-preserving graph rewrites which remove redundant nodes and redundant computation. These optimizations are enabled by default. They run before graph partitioning and thus apply to all the execution providers. Available basic graph optimizations are as follows:
+ These are semantics-preserving graph rewrites which remove redundant nodes and redundant computation. They run before graph partitioning and thus apply to all the execution providers. Available basic graph optimizations are as follows:

* Constant Folding: Statically computes parts of the graph that rely only on constant initializers. This eliminates the need to compute them during runtime.

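Since every level is now enabled by default, the only reason to set this option is to opt down to a lower level. A minimal Python sketch, assuming a placeholder model file `model.onnx`:

```python
import onnxruntime as rt

# With no session options, the session is now built with ORT_ENABLE_ALL applied.
sess_default = rt.InferenceSession("model.onnx")

# Opting down to only the basic, provider-independent rewrites.
opts = rt.SessionOptions()
opts.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_BASIC
sess_basic = rt.InferenceSession("model.onnx", sess_options=opts)
```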
4 changes: 2 additions & 2 deletions docs/ONNX_Runtime_Perf_Tuning.md
@@ -86,9 +86,9 @@ sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
* Sequential vs Parallel Execution
* `sess_options.execution_mode = rt.ExecutionMode.ORT_SEQUENTIAL` controls whether then operators in the graph should run sequentially or in parallel. Usually when a model has many branches, setting this option to false will provide better performance.
* When `sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL`, you can set `sess_options.inter_op_num_threads` to control the number of threads used to parallelize the execution of the graph (across nodes).
- * sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is ORT_ENABLE_BASIC(1). Please see [onnxruntime_c_api.h](../include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](../docs/ONNX_Runtime_Graph_Optimizations.md).
+ * sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is already ORT_ENABLE_ALL(99). Please see [onnxruntime_c_api.h](../include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](../docs/ONNX_Runtime_Graph_Optimizations.md).

### MKL_DNN/nGraph/MKL_ML Execution Provider
MKL_DNN, MKL_ML and nGraph all depends on openmp for parallization. For those execution providers, we need to use the openmp enviroment variable to tune the performance.
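To make the execution-mode and thread-count bullets above concrete, here is a sketch of the same options from Python; the thread counts are illustrative values, not recommendations:

```python
import onnxruntime as rt

opts = rt.SessionOptions()
# Graphs with many independent branches often benefit from parallel node execution.
opts.execution_mode = rt.ExecutionMode.ORT_PARALLEL
# Threads used to run independent nodes concurrently (only meaningful with ORT_PARALLEL).
opts.inter_op_num_threads = 4
# Threads used inside individual operators; 0 keeps the runtime default.
opts.intra_op_num_threads = 0
# Redundant now that ORT_ENABLE_ALL is the default, shown only for completeness.
opts.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL

sess = rt.InferenceSession("model.onnx", sess_options=opts)
```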
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/session_options.h
@@ -52,7 +52,7 @@ struct SessionOptions {
unsigned max_num_graph_transformation_steps = 10; // TODO choose a good default here?

// set graph optimization level
- TransformerLevel graph_optimization_level = TransformerLevel::Level1;
+ TransformerLevel graph_optimization_level = TransformerLevel::Level3;

// controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops)
int intra_op_num_threads = 0;
4 changes: 3 additions & 1 deletion onnxruntime/core/optimizer/gemm_activation_fusion.cc
@@ -68,7 +68,9 @@ Status GemmActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l
if (act_node.OpType() == "LeakyRelu") {
const NodeAttributes& attrs = act_node.GetAttributes();
for (const auto& attr : attrs) {
- fused_gemm.AddAttribute("leaky_relu_" + attr.first, attr.second);
+ AttributeProto fused_gemm_attr(attr.second);
+ fused_gemm_attr.set_name("leaky_relu_" + attr.first);
+ fused_gemm.AddAttribute("leaky_relu_" + attr.first, fused_gemm_attr);
}
}

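The change above clones each LeakyRelu attribute into a new AttributeProto and renames it before attaching it to the fused node, so the stored attribute name matches the `leaky_relu_` prefixed key. A rough way to observe the fusion from Python is to serialize the optimized graph and count op types; the file paths are placeholders and `optimized_model_filepath` is assumed to be available in the Python SessionOptions:

```python
import onnx
import onnxruntime as rt

# Placeholder paths; the PR's own test model lives under
# gemm_activation_fusion/gemm_activation_fusion.onnx in the test data.
model_path = "gemm_activation_fusion.onnx"
optimized_path = "gemm_activation_fusion.optimized.onnx"

opts = rt.SessionOptions()
# The fusion is registered as a Level2 (extended) transformer, so extended or higher is needed.
opts.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
opts.optimized_model_filepath = optimized_path  # dump the transformed graph to disk
rt.InferenceSession(model_path, sess_options=opts)

ops = [node.op_type for node in onnx.load(optimized_path).graph.node]
# Gemm + LeakyRelu should have collapsed into a single FusedGemm node.
print(ops.count("Gemm"), ops.count("LeakyRelu"), ops.count("FusedGemm"))
```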
6 changes: 3 additions & 3 deletions onnxruntime/python/onnxruntime_pybind_state.cc
@@ -587,7 +587,7 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc
.def_property(
"graph_optimization_level",
[](const SessionOptions* options) -> GraphOptimizationLevel {
- GraphOptimizationLevel retval = ORT_ENABLE_BASIC;
+ GraphOptimizationLevel retval = ORT_ENABLE_ALL;
switch (options->graph_optimization_level) {
case onnxruntime::TransformerLevel::Default:
retval = ORT_DISABLE_ALL;
@@ -602,8 +602,8 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc
retval = ORT_ENABLE_ALL;
break;
default:
- retval = ORT_ENABLE_BASIC;
- LOGS_DEFAULT(WARNING) << "Got invalid graph optimization level; defaulting to ORT_ENABLE_BASIC";
+ retval = ORT_ENABLE_ALL;
+ LOGS_DEFAULT(WARNING) << "Got invalid graph optimization level; defaulting to ORT_ENABLE_ALL";
break;
}
return retval;
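For reference, a sketch of the mapping this binding implements between the internal TransformerLevel and the public GraphOptimizationLevel enum; the Level1 and Level2 rows are inferred, since those cases are collapsed in the excerpt above:

```python
from enum import IntEnum

class GraphOptimizationLevel(IntEnum):
    # Numeric values follow onnxruntime_c_api.h: 0 (disable), 1 (basic), 2 (extended), 99 (all).
    ORT_DISABLE_ALL = 0
    ORT_ENABLE_BASIC = 1
    ORT_ENABLE_EXTENDED = 2
    ORT_ENABLE_ALL = 99

# Internal TransformerLevel -> public level, as exposed by the Python binding.
# After this PR an unrecognized level falls back to ORT_ENABLE_ALL, not ORT_ENABLE_BASIC.
TRANSFORMER_TO_PUBLIC = {
    "Default": GraphOptimizationLevel.ORT_DISABLE_ALL,
    "Level1": GraphOptimizationLevel.ORT_ENABLE_BASIC,     # inferred
    "Level2": GraphOptimizationLevel.ORT_ENABLE_EXTENDED,  # inferred
    "Level3": GraphOptimizationLevel.ORT_ENABLE_ALL,
}
```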
2 changes: 1 addition & 1 deletion onnxruntime/test/onnx/main.cc
@@ -41,7 +41,7 @@ void usage() {
"Default: 'cpu'.\n"
"\t-x: Use parallel executor, default (without -x): sequential executor.\n"
"\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n"
"\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
"\t-o [optimization level]: Default is 99. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
"\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. "
"\n"
"\t-h: help\n"
17 changes: 17 additions & 0 deletions onnxruntime/test/optimizer/graph_transform_test.cc
@@ -609,6 +609,23 @@ TEST(GraphTransformationTests, Gemm_Relu_three_input) {
std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
ASSERT_TRUE(op_to_count["Relu"] == 0);
}

+ TEST(GraphTransformationTests, Gemm_LeakyRelu_Fusion) {
+ auto model_uri = MODEL_FOLDER "gemm_activation_fusion/gemm_activation_fusion.onnx";
+
+ std::shared_ptr<Model> p_model;
+ ASSERT_TRUE(Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK());
+ Graph& graph = p_model->MainGraph();
+ std::map<std::string, int> op_to_count1 = CountOpsInGraph(graph);
+ onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+ graph_transformation_mgr.Register(onnxruntime::make_unique<GemmActivationFusion>(), TransformerLevel::Level2);
+ ASSERT_TRUE(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()).IsOK());
+
+ std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+ ASSERT_TRUE(op_to_count["LeakyRelu"] == 0);
+ ASSERT_TRUE(op_to_count["Gemm"] == 0);
+ ASSERT_TRUE(op_to_count["FusedGemm"] == 1);
+ }
#endif

TEST(GraphTransformationTests, FuseConvBnAddMulFloat16) {
2 changes: 1 addition & 1 deletion onnxruntime/test/perftest/test_configuration.h
@@ -47,7 +47,7 @@ struct RunConfig {
ExecutionMode execution_mode{ExecutionMode::ORT_SEQUENTIAL};
int intra_op_num_threads{0};
int inter_op_num_threads{0};
- GraphOptimizationLevel optimization_level{ORT_ENABLE_EXTENDED};
+ GraphOptimizationLevel optimization_level{ORT_ENABLE_ALL};
std::basic_string<ORTCHAR_T> optimized_model_path;
};

8 changes: 4 additions & 4 deletions onnxruntime/test/python/onnxruntime_test_python.py
@@ -521,10 +521,10 @@ def test_run_model_mlnet(self):

def testGraphOptimizationLevel(self):
opt = onnxrt.SessionOptions()
- self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_BASIC)
- # default should be basic optimization
- opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL
+ # default should be all optimizations
self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_ALL)
+ opt.graph_optimization_level = onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
+ self.assertEqual(opt.graph_optimization_level, onnxrt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED)
sess = onnxrt.InferenceSession(self.get_name("logicaland.onnx"), sess_options=opt)
a = np.array([[True, True], [False, False]], dtype=np.bool)
b = np.array([[True, False], [True, False]], dtype=np.bool)
@@ -622,7 +622,7 @@ def testLoadingSessionOptionsFromModel(self):

finally:
# Make sure the usage of the feature is disabled after this test
- os.environ['ORT_LOAD_CONFIG_FROM_MODEL'] = str(0)
+ os.environ['ORT_LOAD_CONFIG_FROM_MODEL'] = str(0)

if __name__ == '__main__':
unittest.main()
New binary test data (content not rendered): gemm_activation_fusion/gemm_activation_fusion.onnx, a small ONNX model with a Gemm node feeding a LeakyRelu node, together with protobuf files holding its input tensors and the expected output tensor.