diff --git a/onnxruntime/core/optimizer/matmul_integer_to_float.cc b/onnxruntime/core/optimizer/matmul_integer_to_float.cc
index 6aa473cb72417..630455c62161b 100644
--- a/onnxruntime/core/optimizer/matmul_integer_to_float.cc
+++ b/onnxruntime/core/optimizer/matmul_integer_to_float.cc
@@ -34,7 +34,7 @@ static bool CheckBiasShape(const TensorShapeProto* bias_shape) {
 /**
 MatMulIntegerToFloatFusion will fuse subgraph like below into MatMulIntegerToFloat:
 
- A  A_Zero  B  B_Zero  A_Scale)  B_Scale  Bias (Const, Optional)
+ A  A_Zero  B  B_Zero  A_Scale   B_Scale  Bias (Const, Optional)
   \    |    |    /        \        /         |
    \   |    |   /          \      /          |
     \  |    |  /            \    /           |
@@ -84,13 +84,6 @@ Status MatMulIntegerToFloatFusion::ApplyImpl(Graph& graph, bool& modified, int g
       continue;
     }
 
-    // A_Scale is scalar and B_Scale is scalar or 1D tensor
-    auto mul_node_input_defs = p_mul_node_right->InputDefs();
-    if (!optimizer_utils::IsScalar(*mul_node_input_defs[0]) ||
-        !optimizer_utils::IsScalar(*mul_node_input_defs[1])) {
-      continue;
-    }
-
     Node& cast_node = *graph.GetNode(p_cast_node->Index());
     Node& matmulinteger_node = *graph.GetNode(p_matmulinteger_node->Index());
     Node& mul_node_right = *graph.GetNode(p_mul_node_right->Index());
diff --git a/onnxruntime/core/optimizer/utils.cc b/onnxruntime/core/optimizer/utils.cc
index fb658bfb848eb..39d178ae07638 100644
--- a/onnxruntime/core/optimizer/utils.cc
+++ b/onnxruntime/core/optimizer/utils.cc
@@ -24,7 +24,7 @@ bool IsFloatingPointDataType(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
   return tensor_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT || tensor_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 || tensor_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_DOUBLE;
 }
 
-bool IsScalar(const NodeArg& input_arg) {
+inline bool IsScalar(const NodeArg& input_arg) {
   auto shape = input_arg.Shape();
   if (shape == nullptr) {
     // shape inferencing wasn't able to populate shape information for this NodeArg
diff --git a/onnxruntime/core/optimizer/utils.h b/onnxruntime/core/optimizer/utils.h
index 2d1025f88cbc6..535edc9c77488 100644
--- a/onnxruntime/core/optimizer/utils.h
+++ b/onnxruntime/core/optimizer/utils.h
@@ -15,9 +15,6 @@ namespace optimizer_utils {
 // Check if TensorProto contains a floating point type.
 bool IsFloatingPointDataType(const ONNX_NAMESPACE::TensorProto& tensor_proto);
 
-// Check if input is a scalar
-bool IsScalar(const NodeArg& input_arg);
-
 /** Check whether a input is initializer with specified float value.
 @param expected_value is the expected value of the initializer.
 @param is_constant means whether the initializer is required to be constant.
@@ -63,7 +60,7 @@ bool ValidateShape(const NodeArg& node_arg, const std::initializer_list
 */
 bool CompareShape(const ONNX_NAMESPACE::TensorShapeProto& node_arg_shape, const ONNX_NAMESPACE::TensorShapeProto& node_arg_other_shape);
 
-/** Check check whether each dimension is known for shape of node_arg
+/** Check whether each dimension is known for shape of node_arg
 @returns false when shape is nullptr, or total dimension is not same as expected_dim_size length,
 or any dim is unknown (without dim value).
 */
diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc
index ec3147fd6e324..63bad72be407a 100644
--- a/onnxruntime/test/optimizer/graph_transform_test.cc
+++ b/onnxruntime/test/optimizer/graph_transform_test.cc
@@ -3069,9 +3069,9 @@ TEST_F(GraphTransformationTests, MatMulIntegerToFloatTest) {
   std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
   EXPECT_EQ(op_to_count["DynamicQuantizeLinear"], 1);
-  EXPECT_EQ(op_to_count["MatMulInteger"], 1);
-  EXPECT_EQ(op_to_count["Cast"], 1);
-  EXPECT_EQ(op_to_count["Mul"], 2);
+  EXPECT_EQ(op_to_count["MatMulInteger"], 0);
+  EXPECT_EQ(op_to_count["Cast"], 0);
+  EXPECT_EQ(op_to_count["Mul"], 0);
   EXPECT_EQ(op_to_count["com.microsoft.MatMulIntegerToFloat"], 3);
   EXPECT_EQ(op_to_count["Add"], 1);
 }
diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.onnx b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.onnx
index a88a4bd681760..7ea69c580ee43 100644
Binary files a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.onnx and b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.onnx differ
diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
index 293a45cb48383..1a270043baa65 100644
--- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
@@ -29,7 +29,6 @@ def GenerateModel(model_name):
     nodes.extend(MakeSubGraph("_1", True))
     nodes.extend(MakeSubGraph("_2", True))
     nodes.extend(MakeSubGraph("_3", False))
-    nodes.extend(MakeSubGraph("_4", False))
 
     initializers = []
     initializers.extend(MakeInitializer("_1"))
@@ -49,15 +48,11 @@ def GenerateModel(model_name):
         helper.make_tensor_value_info('b_quantized_2', TensorProto.UINT8, [2, 3]),
         helper.make_tensor_value_info('b_zp_2', TensorProto.UINT8, [1]),
         helper.make_tensor_value_info('b_scale_2', TensorProto.FLOAT, [1]),
-        helper.make_tensor_value_info('b_quantized_4', TensorProto.UINT8, [2, 3]),
-        helper.make_tensor_value_info('b_zp_4', TensorProto.UINT8, [3]),
-        helper.make_tensor_value_info('b_scale_4', TensorProto.FLOAT, [3]),
     ],
     [  # outputs
         helper.make_tensor_value_info('output_1', TensorProto.FLOAT, [3, 3]),
         helper.make_tensor_value_info('output_2', TensorProto.FLOAT, [3, 3]),
         helper.make_tensor_value_info('output_3', TensorProto.FLOAT, [3, 3]),
-        helper.make_tensor_value_info('output_4', TensorProto.FLOAT, [3, 3]),
     ],
     initializers)
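
For reference, the pattern this fusion targets is MatMulInteger -> Cast -> Mul, where the Mul's second input is the product of the two dequantization scales, optionally followed by an Add that applies a constant bias; the fused com.microsoft.MatMulIntegerToFloat node computes the same dequantized matmul in a single kernel. The sketch below builds one such pre-fusion subgraph with onnx.helper. It only mirrors the spirit of MakeSubGraph in matmul_integer_to_float.py; the function name, tensor names, and exact node layout are illustrative assumptions, not the actual test generator.

```python
from onnx import TensorProto, helper


def make_pre_fusion_subgraph(suffix, has_bias):
    """Build the MatMulInteger -> Cast -> Mul (-> Add) pattern that the
    MatMulIntegerToFloat fusion collapses into a single contrib op.
    Names and structure are illustrative, not the real test data."""
    nodes = [
        # Combine the two dequantization scales into a single multiplier.
        helper.make_node("Mul",
                         ["a_scale" + suffix, "b_scale" + suffix],
                         ["ab_scale" + suffix]),
        # Integer matmul with zero points; accumulates in int32.
        helper.make_node("MatMulInteger",
                         ["a_quantized" + suffix, "b_quantized" + suffix,
                          "a_zp" + suffix, "b_zp" + suffix],
                         ["matmul_int32" + suffix]),
        # Cast the int32 product to float so it can be rescaled.
        helper.make_node("Cast",
                         ["matmul_int32" + suffix],
                         ["matmul_float" + suffix],
                         to=TensorProto.FLOAT),
        # Apply the combined scale to get the dequantized result.
        helper.make_node("Mul",
                         ["matmul_float" + suffix, "ab_scale" + suffix],
                         ["mul_output" + suffix]),
    ]
    if has_bias:
        # Optional constant bias; folded into the fused op when present.
        nodes.append(helper.make_node("Add",
                                      ["mul_output" + suffix, "bias" + suffix],
                                      ["output" + suffix]))
    return nodes
```

With three such subgraphs in the model, as GenerateModel now builds, a full fusion pass leaves no MatMulInteger, Cast, or Mul nodes behind, which is what the updated counts in graph_transform_test.cc assert.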
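
The rewrite is numerically a no-op because the scales can be applied after the integer matmul: multiplying the casted int32 product by a_scale * b_scale gives the same values as dequantizing each operand first, which is what a fused dequantizing matmul amounts to. A minimal numpy check of that identity, using made-up quantized values rather than anything from the test model:

```python
import numpy as np

# Toy uint8 operands with per-tensor zero points and scales (made-up values).
a_q = np.array([[130, 128], [127, 132]], dtype=np.uint8)
b_q = np.array([[120, 140], [128, 125]], dtype=np.uint8)
a_zp, b_zp = np.int32(128), np.int32(128)
a_scale, b_scale = np.float32(0.02), np.float32(0.05)

# Unfused path: MatMulInteger (int32 accumulation) -> Cast -> Mul by a_scale * b_scale.
int32_product = (a_q.astype(np.int32) - a_zp) @ (b_q.astype(np.int32) - b_zp)
unfused = int32_product.astype(np.float32) * (a_scale * b_scale)

# Dequantize-first formulation: scale each operand before the float matmul.
fused = (a_scale * (a_q.astype(np.float32) - a_zp)) @ (b_scale * (b_q.astype(np.float32) - b_zp))

assert np.allclose(unfused, fused)
```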