[ONNX] Initial work to import pre-quantized ONNX Models (apache#7802)
* Add QuantizeLinear and DequantizeLinear

* DynamicQuantizeLinear
Matthew Brookhart authored and Trevor Morris committed May 6, 2021
1 parent bba762f commit 566921c
Showing 2 changed files with 59 additions and 8 deletions.
59 changes: 59 additions & 0 deletions python/tvm/relay/frontend/onnx.py
@@ -29,6 +29,7 @@
from .. import expr as _expr
from .. import function as _function
from .. import op as _op
from .. import qnn as _qnn
from .. import vision as _vision
from .. import loops as _loops
from .. import ty as _ty
@@ -2782,6 +2783,60 @@ def _impl_v1(cls, inputs, attr, params):
        return cls._op_dispatch(operator, inputs, attr, params)


class QuantizeLinear(OnnxOpConverter):
    """Operator converter for QuantizeLinear."""

    @classmethod
    def _impl_v10(cls, inputs, attr, params):
        data, scale, zp = inputs
        out_dtype = infer_type(zp).checked_type.dtype
        return _qnn.op.quantize(data, scale, _op.cast(zp, "int32"), 0, out_dtype)

    @classmethod
    def _impl_v13(cls, inputs, attr, params):
        data, scale, zp = inputs
        out_dtype = infer_type(zp).checked_type.dtype
        axis = attr.get("axis", 1)
        return _qnn.op.quantize(data, scale, _op.cast(zp, "int32"), axis, out_dtype)
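
For reference, a minimal Relay-level sketch (not part of this diff) of what the opset-13 path above produces: with an axis attribute, QuantizeLinear becomes a per-channel qnn.quantize carrying one scale and zero point per slice along that axis. Shapes and constants below are illustrative only.

import numpy as np
from tvm import relay

# Per-channel quantization along axis=1, mirroring the opset-13 converter above.
x = relay.var("x", shape=(1, 3, 2, 2), dtype="float32")
scale = relay.const(np.array([0.1, 0.2, 0.4], dtype="float32"))
zp = relay.const(np.zeros(3, dtype="int32"))
y = relay.qnn.op.quantize(x, scale, zp, axis=1, out_dtype="uint8")
print(relay.Function([x], y))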


class DequantizeLinear(OnnxOpConverter):
    """Operator converter for DequantizeLinear."""

    @classmethod
    def _impl_v10(cls, inputs, attr, params):
        data, scale, zp = inputs
        return _qnn.op.dequantize(data, scale, _op.cast(zp, "int32"), 0)

    @classmethod
    def _impl_v13(cls, inputs, attr, params):
        data, scale, zp = inputs
        axis = attr.get("axis", 1)
        return _qnn.op.dequantize(data, scale, _op.cast(zp, "int32"), axis)
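
To exercise the DequantizeLinear converter end to end, a hedged sketch: build a one-node ONNX graph with onnx.helper and import it through relay.frontend.from_onnx, which should lower the node to qnn.dequantize. Tensor names, shapes, and constants are made up for illustration.

import onnx
from onnx import TensorProto, helper
from tvm import relay

# Single DequantizeLinear node, opset 10 (per-tensor scale and zero point).
node = helper.make_node("DequantizeLinear", ["x", "scale", "zp"], ["y"])
graph = helper.make_graph(
    [node],
    "dequantize_linear_example",
    inputs=[helper.make_tensor_value_info("x", TensorProto.UINT8, [1, 4])],
    outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 4])],
    initializer=[
        helper.make_tensor("scale", TensorProto.FLOAT, [], [0.5]),
        helper.make_tensor("zp", TensorProto.UINT8, [], [128]),
    ],
)
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 10)])
mod, params = relay.frontend.from_onnx(model, shape={"x": (1, 4)})
print(mod)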


class DynamicQuantizeLinear(OnnxOpConverter):
    """Operator converter for DynamicQuantizeLinear."""

    @classmethod
    def _impl_v11(cls, inputs, attr, params):
        """This op is deprecated and only supports uint8"""
        data = inputs[0]
        data_dtype = infer_type(data).checked_type.dtype
        zero = _op.const(0, dtype=data_dtype)
        maximum = _op.maximum(zero, _op.max(data))
        minimum = _op.minimum(zero, _op.min(data))
        scale = (maximum - minimum) / _op.const(255, dtype=data_dtype)
        zp = zero - _op.min(data) / scale
        zp = _op.cast(_op.round(_op.clip(zp, 0, 255)), "uint8")
        return _expr.TupleWrapper(
            _expr.Tuple(
                [_qnn.op.quantize(data, scale, _op.cast(zp, "int32"), 0, "uint8"), scale, zp]
            ),
            size=3,
        )
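
As a plain-NumPy sanity check of the arithmetic above (an approximation: it assumes round-half-to-even and a saturating clip inside qnn.quantize), the converter derives a uint8 scale and zero point from the data range, then quantizes with them:

import numpy as np

def dynamic_quantize_reference(x):
    # Range is stretched to include zero so that 0.0 stays exactly representable.
    scale = (max(0.0, float(x.max())) - min(0.0, float(x.min()))) / 255.0
    zp = np.uint8(np.clip(np.round(-x.min() / scale), 0, 255))
    y = np.clip(np.round(x / scale) + zp, 0, 255).astype("uint8")
    return y, np.float32(scale), zp

y, scale, zp = dynamic_quantize_reference(np.array([-1.0, 0.0, 2.5], dtype="float32"))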


class BitShift(OnnxOpConverter):
    """Operator converter for BitShift"""

@@ -2966,6 +3021,10 @@ def _get_convert_map(opset):
        "If": If.get_converter(opset),
        # Torch ATen Dispatcher.
        "ATen": ATen.get_converter(opset),
        # Quantization
        "QuantizeLinear": QuantizeLinear.get_converter(opset),
        "DequantizeLinear": DequantizeLinear.get_converter(opset),
        "DynamicQuantizeLinear": DynamicQuantizeLinear.get_converter(opset),
    }
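
With these entries registered, an ONNX graph containing QuantizeLinear / DequantizeLinear / DynamicQuantizeLinear nodes can be imported and compiled like any other model. A rough sketch, where the model file name is hypothetical:

import onnx
import tvm
from tvm import relay

model = onnx.load("prequantized_model.onnx")  # hypothetical pre-quantized model
mod, params = relay.frontend.from_onnx(model, freeze_params=True)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)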


8 changes: 0 additions & 8 deletions tests/python/frontend/onnx/test_forward.py
@@ -4157,15 +4157,8 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
"test_cumsum_2d_axis_0/",
"test_cumsum_2d_axis_1/",
"test_cumsum_2d_negative_axis/",
"test_dequantizelinear/",
"test_det_2d/",
"test_det_nd/",
"test_dynamicquantizelinear/",
"test_dynamicquantizelinear_expanded/",
"test_dynamicquantizelinear_max_adjusted/",
"test_dynamicquantizelinear_max_adjusted_expanded/",
"test_dynamicquantizelinear_min_adjusted/",
"test_dynamicquantizelinear_min_adjusted_expanded/",
"test_eyelike_populate_off_main_diagonal/",
"test_eyelike_with_dtype/",
"test_eyelike_without_dtype/",
@@ -4193,7 +4186,6 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
"test_qlinearconv/",
"test_qlinearmatmul_2D/",
"test_qlinearmatmul_3D/",
"test_quantizelinear/",
"test_range_float_type_positive_delta_expanded/",
"test_range_int32_type_negative_delta_expanded/",
"test_resize_downsample_scales_cubic/",
