[ConvertLayout] Support QNN ops.
Ubuntu committed Mar 15, 2020
1 parent f7488cc commit 235c079
Showing 20 changed files with 330 additions and 105 deletions.
10 changes: 1 addition & 9 deletions python/tvm/relay/op/nn/_nn.py
@@ -138,22 +138,14 @@ def convert_conv2d(attrs, inputs, tinfos, desired_layout):
     """
     # pylint: disable=import-outside-toplevel
     from tvm import relay
-    data_layout = attrs['data_layout']
-    kernel_layout = attrs['kernel_layout']
     data, weight = inputs
     assert desired_layout == 'NCHW', \
             "Currently only transformation to NCHW layout is supported."
     if desired_layout == 'NCHW':
         new_attrs = dict(attrs)
         new_attrs['data_layout'] = desired_layout
         new_attrs['kernel_layout'] = 'OIHW'
-
-        if data_layout == 'NHWC' and kernel_layout == 'HWIO':
-            # Convert (NHWC, HWIO) to (NCHW, OIHW)
-            return relay.nn.conv2d(data, weight, **new_attrs)
-        if data_layout == 'NHWC' and kernel_layout == 'HWOI':
-            # Convert (NHWC, HWOI) to (NCHW, OIHW). Depthwise conv2d.
-            return relay.nn.conv2d(data, weight, **new_attrs)
+        return relay.nn.conv2d(data, weight, **new_attrs)
     return None


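For context, this is how the simplified hook gets exercised: ConvertLayout calls the registered convert_conv2d for each conv2d it visits and rewrites the op to the desired layout, leaving layout_transform insertion to the layout-inference machinery. A minimal sketch, modeled on the tests that accompany this commit; the variable names, shapes, and the string form of the desired layout are illustrative assumptions, not taken from the diff:

import tvm
from tvm import relay

# Illustrative NHWC workload; names and shapes are assumptions.
data = relay.var("data", shape=(1, 56, 56, 64), dtype="float32")
weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="float32")
conv = relay.nn.conv2d(data, weight, channels=64, kernel_size=(3, 3), padding=(1, 1),
                       data_layout="NHWC", kernel_layout="HWIO")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))

mod = relay.transform.InferType()(mod)
# ConvertLayout invokes the registered convert_conv2d above; at the time of this
# commit the pass took the desired layout as a plain string.
mod = relay.transform.ConvertLayout("NCHW")(mod)
print(mod)  # conv2d now carries data_layout="NCHW" and kernel_layout="OIHW"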
2 changes: 1 addition & 1 deletion python/tvm/relay/qnn/op/__init__.py
@@ -19,4 +19,4 @@
 from __future__ import absolute_import as _abs
 from .qnn import *
 from .op import register_qnn_legalize
-from . import legalizations
+from . import legalizations, layout_conversions
54 changes: 54 additions & 0 deletions python/tvm/relay/qnn/op/layout_conversions.py
@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Convert layout related registration"""
from __future__ import absolute_import

import tvm
from tvm.relay.op import op as reg


@reg.register_convert_op_layout("qnn.conv2d")
def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layout):
"""Convert Layout pass registration for QNN conv2d op.
Parameters
----------
attrs : tvm.ir.Attrs
Attributes of current convolution
inputs : list of tvm.relay.Expr
The args of the Relay expr to be legalized
tinfos : list of types
List of input and output types
desired_layout : str
The desired layout
Returns
-------
result : tvm.relay.Expr
The transformed expr
"""
# pylint: disable=import-outside-toplevel
from tvm import relay
assert desired_layout == 'NCHW', \
"Currently only transformation to NCHW layout is supported."
if desired_layout == 'NCHW':
new_attrs = dict(attrs)
new_attrs['data_layout'] = desired_layout
new_attrs['kernel_layout'] = 'OIHW'
return relay.qnn.op.conv2d(*inputs, **new_attrs)
return None
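A sketch of what this new registration enables, modeled on the convert-layout tests added with this commit; the shapes and the constant quantization parameters below are illustrative assumptions:

import tvm
from tvm import relay

# Illustrative NHWC int8 workload.
data = relay.var("data", shape=(1, 56, 56, 64), dtype="int8")
weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="int8")
conv = relay.qnn.op.conv2d(data, weight,
                           relay.const(1, "int32"),      # input_zero_point
                           relay.const(1, "int32"),      # kernel_zero_point
                           relay.const(1.0, "float32"),  # input_scale
                           relay.const(1.0, "float32"),  # kernel_scale
                           channels=64, kernel_size=(3, 3), padding=(1, 1),
                           data_layout="NHWC", kernel_layout="HWIO")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))

mod = relay.transform.InferType()(mod)
# With convert_qnn_conv2d registered, ConvertLayout can now rewrite QNN graphs as well.
mod = relay.transform.ConvertLayout("NCHW")(mod)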
2 changes: 1 addition & 1 deletion src/relay/op/nn/bitserial.cc
@@ -39,7 +39,7 @@ template <typename T>
 Array<Array<Layout>> BinaryConv2DInferCorrectLayout(const Attrs& attrs,
                                                     const Array<Layout>& new_in_layouts,
                                                     const Array<Layout>& old_in_layouts,
-                                                    const Array<Array<IndexExpr>>& old_in_shapes) {
+                                                    const Array<tvm::relay::Type>& old_in_types) {
   const T* params = attrs.as<T>();
 
   // We always make other operators to fit the layouts of convolution layers
17 changes: 0 additions & 17 deletions src/relay/op/nn/convolution.cc
@@ -34,23 +34,6 @@
 namespace tvm {
 namespace relay {
 
-
-template<typename T>
-Array<Array<Layout> > ConvInferCorrectLayout(
-    const Attrs& attrs,
-    const Array<Layout>& new_in_layouts,
-    const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
-  const T* params = attrs.as<T>();
-
-  // We always make other operators to fit the layouts of convolution layers
-  // So this inference ignores all inputs
-  return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
-                               {params->out_layout == "" ?
-                                params->data_layout : params->out_layout}};
-}
-
-
 template <typename T>
 Expr MakeConv(Expr data,
               Expr weight,
15 changes: 15 additions & 0 deletions src/relay/op/nn/convolution.h
@@ -360,6 +360,21 @@ bool Conv3DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   return true;
 }
 
+template<typename T>
+Array<Array<Layout> > ConvInferCorrectLayout(
+    const Attrs& attrs,
+    const Array<Layout>& new_in_layouts,
+    const Array<Layout>& old_in_layouts,
+    const Array<tvm::relay::Type> &old_in_types) {
+  const T* params = attrs.as<T>();
+
+  // We always make other operators to fit the layouts of convolution layers
+  // So this inference ignores all inputs
+  return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
+                               {params->out_layout == "" ?
+                                params->data_layout : params->out_layout}};
+}
+
 }  // namespace relay
 }  // namespace tvm
 #endif  // TVM_RELAY_OP_NN_CONVOLUTION_H_
12 changes: 9 additions & 3 deletions src/relay/op/nn/nn.cc
@@ -272,10 +272,10 @@ Array<Array<Layout> > PReluInferCorrectLayout(
     const Attrs& attrs,
     const Array<Layout>& new_in_layouts,
     const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
+    const Array<tvm::relay::Type> &old_in_types) {
 
   CHECK_EQ(old_in_layouts.size(), 2U);
-  CHECK_EQ(old_in_shapes.size(), 2U);
+  CHECK_EQ(old_in_types.size(), 2U);
   Layout data_layout = old_in_layouts[0];
   if (new_in_layouts.defined()) {
     CHECK_EQ(new_in_layouts.size(), 2U);

@@ -615,9 +615,15 @@ TVM_REGISTER_NODE_TYPE(BatchNormAttrs);
 Array<Array<Layout>> BatchNormInferCorrectLayout(const Attrs& attrs,
                                                  const Array<Layout>& new_in_layouts,
                                                  const Array<Layout>& old_in_layouts,
-                                                 const Array<Array<IndexExpr>>& old_in_shapes) {
+                                                 const Array<tvm::relay::Type>& old_in_types) {
   BatchNormAttrs* param = const_cast<BatchNormAttrs*>(attrs.as<BatchNormAttrs>());
 
+  Array<Array<IndexExpr>> old_in_shapes;
+  for (auto old_in_t : old_in_types) {
+    CHECK(old_in_t.as<TensorTypeNode>());
+    old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+  }
+
   size_t axis =
       param->axis < 0 ? param->axis + old_in_shapes[0].size() : static_cast<size_t>(param->axis);
 
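The same types-to-shapes conversion reappears in reduce.cc and transform.cc below. On the Python side, BatchNormInferCorrectLayout is what lets a conv2d + batch_norm pair move together under ConvertLayout; a hedged sketch with illustrative shapes (not taken from this commit):

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 56, 56, 64), dtype="float32")
weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="float32")
gamma = relay.var("gamma", shape=(64,))
beta = relay.var("beta", shape=(64,))
mean = relay.var("mean", shape=(64,))
var = relay.var("var", shape=(64,))

y = relay.nn.conv2d(data, weight, channels=64, kernel_size=(3, 3), padding=(1, 1),
                    data_layout="NHWC", kernel_layout="HWIO")
y = relay.nn.batch_norm(y, gamma, beta, mean, var, axis=3)[0]  # channel axis in NHWC
mod = tvm.IRModule.from_expr(relay.Function([data, weight, gamma, beta, mean, var], y))

mod = relay.transform.InferType()(mod)
# BatchNormInferCorrectLayout lets the pass keep batch_norm consistent with the
# converted conv2d, remapping its axis from 3 (NHWC) to 1 (NCHW).
mod = relay.transform.ConvertLayout("NCHW")(mod)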
2 changes: 1 addition & 1 deletion src/relay/op/nn/pad.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > PadInferCorrectLayout(
     const Attrs& attrs,
     const Array<Layout>& new_in_layouts,
     const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
+    const Array<tvm::relay::Type> &old_in_types) {
   // NOTE: Discard "const" qualifier here.
   PadAttrs *params = const_cast<PadAttrs*>(attrs.as<PadAttrs>());
 

Expand Down
2 changes: 1 addition & 1 deletion src/relay/op/nn/pooling.cc
@@ -41,7 +41,7 @@ Array<Array<Layout> > PoolInferCorrectLayout(
     const Attrs& attrs,
     const Array<Layout>& new_in_layouts,
     const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
+    const Array<tvm::relay::Type> &old_in_types) {
   // NOTE: Discard "const" qualifier here.
   T *params = const_cast<T*>(attrs.as<T>());
 

Expand Down
2 changes: 1 addition & 1 deletion src/relay/op/nn/upsampling.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > UpsamplingInferCorrectLayout(
     const Attrs& attrs,
     const Array<Layout>& new_in_layouts,
     const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
+    const Array<tvm::relay::Type> &old_in_types) {
   // NOTE: Discard "const" qualifier here.
   T *params = const_cast<T*>(attrs.as<T>());
 

Expand Down
7 changes: 6 additions & 1 deletion src/relay/op/tensor/reduce.cc
@@ -122,11 +122,16 @@ Array<Integer> GetExcludeAxes(size_t indim,
 Array<Array<Layout>> ReduceInferCorrectLayout(const Attrs& attrs,
                                               const Array<Layout>& new_in_layouts,
                                               const Array<Layout>& old_in_layouts,
-                                              const Array<Array<IndexExpr>>& old_in_shapes) {
+                                              const Array<tvm::relay::Type>& old_in_types) {
   // NOTE: Discard "const" qualifier here.
   ReduceAttrs* params = const_cast<ReduceAttrs*>(attrs.as<ReduceAttrs>());
 
   // Get the reduce axes.
+  Array<Array<IndexExpr>> old_in_shapes;
+  for (auto old_in_t : old_in_types) {
+    CHECK(old_in_t.as<TensorTypeNode>());
+    old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+  }
   uint32_t indim = old_in_shapes[0].size();
   auto r_axes = GetReduceAxes(indim, params->axis, params->exclude);

57 changes: 8 additions & 49 deletions src/relay/op/tensor/transform.cc
@@ -275,54 +275,6 @@ Array<te::Tensor> ConcatenateCompute(const Attrs& attrs,
   return { topi::concatenate(inputs, param->axis) };
 }
 
-Array<Array<Layout>> ConcatenateLayout(
-    const Attrs& attrs,
-    const Array<Layout>& new_in_layouts,
-    const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>> &old_in_shapes) {
-  ConcatenateAttrs* param = const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
-
-  size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
-      static_cast<size_t>(param->axis);
-
-  Layout ret;
-  bool is_new_layout_selected = false;
-  if (new_in_layouts.defined()) {  // this function is called after some operators are alternated.
-    // If all the new input layouts are same, the new in layout gets selected. For axis, the new
-    // axis in the new layout is identified. The param->axis is then modified on the fly to conform
-    // to the new input layout.
-    const auto& concate_dim = old_in_layouts[0][axis];
-    bool all_input_layouts_same = true;
-    for (auto new_layout : new_in_layouts) {
-      if (!new_layout.Equals(new_in_layouts[0])) {
-        all_input_layouts_same = false;
-      }
-    }
-    if (all_input_layouts_same) {
-      auto new_index = new_in_layouts[0].IndexOf(concate_dim);
-      ret = new_in_layouts[0];
-      param->axis = new_index;
-      is_new_layout_selected = true;
-    }
-  }
-
-  if (!is_new_layout_selected) {
-    // this function is called on the original correct relay ir
-    for (size_t i = 0; i < old_in_layouts.size(); ++i) {
-      if (old_in_layouts[i].defined()) {
-        ret = old_in_layouts[i];
-        break;
-      }
-    }
-
-    if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
-      return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
-    }
-  }
-
-  return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret), {ret}};
-}
-
 Expr MakeConcatenate(Expr data,
                      int axis) {
   auto attrs = make_object<ConcatenateAttrs>();
@@ -1933,7 +1885,14 @@ Array<Array<Layout> > StridedSliceInferCorrectLayout(
     const Attrs& attrs,
     const Array<Layout>& new_in_layouts,
     const Array<Layout>& old_in_layouts,
-    const Array<Array<IndexExpr>>& old_in_shapes) {
+    const Array<tvm::relay::Type>& old_in_types) {
+
+  Array<Array<IndexExpr>> old_in_shapes;
+  for (auto old_in_t : old_in_types) {
+    CHECK(old_in_t.as<TensorTypeNode>());
+    old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+  }
+
   CHECK(old_in_layouts.defined());
   CHECK_EQ(old_in_layouts.size(), 1);
   CHECK(old_in_shapes.defined());
58 changes: 58 additions & 0 deletions src/relay/op/tensor/transform.h
@@ -25,6 +25,7 @@
 #define TVM_RELAY_OP_TENSOR_TRANSFORM_H_
 
 #include <tvm/ir/error.h>
+#include <tvm/relay/attrs/transform.h>
 #include <vector>
 #include <algorithm>
 #include <limits>
@@ -124,6 +125,63 @@ bool ConcatenateRel(const Array<Type>& types,
   return true;
 }
 
+static inline Array<Array<Layout>> ConcatenateLayout(
+    const Attrs& attrs,
+    const Array<Layout>& new_in_layouts,
+    const Array<Layout>& old_in_layouts,
+    const Array<tvm::relay::Type> &old_in_types) {
+  ConcatenateAttrs* param = const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
+
+  Array<Array<IndexExpr>> old_in_shapes;
+  CHECK_EQ(old_in_types.size(), 1);
+  for (auto old_in_tuple_t : old_in_types) {
+    CHECK(old_in_tuple_t.as<TupleTypeNode>());
+    for (auto old_in_t : old_in_tuple_t.as<TupleTypeNode>()->fields) {
+      old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+    }
+  }
+
+  size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
+      static_cast<size_t>(param->axis);
+
+  Layout ret;
+  bool is_new_layout_selected = false;
+  if (new_in_layouts.defined()) {  // this function is called after some operators are alternated.
+    // If all the new input layouts are same, the new in layout gets selected. For axis, the new
+    // axis in the new layout is identified. The param->axis is then modified on the fly to conform
+    // to the new input layout.
+    const auto& concate_dim = old_in_layouts[0][axis];
+    bool all_input_layouts_same = true;
+    for (auto new_layout : new_in_layouts) {
+      if (!new_layout.Equals(new_in_layouts[0])) {
+        all_input_layouts_same = false;
+      }
+    }
+    if (all_input_layouts_same) {
+      auto new_index = new_in_layouts[0].IndexOf(concate_dim);
+      ret = new_in_layouts[0];
+      param->axis = new_index;
+      is_new_layout_selected = true;
+    }
+  }
+
+  if (!is_new_layout_selected) {
+    // this function is called on the original correct relay ir
+    for (size_t i = 0; i < old_in_layouts.size(); ++i) {
+      if (old_in_layouts[i].defined()) {
+        ret = old_in_layouts[i];
+        break;
+      }
+    }
+
+    if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
+      return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
+    }
+  }
+
+  return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret), {ret}};
+}
+
 }  // namespace relay
 }  // namespace tvm
 #endif  // TVM_RELAY_OP_TENSOR_TRANSFORM_H_
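Moving ConcatenateLayout into a header makes it reusable outside transform.cc. Its effect is easiest to see from Python: when the convolutions feeding a concatenate are converted to NCHW, the concatenate adopts the new layout and its axis is remapped on the fly (channel axis 3 in NHWC becomes axis 1 in NCHW). A hedged sketch with illustrative shapes and names:

import tvm
from tvm import relay

# Two NHWC convolutions whose outputs are concatenated along the channel axis.
data = relay.var("data", shape=(1, 56, 56, 64), dtype="float32")
w1 = relay.var("w1", shape=(3, 3, 64, 64), dtype="float32")
w2 = relay.var("w2", shape=(3, 3, 64, 64), dtype="float32")
conv_kwargs = dict(channels=64, kernel_size=(3, 3), padding=(1, 1),
                   data_layout="NHWC", kernel_layout="HWIO")
y1 = relay.nn.conv2d(data, w1, **conv_kwargs)
y2 = relay.nn.conv2d(data, w2, **conv_kwargs)
out = relay.concatenate([y1, y2], axis=3)  # channel axis in NHWC
mod = tvm.IRModule.from_expr(relay.Function([data, w1, w2], out))

mod = relay.transform.InferType()(mod)
# After the pass, the concatenate follows the convolutions' NCHW layout and
# ConcatenateLayout has rewritten its axis to 1.
mod = relay.transform.ConvertLayout("NCHW")(mod)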
21 changes: 20 additions & 1 deletion src/relay/qnn/op/add.cc
@@ -25,13 +25,31 @@
 #include <tvm/relay/op_attr_types.h>
 #include <tvm/relay/qnn/attrs.h>
 #include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
 #include "../util.h"
 #include "op_common.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
+/*! \brief Infer layout for QNN binary broadcast operators */
+Array<Array<Layout> > QnnBinaryBroadcastLayout(const Attrs& attrs,
+                                               const Array<Layout>& new_in_layouts,
+                                               const Array<Layout>& old_in_layouts,
+                                               const Array<tvm::relay::Type>& old_in_types) {
+  // Use Relay Binary Broadcast Infer correct layout.
+  auto layouts = BinaryBroadcastLayout(attrs, new_in_layouts, old_in_layouts, old_in_types);
+
+  // Fill the layouts of remaining input tensors - scales and zero points. The layouts of these
+  // tensors can be ignored as they don't go through any transformation.
+  Layout ignore_layout = Layout("I");
+  Array<Layout> input_layouts = {layouts[0][0], layouts[0][1], ignore_layout, ignore_layout,
+                                 ignore_layout, ignore_layout, ignore_layout, ignore_layout};
+  Array<Layout> output_layouts = layouts[1];
+  return {input_layouts, output_layouts};
+}
+
 /*
  * \brief Canonicalizes the QNN add op.
  * \param attrs The QNN concatenate attrs.
@@ -118,7 +136,8 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
 QNN_REGISTER_BINARY_OP("add")
 .describe("Elementwise add with broadcasting for quantized tensors.")
 .set_support_level(11)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 }  // namespace qnn
 }  // namespace relay
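A sketch of the eight inputs QnnBinaryBroadcastLayout describes, loosely modeled on the qnn.add tests in this commit; the shapes and quantization constants are illustrative. The first two layouts come from BinaryBroadcastLayout for the tensor operands, while the six scale and zero-point scalars keep the placeholder "I" layout because no transformation is needed for them:

import tvm
from tvm import relay

# qnn.add takes lhs, rhs plus six quantization parameters - matching the eight
# input layouts returned by QnnBinaryBroadcastLayout above.
lhs = relay.var("lhs", shape=(1, 56, 56, 64), dtype="int8")
rhs = relay.var("rhs", shape=(1, 56, 56, 64), dtype="int8")
out = relay.qnn.op.add(lhs, rhs,
                       lhs_scale=relay.const(0.1, "float32"),
                       lhs_zero_point=relay.const(0, "int32"),
                       rhs_scale=relay.const(0.1, "float32"),
                       rhs_zero_point=relay.const(0, "int32"),
                       output_scale=relay.const(0.1, "float32"),
                       output_zero_point=relay.const(0, "int32"))
mod = tvm.IRModule.from_expr(relay.Function([lhs, rhs], out))

mod = relay.transform.InferType()(mod)
# With FInferCorrectLayout registered, ConvertLayout can propagate layouts through
# qnn.add (e.g. when it follows a converted qnn.conv2d) instead of stopping there.
mod = relay.transform.ConvertLayout("NCHW")(mod)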