[BYOC] support arbitrary input dims for add/mul/relu of dnnl c_src codegen (apache#9127)

* support arbitrary input dims for add/mul/relu of dnnl c_src codegen

* fix lint

* fix

Co-authored-by: sunway <[email protected]>
2 people authored and ylc committed Jan 13, 2022
1 parent e369d0a commit ad24256
Showing 3 changed files with 85 additions and 34 deletions.
41 changes: 27 additions & 14 deletions src/relay/backend/contrib/dnnl/codegen.cc
@@ -54,6 +54,15 @@ inline size_t GetShape1DSize(const Type& type) {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }
 
+inline std::string GetShapeString(std::vector<int> shape) {
+  std::string v = "std::vector<long int>{";
+  for (auto s : shape) {
+    v += std::to_string(s) + ",";
+  }
+  v += "}";
+  return v;
+}
+
 std::vector<std::string> Conv2d(const CallNode* call) {
   std::vector<std::string> args;
   const auto* conv2d_attr = call->attrs.as<Conv2DAttrs>();
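
As a quick illustration of the literal this helper emits (a standalone sketch; the shape is invented, and the helper body is duplicated here only so the sketch compiles on its own):

  #include <iostream>
  #include <string>
  #include <vector>

  // Standalone copy of GetShapeString, for illustration.
  static std::string GetShapeString(std::vector<int> shape) {
    std::string v = "std::vector<long int>{";
    for (auto s : shape) v += std::to_string(s) + ",";
    return v + "}";
  }

  int main() {
    // Prints: std::vector<long int>{1,3,224,224,}
    std::cout << GetShapeString({1, 3, 224, 224}) << "\n";
  }

The trailing comma inside the braces is legal in a C++ braced initializer list, so the generated source compiles as-is.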
@@ -98,12 +107,8 @@ std::vector<std::string> Dense(const CallNode* call) {
 std::vector<std::string> Relu(const CallNode* call) {
   std::vector<std::string> args;
   auto ishape = GetShape(call->args[0]->checked_type());
-
-  // Args: N, C, H, W
-  for (auto s : ishape) {
-    args.push_back(std::to_string(s));
-  }
-
+  args.push_back(GetShapeString(ishape));
   return args;
 }
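
The wrapper emitted for nn.relu therefore receives the whole shape as one argument instead of four separate ints; a hypothetical generated call (buffer names invented for illustration) would look like:

  dnnl_relu(buf_0, buf_1, std::vector<long int>{1,3,224,224,});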

@@ -123,15 +128,25 @@ std::vector<std::string> BatchNorm(const CallNode* call) {
   return args;
 }
 
+// should comply with src/runtime/contrib/dnnl/dnnl.cc
+#define DNNL_BINARY_ADD 0
+#define DNNL_BINARY_MUL 1
+
 std::vector<std::string> Add(const CallNode* call) {
   std::vector<std::string> args;
   auto ishape = GetShape(call->args[0]->checked_type());
-
-  // Args: H, W
-  for (auto s : ishape) {
-    args.push_back(std::to_string(s));
-  }
+  args.push_back(std::to_string(DNNL_BINARY_ADD));
+  args.push_back(GetShapeString(ishape));
   return args;
 }
+
+std::vector<std::string> Multiply(const CallNode* call) {
+  std::vector<std::string> args;
+  auto ishape = GetShape(call->args[0]->checked_type());
+  args.push_back(std::to_string(DNNL_BINARY_MUL));
+  args.push_back(GetShapeString(ishape));
+  return args;
+}
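
The two strings pushed here become the trailing arguments of the generated call into the runtime, so an elementwise add over a rank-3 tensor would be emitted roughly as follows (buffer names and shape invented for illustration):

  dnnl_binary_op(buf_0, buf_1, out_0, 0, std::vector<long int>{8,16,32,});

where 0 is the DNNL_BINARY_ADD algorithm code.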

@@ -239,11 +254,9 @@ class CodegenDNNL : public MemoizedExprTranslator<std::vector<Output>>, public C
 
   using ArgFunType = std::function<std::vector<std::string>(const CallNode*)>;
   static const std::map<std::string, std::pair<std::string, ArgFunType>> op_map = {
-      {"nn.conv2d", {"dnnl_conv2d", Conv2d}},
-      {"nn.dense", {"dnnl_dense", Dense}},
-      {"nn.relu", {"dnnl_relu", Relu}},
-      {"nn.batch_norm", {"dnnl_bn", BatchNorm}},
-      {"add", {"dnnl_add", Add}},
+      {"nn.conv2d", {"dnnl_conv2d", Conv2d}}, {"nn.dense", {"dnnl_dense", Dense}},
+      {"nn.relu", {"dnnl_relu", Relu}}, {"nn.batch_norm", {"dnnl_bn", BatchNorm}},
+      {"add", {"dnnl_binary_op", Add}}, {"multiply", {"dnnl_binary_op", Multiply}},
   };
 
   const auto op_name = GetRef<Op>(op_node)->name;
69 changes: 52 additions & 17 deletions src/runtime/contrib/dnnl/dnnl.cc
@@ -44,6 +44,32 @@ typedef struct {
   void** data;
 } DnnlPackedArgs;
 
+inline dnnl::memory::desc GenDNNLMemDescByShape(const dnnl::memory::dims& shape,
+                                                memory::data_type dtype) {
+  using tag = memory::format_tag;
+
+  dnnl::memory::desc data_md;
+
+  switch (shape.size()) {
+    case 2:
+      data_md = dnnl::memory::desc({shape, dtype, tag::ab});
+      break;
+    case 3:
+      data_md = dnnl::memory::desc({shape, dtype, tag::abc});
+      break;
+    case 4:
+      data_md = dnnl::memory::desc({shape, dtype, tag::abcd});
+      break;
+    case 5:
+      data_md = dnnl::memory::desc({shape, dtype, tag::abcde});
+      break;
+    default:
+      LOG(FATAL) << "Unsupported data shape dimension: " << shape.size();
+      break;
+  }
+  return data_md;
+}
+
 // Read from memory, write to handle
 inline void read_from_dnnl_memory(void* handle, const memory& mem) {
   size_t bytes = mem.get_desc().get_size();
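
For orientation, the helper simply maps rank to the plain row-major format tag (ab, abc, abcd, abcde). A hedged usage sketch, with invented values:

  // A rank-4 f32 tensor gets a tag::abcd (row-major, NCHW-like) descriptor.
  dnnl::memory::dims shape = {1, 3, 224, 224};
  auto md = GenDNNLMemDescByShape(shape, dnnl::memory::data_type::f32);
  // md.get_size() == 1 * 3 * 224 * 224 * sizeof(float) bytes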
@@ -175,16 +201,13 @@ extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int
   read_from_dnnl_memory(out, dst_memory);
 }
 
-extern "C" void dnnl_relu(float* data, float* out, int p_N_, int p_C_, int p_H_, int p_W_) {
-  using tag = memory::format_tag;
+extern "C" void dnnl_relu(float* data, float* out, std::vector<int64_t> shape) {
   using dt = memory::data_type;
 
   engine eng(engine::kind::cpu, 0);
   stream s(eng);
 
-  memory::dims data_tz = {p_N_, p_C_, p_H_, p_W_};
-
-  auto data_md = memory::desc{{data_tz}, dt::f32, tag::nchw};
+  auto data_md = GenDNNLMemDescByShape(shape, dt::f32);
 
   auto data_memory = memory(data_md, eng, data);
   auto dst_memory = memory(data_md, eng);
@@ -241,27 +264,39 @@ extern "C" void dnnl_bn(float* data, float* gamma, float* beta, float* mean, flo
   free(weight);
 }
 
-extern "C" void dnnl_add(float* data, float* weight, float* out, int p_N_, int p_C_, int p_H_,
-                         int p_W_) {
-  using tag = memory::format_tag;
+// should comply with src/relay/backend/contrib/dnnl/codegen.cc
+#define DNNL_BINARY_ADD 0
+#define DNNL_BINARY_MUL 1
+
+extern "C" void dnnl_binary_op(float* data, float* weight, float* out, int algo_type,
+                               std::vector<int64_t> shape) {
   using dt = memory::data_type;
 
   engine eng(engine::kind::cpu, 0);
   stream s(eng);
 
-  memory::dims data_tz = {p_N_, p_C_, p_H_, p_W_};
-
-  auto data_md = memory::desc{{data_tz}, dt::f32, tag::nchw};
-  auto weight_md = memory::desc({{data_tz}, dt::f32, tag::nchw});
-  auto dst_md = memory::desc({{data_tz}, dt::f32, tag::nchw});
+  auto data_md = GenDNNLMemDescByShape(shape, dt::f32);
 
   auto data_memory = memory(data_md, eng, data);
-  auto weight_memory = memory(weight_md, eng, weight);
-  auto dst_memory = memory(dst_md, eng);
+  auto weight_memory = memory(data_md, eng, weight);
+  auto dst_memory = memory(data_md, eng);
 
-  auto add_desc = binary::desc(algorithm::binary_add, data_md, weight_md, dst_md);
+  algorithm algo = algorithm::undef;
+  switch (algo_type) {
+    case DNNL_BINARY_ADD:
+      algo = algorithm::binary_add;
+      break;
+    case DNNL_BINARY_MUL:
+      algo = algorithm::binary_mul;
+      break;
+    default:
+      LOG(FATAL) << "Unsupported dnnl algorithm: " << algo_type;
+      break;
+  }
+
+  auto add_desc = binary::desc(algo, data_md, data_md, data_md);
   auto add_prim_desc = binary::primitive_desc(add_desc, eng);
-  assert(dst_md == add_prim_desc.dst_desc());
+  assert(data_md == add_prim_desc.dst_desc());
 
   auto add = binary(add_prim_desc);
   add.execute(
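
A minimal driver for the new entry point, assuming the runtime above is linked in (the data, sizes, and local prototype are illustrative, not from the commit):

  #include <cstdint>
  #include <vector>

  // Local re-declaration of the runtime symbol; normally this comes from dnnl_kernel.h.
  extern "C" void dnnl_binary_op(float* data, float* weight, float* out, int binary_algo,
                                 std::vector<int64_t> shape);

  int main() {
    std::vector<int64_t> shape = {2, 3};  // rank 2 exercises the tag::ab path
    std::vector<float> a = {1, 2, 3, 4, 5, 6};
    std::vector<float> b = {10, 20, 30, 40, 50, 60};
    std::vector<float> out(6, 0.0f);
    dnnl_binary_op(a.data(), b.data(), out.data(), 1, shape);  // 1 == DNNL_BINARY_MUL
    // out is now {10, 40, 90, 160, 250, 360}
    return 0;
  }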
9 changes: 6 additions & 3 deletions src/runtime/contrib/dnnl/dnnl_kernel.h
@@ -26,6 +26,9 @@
 #define TVM_RUNTIME_CONTRIB_DNNL_DNNL_KERNEL_H_
 
 #include <tvm/runtime/c_runtime_api.h>
+#include <tvm/runtime/logging.h>
+
+#include <vector>
 
 #include "dnnl.hpp"

@@ -54,14 +57,14 @@ extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights,
 extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_,
                                    int p_O_);
 
-extern "C" TVM_DLL void dnnl_relu(float* data, float* out, int p_N_, int p_C_, int p_H_, int p_W_);
+extern "C" TVM_DLL void dnnl_relu(float* data, float* out, std::vector<int64_t> shape);
 
 extern "C" TVM_DLL void dnnl_bn(float* data, float* gamma, float* beta, float* mean,
                                 float* variance, float* out, float* new_mean, float* new_variance,
                                 int p_n_, int p_c_, int p_h_, int p_w_, int p_e_);
 
-extern "C" TVM_DLL void dnnl_add(float* data, float* weight, float* out, int p_n_, int p_c_,
-                                 int p_h_, int p_w_);
+extern "C" TVM_DLL void dnnl_binary_op(float* data, float* weight, float* out, int binary_algo,
+                                       std::vector<int64_t> shape);
 
 }  // namespace contrib
 }  // namespace runtime