From 9f997ba620a50e6af17d15114f0bad0ac9307827 Mon Sep 17 00:00:00 2001
From: Altan Haan <ahaan@octoml.ai>
Date: Wed, 28 Jul 2021 16:39:43 -0700
Subject: [PATCH 1/4] hotfix check_grad perf regression: lift compile out of
 hot loop

---
 python/tvm/relay/testing/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py
index bfe797d844a8..de85ed69238a 100644
--- a/python/tvm/relay/testing/__init__.py
+++ b/python/tvm/relay/testing/__init__.py
@@ -154,15 +154,16 @@ def check_grad(
         assert len(grads) > 0, "You must test at least one gradient."
 
         # Get numeric gradients for each dimension of each param, using two-sided approximation.
+        fwd_func_compiled = intrp.evaluate(fwd_func)
         approx_grads = []
         for x in test_inputs:
             approx_grad = np.zeros(x.shape)
             for i in np.ndindex(*x.shape):
                 x_i = x[i]
                 x[i] = x_i + eps
-                fwd_plus = intrp.evaluate(fwd_func)(*inputs).numpy().astype("float64")
+                fwd_plus = fwd_func_compiled(*inputs).numpy().astype("float64")
                 x[i] = x_i - eps
-                fwd_minus = intrp.evaluate(fwd_func)(*inputs).numpy().astype("float64")
+                fwd_minus = fwd_func_compiled(*inputs).numpy().astype("float64")
                 x[i] = x_i
                 approx_grad[i] = np.sum((fwd_plus - fwd_minus) / (2 * eps))
             approx_grads.append(approx_grad)

From 23d235112f98f7f6e6c08295dd5aee4da350bda7 Mon Sep 17 00:00:00 2001
From: Altan Haan <ahaan@octoml.ai>
Date: Thu, 29 Jul 2021 00:03:22 -0700
Subject: [PATCH 2/4] hoist interpreter creation out of python closure, fix
 arm cpu group_conv2d default config to pass dilation through

---
 python/tvm/relay/backend/interpreter.py | 3 ++-
 python/tvm/topi/arm_cpu/group_conv2d.py | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/backend/interpreter.py b/python/tvm/relay/backend/interpreter.py
index b62fca86668d..bd23163ef795 100644
--- a/python/tvm/relay/backend/interpreter.py
+++ b/python/tvm/relay/backend/interpreter.py
@@ -227,6 +227,8 @@ def _make_executor(self, expr=None):
         if expr is None or isinstance(expr, GlobalVar):
             assert self.mod is not None
 
+        _intrp = _backend.CreateInterpreter(self.mod, self.device, self.target)
+
         def _interp_wrapper(*args, **kwargs):
             if expr is None:
                 args = self._convert_args(self.mod["main"], args, kwargs)
@@ -253,7 +255,6 @@ def _interp_wrapper(*args, **kwargs):
 
             mod = self.optimize()
             opt_expr = Call(mod["main"], relay_args)
-            _intrp = _backend.CreateInterpreter(mod, self.device, self.target)
             return _intrp(opt_expr)
 
         return _interp_wrapper
diff --git a/python/tvm/topi/arm_cpu/group_conv2d.py b/python/tvm/topi/arm_cpu/group_conv2d.py
index d852b9acef66..18773fabed95 100644
--- a/python/tvm/topi/arm_cpu/group_conv2d.py
+++ b/python/tvm/topi/arm_cpu/group_conv2d.py
@@ -42,7 +42,7 @@ def schedule_group_conv2d_nchw(outs):
     return schedule_group_conv2d_nchwc(outs)
 
 
-def _get_default_config(cfg, data, kernel, strides, padding, groups, out_dtype, layout="NCHW"):
+def _get_default_config(cfg, data, kernel, strides, padding, dilation, groups, out_dtype, layout="NCHW"):
     """
     Get default schedule config for the workload
     """
@@ -54,7 +54,7 @@ def _get_default_config(cfg, data, kernel, strides, padding, groups, out_dtype,
             static_data_shape.append(dim)
     data = te.placeholder(static_data_shape, dtype=data.dtype)
 
-    wkl = _get_conv2d_workload(data, kernel, strides, padding, out_dtype, layout)
+    wkl = _get_conv2d_workload(data, kernel, strides, padding, dilation, out_dtype, layout)
     _fallback_schedule(cfg, wkl)
 
 
@@ -158,6 +158,7 @@ def group_conv2d_nchw_spatial_pack(
             ),
             strides,
             padding,
+            dilation,
             groups,
             out_dtype,
         )

From 0505a08a5bbface91933e5c7e51331b7e082584d Mon Sep 17 00:00:00 2001
From: Altan Haan <ahaan@octoml.ai>
Date: Thu, 29 Jul 2021 00:06:11 -0700
Subject: [PATCH 3/4] lint: wrap long _get_default_config signature

---
 python/tvm/topi/arm_cpu/group_conv2d.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tvm/topi/arm_cpu/group_conv2d.py b/python/tvm/topi/arm_cpu/group_conv2d.py
index 18773fabed95..81b2c7260f05 100644
--- a/python/tvm/topi/arm_cpu/group_conv2d.py
+++ b/python/tvm/topi/arm_cpu/group_conv2d.py
@@ -42,7 +42,9 @@ def schedule_group_conv2d_nchw(outs):
     return schedule_group_conv2d_nchwc(outs)
 
 
-def _get_default_config(cfg, data, kernel, strides, padding, dilation, groups, out_dtype, layout="NCHW"):
+def _get_default_config(
+    cfg, data, kernel, strides, padding, dilation, groups, out_dtype, layout="NCHW"
+):
     """
     Get default schedule config for the workload
     """

From fb7ce155f45e85da21a37b6595957ae21d732c14 Mon Sep 17 00:00:00 2001
From: Altan Haan <ahaan@octoml.ai>
Date: Thu, 29 Jul 2021 08:13:26 -0700
Subject: [PATCH 4/4] try one more fix: create interpreter from
 self.optimize() instead of self.mod

---
 python/tvm/relay/backend/interpreter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/relay/backend/interpreter.py b/python/tvm/relay/backend/interpreter.py
index bd23163ef795..81edf74a0a03 100644
--- a/python/tvm/relay/backend/interpreter.py
+++ b/python/tvm/relay/backend/interpreter.py
@@ -227,7 +227,7 @@ def _make_executor(self, expr=None):
         if expr is None or isinstance(expr, GlobalVar):
             assert self.mod is not None
 
-        _intrp = _backend.CreateInterpreter(self.mod, self.device, self.target)
+        _intrp = _backend.CreateInterpreter(self.optimize(), self.device, self.target)
 
         def _interp_wrapper(*args, **kwargs):
             if expr is None: