[TOPI] Automated schedule in conv2d TOPI lib, moving to GEMM intrinsic (apache#35)

* removing programming out of end to end example for now
* updating TOPI library to use gemm tensor intrinsic
* bug fix, autoschedule in TOPI conv lib
* removing the deprecated GEVM intrinsic
* refactoring, fixed lint test
* fix for integer division bug
* python3 bug fix for non matching types due to float division
* comment
tqchen · Jul 12, 2018 · 3fa87c3 · 3fa87c3
1 parent 240475e
commit 3fa87c3
Show file tree

Hide file tree

Showing 7 changed files with 229 additions and 204 deletions.
diff --git a/vta/examples/resnet18/pynq/imagenet_predict.py b/vta/examples/resnet18/pynq/imagenet_predict.py
@@ -31,11 +31,6 @@
         print ("Downloading {}".format(file))
         wget.download(url+file)
 
-# Program the FPGA remotely
-assert tvm.module.enabled("rpc")
-remote = rpc.connect(host, port)
-vta.program_fpga(remote, BITSTREAM_FILE)
-
 if verbose:
     logging.basicConfig(level=logging.DEBUG)
 
@@ -129,8 +124,10 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
                 params=params, target_host=target_host)
 
 
+assert tvm.module.enabled("rpc")
 temp = util.tempdir()
 lib.save(temp.relpath("graphlib.o"))
+remote = rpc.connect(host, port)
 remote.upload(temp.relpath("graphlib.o"))
 lib = remote.load_module("graphlib.o")
 ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)

diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py
@@ -55,7 +55,6 @@ def __init__(self, env):
         self.DEBUG_NO_SYNC = False
         env._dev_ctx = self
         self.gemm = intrin.gemm(env, env.mock_mode)
-        self.gevm = intrin.gevm(env, env.mock_mode)
 
     def get_task_qid(self, qid):
         """Get transformed queue index."""
@@ -204,11 +203,6 @@ def gemm(self):
         """GEMM intrinsic"""
         return self.dev.gemm
 
-    @property
-    def gevm(self):
-        """GEVM intrinsic"""
-        return self.dev.gevm
-
     @property
     def target_host(self):
         """The target host"""

diff --git a/vta/python/vta/intrin.py b/vta/python/vta/intrin.py
@@ -3,88 +3,6 @@
 
 import tvm
 
-def gevm(env, mock=False):
-    """Vector-matrix multiply intrinsic
-
-    Parameters
-    ----------
-    env : Environment
-        The Environment
-
-    mock : bool
-        Whether create a mock version.
-    """
-    wgt_lanes = env.WGT_ELEM_BITS // env.WGT_WIDTH
-    assert wgt_lanes == env.BLOCK_OUT * env.BLOCK_IN
-    wgt_shape = (env.BLOCK_OUT, env.BLOCK_IN)
-    assert wgt_shape[0] * wgt_shape[1] == wgt_lanes
-    inp_lanes = env.INP_ELEM_BITS // env.INP_WIDTH
-    out_lanes = env.ACC_ELEM_BITS // env.ACC_WIDTH
-    wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]),
-                          dtype="int%d" % env.WGT_WIDTH,
-                          name=env.wgt_scope)
-    inp = tvm.placeholder((wgt_shape[1], ),
-                          dtype="int%d" % env.INP_WIDTH,
-                          name=env.inp_scope)
-    k = tvm.reduce_axis((0, wgt_shape[1]), name="k")
-    out_dtype = "int%d" % env.ACC_WIDTH
-    out = tvm.compute((wgt_shape[0],),
-                      lambda i: tvm.sum(inp[k].astype(out_dtype) *
-                                        wgt[i, k].astype(out_dtype),
-                                        axis=[k]),
-                      name="out")
-    wgt_layout = tvm.decl_buffer(
-        wgt.shape, wgt.dtype, env.wgt_scope,
-        scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
-    inp_layout = tvm.decl_buffer(
-        inp.shape, inp.dtype, env.inp_scope,
-        scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
-    out_layout = tvm.decl_buffer(
-        out.shape, out.dtype, env.acc_scope,
-        scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)
-
-    def intrin_func(ins, outs):
-        """Vector-matrix multiply intrinsic function"""
-        dinp, dwgt = ins
-        dout = outs[0]
-        def instr(index):
-            """Generate vector-matrix multiply VTA instruction"""
-            irb = tvm.ir_builder.create()
-            dev = env.dev
-            irb.scope_attr(dev.vta_axis, "coproc_scope",
-                           dev.get_task_qid(dev.QID_COMPUTE))
-            irb.scope_attr(dev.vta_axis, "coproc_uop_scope",
-                           dev.vta_push_uop)
-            if index == 0 or index == 2:
-                irb.emit(tvm.call_extern(
-                    "int32", "VTAUopPush",
-                    0, 0,
-                    dout.access_ptr("rw", "int32"),
-                    dinp.access_ptr("r", "int32"),
-                    dwgt.access_ptr("r", "int32"),
-                    0, 0, 0))
-            else:
-                irb.emit(tvm.call_extern(
-                    "int32", "VTAUopPush",
-                    0, 1,
-                    dout.access_ptr("rw", "int32"),
-                    0,
-                    0,
-                    0, 0, 0))
-            return irb.get()
-        # return a triple of normal-set, reset, update
-        nop = tvm.make.Evaluate(0)
-        if mock:
-            return (nop, nop, nop)
-        return (instr(0), instr(1), instr(2))
-
-    return tvm.decl_tensor_intrin(out.op, intrin_func,
-                                  name="GEVM",
-                                  binds={inp: inp_layout,
-                                         wgt: wgt_layout,
-                                         out: out_layout})
-
-
 def gemm(env, mock=False):
     """Matrix-matrix multiply intrinsic