Skip to content

Commit

Permalink
[TOPI] Automated schedule in conv2d TOPI lib, moving to GEMM intrinsic (
Browse files Browse the repository at this point in the history
apache#35)

* removing programming out of end to end example for now

* updating TOPI library to use gemm tensor intrinsic

* bug fix, autoschedule in TOPI conv lib

* removing the deprecated GEVM intrinsic

* refactoring, fixed lint test

* fix for integer division bug

* python3 bug fix for non matching types due to float division

* comment
  • Loading branch information
tmoreau89 authored and tqchen committed Jul 12, 2018
1 parent 240475e commit 3fa87c3
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 204 deletions.
7 changes: 2 additions & 5 deletions vta/examples/resnet18/pynq/imagenet_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@
print ("Downloading {}".format(file))
wget.download(url+file)

# Program the FPGA remotely
assert tvm.module.enabled("rpc")
remote = rpc.connect(host, port)
vta.program_fpga(remote, BITSTREAM_FILE)

if verbose:
logging.basicConfig(level=logging.DEBUG)

Expand Down Expand Up @@ -129,8 +124,10 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
params=params, target_host=target_host)


assert tvm.module.enabled("rpc")
temp = util.tempdir()
lib.save(temp.relpath("graphlib.o"))
remote = rpc.connect(host, port)
remote.upload(temp.relpath("graphlib.o"))
lib = remote.load_module("graphlib.o")
ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)
Expand Down
6 changes: 0 additions & 6 deletions vta/python/vta/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def __init__(self, env):
self.DEBUG_NO_SYNC = False
env._dev_ctx = self
self.gemm = intrin.gemm(env, env.mock_mode)
self.gevm = intrin.gevm(env, env.mock_mode)

def get_task_qid(self, qid):
"""Get transformed queue index."""
Expand Down Expand Up @@ -204,11 +203,6 @@ def gemm(self):
"""GEMM intrinsic"""
return self.dev.gemm

@property
def gevm(self):
"""GEVM intrinsic"""
return self.dev.gevm

@property
def target_host(self):
"""The target host"""
Expand Down
82 changes: 0 additions & 82 deletions vta/python/vta/intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,88 +3,6 @@

import tvm

def gevm(env, mock=False):
"""Vector-matrix multiply intrinsic
Parameters
----------
env : Environment
The Environment
mock : bool
Whether create a mock version.
"""
wgt_lanes = env.WGT_ELEM_BITS // env.WGT_WIDTH
assert wgt_lanes == env.BLOCK_OUT * env.BLOCK_IN
wgt_shape = (env.BLOCK_OUT, env.BLOCK_IN)
assert wgt_shape[0] * wgt_shape[1] == wgt_lanes
inp_lanes = env.INP_ELEM_BITS // env.INP_WIDTH
out_lanes = env.ACC_ELEM_BITS // env.ACC_WIDTH
wgt = tvm.placeholder((wgt_shape[0], wgt_shape[1]),
dtype="int%d" % env.WGT_WIDTH,
name=env.wgt_scope)
inp = tvm.placeholder((wgt_shape[1], ),
dtype="int%d" % env.INP_WIDTH,
name=env.inp_scope)
k = tvm.reduce_axis((0, wgt_shape[1]), name="k")
out_dtype = "int%d" % env.ACC_WIDTH
out = tvm.compute((wgt_shape[0],),
lambda i: tvm.sum(inp[k].astype(out_dtype) *
wgt[i, k].astype(out_dtype),
axis=[k]),
name="out")
wgt_layout = tvm.decl_buffer(
wgt.shape, wgt.dtype, env.wgt_scope,
scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
inp_layout = tvm.decl_buffer(
inp.shape, inp.dtype, env.inp_scope,
scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
out_layout = tvm.decl_buffer(
out.shape, out.dtype, env.acc_scope,
scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)

def intrin_func(ins, outs):
"""Vector-matrix multiply intrinsic function"""
dinp, dwgt = ins
dout = outs[0]
def instr(index):
"""Generate vector-matrix multiply VTA instruction"""
irb = tvm.ir_builder.create()
dev = env.dev
irb.scope_attr(dev.vta_axis, "coproc_scope",
dev.get_task_qid(dev.QID_COMPUTE))
irb.scope_attr(dev.vta_axis, "coproc_uop_scope",
dev.vta_push_uop)
if index == 0 or index == 2:
irb.emit(tvm.call_extern(
"int32", "VTAUopPush",
0, 0,
dout.access_ptr("rw", "int32"),
dinp.access_ptr("r", "int32"),
dwgt.access_ptr("r", "int32"),
0, 0, 0))
else:
irb.emit(tvm.call_extern(
"int32", "VTAUopPush",
0, 1,
dout.access_ptr("rw", "int32"),
0,
0,
0, 0, 0))
return irb.get()
# return a triple of normal-set, reset, update
nop = tvm.make.Evaluate(0)
if mock:
return (nop, nop, nop)
return (instr(0), instr(1), instr(2))

return tvm.decl_tensor_intrin(out.op, intrin_func,
name="GEVM",
binds={inp: inp_layout,
wgt: wgt_layout,
out: out_layout})


def gemm(env, mock=False):
"""Matrix-matrix multiply intrinsic
Expand Down
Loading

0 comments on commit 3fa87c3

Please sign in to comment.