Skip to content

Commit

Permalink
[TUTORIAL] use OpenCL on ARM board (apache#633)
Browse files Browse the repository at this point in the history
  • Loading branch information
merrymercy authored and sergei-mironov committed Aug 8, 2018
1 parent ffe6f04 commit 55cab74
Showing 1 changed file with 56 additions and 2 deletions.
58 changes: 56 additions & 2 deletions tutorials/deployment/cross_compilation_and_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@
# `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_.

######################################################################
# Run Kernel Remotely by RPC
# --------------------------
# Run CPU Kernel Remotely by RPC
# ------------------------------
# Here we will show you how to run the kernel on the remote device:

# replace host with the ip address of your device
Expand Down Expand Up @@ -204,6 +204,60 @@
cost = time_f(a, b).mean
print('%g secs/op' % cost)

#########################################################################
# Run OpenCL Kernel Remotely by RPC
# ---------------------------------
# For remote OpenCL devices, the workflow is almost the same as above.
# You can define the kernel, upload the files, and run it by RPC. The files
# include the host object, the kernel source code and the module meta file.
# We rely on the remote compiler to re-link them.
#
# .. note::
# Raspberry Pi does not support OpenCL, so the following code was tested on
# a Firefly-RK3399. There the target_host should be 'llvm -target=aarch64-linux-gnu',
# but here we set it to 'llvm' so that this tutorial can run locally.

# build kernel (unlike the CPU case, the axes must be bound to thread axes
# for OpenCL)
s = tvm.create_schedule(B.op)
# split the first axis of B into an outer part and an inner part of factor 32
xo, xi = s[B].split(B.op.axis[0], factor=32)
# map the outer part to blockIdx.x and the inner part to threadIdx.x
s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
# device target is "opencl"; the host-side code is built with "llvm"
f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")

# save files: the host module goes to myadd.o, the first imported module
# (the device code) goes to myadd.cl
path_o = temp.relpath("myadd.o")
path_cl = temp.relpath("myadd.cl")
# NOTE(review): nothing here writes path_json explicitly; presumably saving
# the imported module to myadd.cl also emits myadd.tvm_meta.json next to it
# -- confirm against the TVM module save implementation.
path_json = temp.relpath("myadd.tvm_meta.json")
f.save(path_o)
f.imported_modules[0].save(path_cl)

# upload files
remote.upload(path_o)
remote.upload(path_cl)
remote.upload(path_json)

# load files on remote device (referenced by file name, not by local path)
fhost = remote.load_module("myadd.o")
fdev = remote.load_module("myadd.cl")
# attach the device module to the host module
fhost.import_module(fdev)

# run on OpenCL device 0 of the remote session
ctx = remote.cl(0)
# a: 1024 uniform random inputs; b: zero-initialized output of the same dtype
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
fhost(a, b)
# the kernel is expected to produce b = a + 1
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

#####################################################################
# Instead of uploading files separately, there is a more convenient way.
# You can export the library as a tarball.
# export the built module `f` into a single archive, myadd.tar
path_tar = temp.relpath("myadd.tar")
f.export_library(path_tar)
# upload the archive and load it on the remote by file name
remote.upload(path_tar)
fhost = remote.load_module("myadd.tar")
# re-run with the same remote arrays `a` and `b`; expect b = a + 1 again
fhost(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

# terminate the server after the experiment
server.terminate()

Expand Down

0 comments on commit 55cab74

Please sign in to comment.