[Refactor] Introduce target generic dispatch system (#556)
* [TVM] Introduce target generic dispatch system

* fix target warning
tqchen authored Oct 14, 2017
1 parent c3cac46 commit eb761f3
Showing 32 changed files with 672 additions and 161 deletions.
1 change: 1 addition & 0 deletions docs/api/python/index.rst
@@ -8,6 +8,7 @@ Python API
intrin
tensor
schedule
target
build
module
ndarray
13 changes: 13 additions & 0 deletions docs/api/python/target.rst
@@ -0,0 +1,13 @@
tvm.target
----------
.. automodule:: tvm.target

.. autofunction:: tvm.target.generic_func

.. autoclass:: tvm.target.Target
:members:

.. autofunction:: tvm.target.cuda
.. autofunction:: tvm.target.rocm
.. autofunction:: tvm.target.rasp
.. autofunction:: tvm.target.create
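
The new `tvm.target` module pairs a `Target` object (usable as a scope via `with`) with `generic_func`, which dispatches a call to a target-specific override based on the current target. A minimal sketch of the intended usage, assuming the API documented above; the schedule bodies are hypothetical placeholders:

```python
import tvm

@tvm.target.generic_func
def my_schedule(outs):
    # default implementation, used when no override matches
    return tvm.create_schedule([x.op for x in outs])

@my_schedule.register("cuda")
def _(outs):
    # override chosen whenever the current target carries the "cuda" key
    return tvm.create_schedule([x.op for x in outs])

# Entering a Target makes it the current target for dispatch.
n = 1024
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
with tvm.target.cuda():
    s = my_schedule([B])  # resolves to the "cuda" registration
```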
30 changes: 13 additions & 17 deletions docs/api/python/topi.rst
@@ -37,13 +37,11 @@ Index

.. autosummary::

topi.cuda.schedule_conv2d_nchw
topi.cuda.schedule_conv2d_hwcn
topi.cuda.schedule_depthwise_conv2d_nchw
topi.cuda.schedule_depthwise_conv2d_nhwc
topi.cuda.schedule_reduce
topi.cuda.schedule_broadcast
topi.cuda.schedule_injective
topi.generic.schedule_conv2d_nchw
topi.generic.schedule_depthwise_conv2d_nchw
topi.generic.schedule_reduce
topi.generic.schedule_broadcast
topi.generic.schedule_injective

topi
~~~~
@@ -75,14 +73,12 @@ topi.nn
.. autofunction:: topi.nn.depthwise_conv2d_nhwc


topi.cuda
~~~~~~~~~
.. automodule:: topi.cuda
topi.generic
~~~~~~~~~~~~
.. automodule:: topi.generic

.. autofunction:: topi.cuda.schedule_conv2d_nchw
.. autofunction:: topi.cuda.schedule_conv2d_hwcn
.. autofunction:: topi.cuda.schedule_depthwise_conv2d_nchw
.. autofunction:: topi.cuda.schedule_depthwise_conv2d_nhwc
.. autofunction:: topi.cuda.schedule_reduce
.. autofunction:: topi.cuda.schedule_broadcast
.. autofunction:: topi.cuda.schedule_injective
.. autofunction:: topi.generic.schedule_conv2d_nchw
.. autofunction:: topi.generic.schedule_depthwise_conv2d_nchw
.. autofunction:: topi.generic.schedule_reduce
.. autofunction:: topi.generic.schedule_broadcast
.. autofunction:: topi.generic.schedule_injective
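
The docs now point at `topi.generic`, whose schedules are generic functions: the same call site picks up the backend registration for the current target. A sketch, assuming a CUDA registration exists for the injective schedule (as this commit sets up):

```python
import tvm
import topi

n = 1024
A = tvm.placeholder((n,), name="A")
B = topi.nn.relu(A)

# Under a cuda target scope, the generic schedule dispatches to the
# CUDA implementation; outside any scope it falls back to the default.
with tvm.target.cuda():
    s = topi.generic.schedule_injective([B])
```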
6 changes: 1 addition & 5 deletions python/tvm/_ffi/ndarray.py
@@ -56,11 +56,7 @@ def context(dev_type, dev_id=0):
assert tvm.context("cuda", 0) == tvm.gpu(0)
"""
if isinstance(dev_type, string_types):
if dev_type not in TVMContext.STR2MASK:
if dev_type.find("nvptx") != -1:
dev_type = "cuda"
if dev_type.find("rocm") != -1:
dev_type = "rocm"
dev_type = dev_type.split()[0]
if dev_type not in TVMContext.STR2MASK:
raise ValueError("Unknown device type %s" % dev_type)
dev_type = TVMContext.STR2MASK[dev_type]
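
With this change, `context()` keeps only the first token of the string, so a full target string with options still resolves to its base device type. A small sketch of the new behavior:

```python
import tvm

# "llvm -mcpu=core-avx2" is split to "llvm", which the expanded
# STR2MASK table maps to the CPU device type.
ctx = tvm.context("llvm -mcpu=core-avx2", 0)
assert ctx.device_type == tvm.cpu(0).device_type
```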
3 changes: 3 additions & 0 deletions python/tvm/_ffi/runtime_ctypes.py
@@ -100,9 +100,12 @@ class TVMContext(ctypes.Structure):
12: 'ext_dev',
}
STR2MASK = {
'llvm': 1,
'stackvm': 1,
'cpu': 1,
'gpu': 2,
'cuda': 2,
'nvptx': 2,
'cl': 4,
'opencl': 4,
'metal': 8,
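
The expanded table lets compiler-backend names (`llvm`, `stackvm`, `nvptx`) serve as aliases for their device types, replacing the ad-hoc substring checks removed from `ndarray.py`. For illustration:

```python
import tvm

# Backend names now resolve directly to device types.
assert tvm.context("nvptx", 0).device_type == tvm.gpu(0).device_type
assert tvm.context("llvm", 0).device_type == tvm.cpu(0).device_type
assert tvm.context("stackvm", 0).device_type == tvm.cpu(0).device_type
```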
59 changes: 20 additions & 39 deletions python/tvm/build_module.py
@@ -15,6 +15,7 @@
from . import module
from . import codegen
from . import ndarray
from . import target as _target

class BuildConfig(object):
"""Configuration scope to set a build config option.
@@ -238,7 +239,7 @@ def lower(sch,

def build(sch,
args=None,
target="llvm",
target=None,
target_host=None,
name="default_function",
binds=None):
@@ -252,36 +253,10 @@ def build(sch,
args : list of Buffer or Tensor or Var, optional
The argument lists to the function.
target : str, optional
target : str or :any:`tvm.target.Target`, optional
The target and option of the compilation.
When the target is llvm, you can set options like:
- **-mtriple=<target triple>** or **-target**
Specify the target triple, which is useful for cross
compilation.
- **-mcpu=<cpuname>**
Specify a specific chip in the current architecture to
generate code for. By default this is infered from the
target triple and autodetected to the current architecture.
- **-mattr=a1,+a2,-a3,...**
Override or control specific attributes of the target,
such as whether SIMD operations are enabled or not. The
default set of attributes is set by the current CPU.
- **-system-lib**
Build TVM system library module. System lib is a global module that contains
self registered functions in program startup. User can get the module using
:any:`tvm.module.system_lib`.
It is useful in environments where dynamic loading api like dlopen is banned.
The system lib will be available as long as the result code is linked by the program.
target_host : str, optional
target_host : str or :any:`tvm.target.Target` optional
Host compilation target, if target is device.
When TVM compiles device specific program such as CUDA,
we also need host(CPU) side code to interact with the driver
@@ -301,6 +276,10 @@
-------
f : Function, or pair of functions
The result function.
Note
----
See the note on :any:`tvm.target` on target string format.
"""
if isinstance(sch, schedule.Schedule):
if args is None:
@@ -325,14 +304,17 @@
if x.name in fname_set:
raise ValueError("Duplicate function name %s" % x.name)

target = _target.current_target() if target is None else target
target = _target.create(target) if target else _target.create("llvm")

fhost = []
fdevice = []
for func in flist:
if func.func_type == container.LoweredFunc.MixedFunc:
if BuildConfig.current.detect_global_barrier:
func = ir_pass.ThreadSync(func, "global")
func = ir_pass.ThreadSync(func, "shared")
warp_size = 32 if target == "cuda" else 1
warp_size = target.thread_warp_size
func = ir_pass.LowerThreadAllreduce(func, warp_size)
fsplits = [s for s in ir_pass.SplitHostDevice(func)]
fhost.append(fsplits[0])
@@ -345,29 +327,28 @@
else:
raise ValueError("unknown function type %d" % func.func_type)

if not target.startswith("llvm") and target not in ("stackvm", "ext_dev") and not fdevice:
if "gpu" in target.keys and not fdevice:
warnings.warn(
"Specified target %s, but cannot find device code, did you do bind?" % target)

device = "cpu" if target.startswith("llvm") or target == "stackvm" else target
device_type = ndarray.context(device, 0).device_type
device_type = ndarray.context(target.target_name, 0).device_type
fhost = [ir_pass.BindDeviceType(x, device_type) for x in fhost]
fhost = [ir_pass.LowerTVMBuiltin(x) for x in fhost]

if not target_host:
if device == "cpu":
if device_type == ndarray.cpu(0).device_type:
target_host = target
assert not fdevice
else:
target_host = "llvm" if module.enabled("llvm") else "stackvm"

target_host = _target.create(target_host)
target_device = target
fdevice = [ir_pass.LowerIntrin(x, target_device) for x in fdevice]
fhost = [ir_pass.LowerIntrin(x, target_host) for x in fhost]
fdevice = [ir_pass.LowerIntrin(x, target_device.target_name) for x in fdevice]
fhost = [ir_pass.LowerIntrin(x, target_host.target_name) for x in fhost]
fhost = [ir_pass.CombineContextCall(x) for x in fhost]
mhost = codegen.build_module(fhost, target_host)
mhost = codegen.build_module(fhost, str(target_host))

if fdevice:
mdev = codegen.build_module(fdevice, target_device)
mdev = codegen.build_module(fdevice, str(target_device))
mhost.import_module(mdev)
return mhost
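
`build` now resolves its target in two steps: an explicit argument wins, otherwise the current target scope is used, with `"llvm"` as the final fallback. A sketch of both call styles under the new behavior:

```python
import tvm

n = 1024
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)

# Explicit target: a string or a tvm.target.Target both work now.
f1 = tvm.build(s, [A, B], target=tvm.target.create("llvm"))

# Implicit target: picked up from the enclosing target scope.
with tvm.target.create("llvm -mcpu=core-avx2"):
    f2 = tvm.build(s, [A, B])
```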
