Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add dilation in x86 NCHWc depthwise conv support #6267

Merged
merged 1 commit into from
Aug 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions python/tvm/topi/x86/depthwise_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,18 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,

strides = strides if isinstance(strides, (tuple, list)) else (strides, strides)
HSTR, WSTR = strides
pad_top, pad_left, pad_down, pad_right = get_pad_tuple(padding, (filter_height, filter_width))

dh, dw = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation)
assert (dh, dw) == (1, 1), "Does not support dilation"

out_height = (in_height - filter_height + pad_top + pad_down) // HSTR + 1
out_width = (in_width - filter_width + pad_left + pad_right) // WSTR + 1
dilated_kernel_h = (filter_height - 1) * dh + 1
dilated_kernel_w = (filter_width - 1) * dw + 1
pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
padding, (dilated_kernel_h, dilated_kernel_w))
HPAD = pad_top + pad_down
WPAD = pad_left + pad_right

out_height = (in_height + HPAD - dilated_kernel_h) // HSTR + 1
out_width = (in_width + WPAD - dilated_kernel_w) // WSTR + 1

cfg.define_split("tile_ic", in_channel, num_outputs=2)
cfg.define_split("tile_oc", out_channel, num_outputs=2)
Expand All @@ -140,7 +145,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
te.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype),
te.placeholder((out_channel, channel_multiplier, filter_height, filter_width),
dtype=kernel.dtype),
strides, padding, out_dtype)
strides, (pad_top, pad_down), out_dtype)
if cfg.is_fallback:
_fallback_schedule(cfg, wkl)

Expand Down Expand Up @@ -172,6 +177,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
else:
data_pad = data


# depthconv stage
idxdiv = tvm.tir.indexdiv
idxmod = tvm.tir.indexmod
Expand All @@ -184,7 +190,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
(data_pad[
b,
idxdiv(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block),
oh*HSTR+kh, ow*WSTR+kw,
oh*HSTR+kh*dh, ow*WSTR+kw*dw,
idxmod(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block)]
.astype(out_dtype) *
kernel[oco, 0, kh, kw, 0, oci].astype(out_dtype)),
Expand Down
7 changes: 1 addition & 6 deletions tests/python/frontend/pytorch/test_forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -1552,12 +1552,7 @@ def forward(self, inp):
inp = [torch.rand((1, 3, 300, 300), dtype=torch.float)]

verify_model(SegmentationModelWrapper(fcn.eval()), inp, atol=1e-4, rtol=1e-4)

# depthwise + dilated covolution not supported on x86
# see https://github.com/apache/incubator-tvm/issues/4962
cuda_ctx = ("cuda", tvm.gpu(0))
if cuda_ctx[1].exist:
verify_model(SegmentationModelWrapper(deeplab.eval()), inp, [cuda_ctx], atol=1e-4, rtol=1e-4)
verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)


def test_3d_models():
Expand Down
7 changes: 4 additions & 3 deletions tests/python/topi/python/test_topi_depthwise_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
filter_width = filter_height
stride_h = stride_w = stride

assert dilation == 1, "depthwise_conv2d_NCHWc currently does not support dilation."
assert channel_multiplier == 1, "depthwise_conv2d_NCHWc currently does not support channel multiplier > 1."
pad_h, pad_w, _, _ = get_pad_tuple(padding, (filter_height, filter_width))
padding_args = (pad_h, pad_w)
Expand Down Expand Up @@ -307,7 +306,7 @@ def check_device(device):
# declare
DepthwiseConv2d = topi.x86.depthwise_conv2d_NCHWc(Input, Filter,
(stride_h, stride_w),
padding_args,
padding,
(dilation, dilation),
in_layout,
out_layout, dtype)
Expand All @@ -330,8 +329,9 @@ def get_ref_data():
input_np = np.random.uniform(size=input_shape).astype(dtype)
filter_np = np.random.uniform(size=filter_shape).astype(dtype)
# correctness with scipy
dw_np = tvm.topi.testing.dilate_python(filter_np, (1, 1, dilation, dilation)).astype(dtype)
depthwise_conv2d_scipy = tvm.topi.testing.depthwise_conv2d_python_nchw(
input_np, filter_np, stride, padding)
input_np, dw_np, stride, padding)
relu_scipy = np.maximum(depthwise_conv2d_scipy, 0)
return (_transform_data(input_np, ic_block),
_transform_kernel(filter_np, oc_block),
Expand Down Expand Up @@ -390,6 +390,7 @@ def test_depthwise_conv2d():
# depthwise_conv2d_with_workload_nhwc(1, 728, 64, 1, 3, 1, "SAME", dilation=2)

# NCHW[x]c
depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "SAME", dilation=2)
depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "SAME")
depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "VALID")

Expand Down