
[QNN] Conv2D operator #3580

Merged: 1 commit, Sep 4, 2019
docs/langref/relay_op.rst (2 additions, 0 deletions)

@@ -211,6 +211,7 @@ This level supports dialect operators.
:nosignatures:

tvm.relay.qnn.op.requantize
tvm.relay.qnn.op.conv2d


Level 1 Definitions
@@ -357,3 +358,4 @@ Level 10 Definitions
Level 11 Definitions
--------------------
.. autofunction:: tvm.relay.qnn.op.requantize
.. autofunction:: tvm.relay.qnn.op.conv2d
include/tvm/relay/qnn/attrs.h (62 additions, 0 deletions)

@@ -125,6 +125,68 @@ struct QnnConcatenateAttrs : public tvm::AttrsNode<QnnConcatenateAttrs> {
}
}; // struct QnnConcatenateAttrs

/*! \brief Attribute for QNN Conv2d operator */
struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
// Traditional conv2d attributes.
Array<IndexExpr> strides;
Array<IndexExpr> padding;
Array<IndexExpr> dilation;
int groups;
IndexExpr channels;
Array<IndexExpr> kernel_size;
std::string data_layout;
std::string kernel_layout;
std::string out_layout;
DataType out_dtype;

// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;

TVM_DECLARE_ATTRS(QnnConv2DAttrs, "relay.attrs.QnnConv2DAttrs") {
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
.describe("Specifies the strides of the convolution.");
TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
.describe("If padding is non-zero, then the input is implicitly zero-padded"
"on both sides for padding number of points");
TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
.describe("Specifies the dilation rate to use for dilated convolution.");
TVM_ATTR_FIELD(groups).set_default(1)
.describe("Controls the connections between inputs and outputs."
"At groups=1, all inputs are convolved to all outputs."
"At groups=2, the operation becomes equivalent to having two convolution"
"layers side by side, each seeing half the input channels, and producing"
"half the output channels, and both subsequently concatenated.");
TVM_ATTR_FIELD(channels)
.describe("The number of output channels in the convolution."
" If it is not set, inferred by shape of the weight.")
.set_default(NullValue<IndexExpr>());
TVM_ATTR_FIELD(kernel_size)
.describe("Specifies the dimensions of the convolution window.")
.set_default(NullValue<Array<IndexExpr> >());
TVM_ATTR_FIELD(data_layout).set_default("NCHW")
.describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
"dimensions respectively. Convolution is applied on the 'H' and"
"'W' dimensions.");
TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
.describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
"'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
"dimensions respectively.");
TVM_ATTR_FIELD(out_layout).set_default("")
.describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
"dimensions respectively. Default to be same as input layout.");
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
}
};

} // namespace qnn
} // namespace relay
} // namespace tvm
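The two zero-point attributes are what make a purely integer lowering possible. As a sketch of the arithmetic involved (the symbols below are illustrative, not taken from the patch): writing $q_d$ for the quantized data, $q_w$ for the quantized kernel, and $z_d$, $z_w$ for their zero points, the shifted convolution expands as

$$
\mathrm{conv}(q_d - z_d,\ q_w - z_w)
= \mathrm{conv}(q_d, q_w)
- z_w \sum_{\text{window}} q_d
- z_d \sum_{\text{window}} q_w
+ z_d\, z_w\, K,
$$

where each sum runs over one kernel window and $K$ is the number of elements reduced per output (input channels per group times kernel height times kernel width). A lowering can therefore compute a single integer conv2d plus cheap correction terms, which is the kind of graph the pattern_util.h helpers added further down help construct.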
python/tvm/relay/qnn/op/qnn.py (80 additions, 0 deletions)

@@ -183,3 +183,83 @@ def concatenate(data,
output_scale,
output_zero_point,
axis)


def conv2d(data,
kernel,
input_zero_point,
kernel_zero_point,
strides=(1, 1),
padding=(0, 0),
dilation=(1, 1),
groups=1,
channels=None,
kernel_size=None,
data_layout="NCHW",
kernel_layout="OIHW",
out_layout="",
out_dtype="int32"):
r"""Quantized 2D convolution.

This operator convolves quantized data with a quantized kernel. The scale
of the output quantized tensor is the product of the kernel_scale and
input_scale of the input quantized tensors. The zero point of the output
quantized tensor is 0. By default, the dtype of the output is int32. Please
also refer to the Requantize operator to understand how to scale the int32
output back to (u)int8.

Parameters
----------
data : tvm.relay.Expr
The input data to the operator.

kernel : tvm.relay.Expr
The kernel expression.

input_zero_point: int
The zero point of the data distribution.

kernel_zero_point: int
The zero point of the quantized_kernel distribution.

strides : tuple of int, optional
The strides of convolution.

padding : tuple of int, optional
The padding of convolution on both sides of inputs before convolution.

dilation : tuple of int, optional
Specifies the dilation rate to be used for dilated convolution.

groups : int, optional
Number of groups for grouped convolution.

channels : int, optional
Number of output channels of this convolution.

kernel_size : tuple of int, optional
The spatial dimensions of the convolution kernel.

data_layout : str, optional
Layout of the input.

kernel_layout : str, optional
Layout of the kernel.

out_layout : str, optional
Layout of the output. By default, out_layout is the same as data_layout.

out_dtype : str, optional
Specifies the output data type for mixed precision conv2d.

Returns
-------
result : tvm.relay.Expr
The computed result.
"""

return _make.conv2d(data, kernel,
input_zero_point, kernel_zero_point,
strides, padding, dilation,
groups, channels, kernel_size,
data_layout, kernel_layout, out_layout, out_dtype)
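
A minimal usage sketch of the new operator (the shapes, zero points, and scales below are illustrative; the requantize parameter names follow that operator's own docs and are assumptions here, not part of this patch):

```python
from tvm import relay

# Illustrative uint8 NCHW input and int8 OIHW kernel.
data = relay.var("data", shape=(1, 3, 224, 224), dtype="uint8")
kernel = relay.var("kernel", shape=(16, 3, 3, 3), dtype="int8")

# Quantized conv2d accumulates into int32; per the docstring above,
# the output zero point is 0 and the output scale is
# input_scale * kernel_scale.
conv = relay.qnn.op.conv2d(
    data, kernel,
    input_zero_point=128, kernel_zero_point=0,
    kernel_size=(3, 3), channels=16, padding=(1, 1),
    out_dtype="int32")

# Scale the int32 accumulator back to uint8 with requantize.
out = relay.qnn.op.requantize(
    conv,
    input_scale=0.25,   # product of the conv inputs' scales (illustrative)
    input_zero_point=0,
    output_scale=0.5,
    output_zero_point=128,
    out_dtype="uint8")

func = relay.Function([data, kernel], out)
```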
src/relay/pass/pattern_util.h (65 additions, 0 deletions)

@@ -415,6 +415,71 @@ static inline Expr Full(Expr fill_value,
return CallNode::make(op, {fill_value}, Attrs(attrs), {});
}

Review thread on this function:

Member: It looks like this is the same as MakeConv2d, right? If so, should we just keep one signature instead of duplicating it? I am not strongly against this, since the pattern is obviously used by other cases as well.

Contributor Author: Yeah, I kept it to follow the other use cases. I guess this repetition exists because TVM typically wants to avoid header/implementation linking problems. Will keep it as Conv2D for now.

Member: I don't have a strong feeling about this; actually, I'm not sure why we prefer copying this (and the others) here instead of adding declarations.

Member: I think linking should be fine. We can put TVM_DLL if needed. But anyway, we can keep it this way for now.

static inline Expr Conv2D(Expr data, Expr weight, Array<IndexExpr> strides,

Array<IndexExpr> padding, Array<IndexExpr> dilation, int groups,
IndexExpr channels, Array<IndexExpr> kernel_size, std::string data_layout,
std::string kernel_layout, std::string out_layout, DataType out_dtype) {
auto attrs = make_node<Conv2DAttrs>();
attrs->strides = std::move(strides);
attrs->padding = std::move(padding);
attrs->dilation = std::move(dilation);
attrs->groups = groups;
attrs->channels = std::move(channels);
attrs->kernel_size = std::move(kernel_size);
attrs->data_layout = std::move(data_layout);
attrs->kernel_layout = std::move(kernel_layout);
attrs->out_layout = std::move(out_layout);
attrs->out_dtype = std::move(out_dtype);
static const Op& op = Op::Get("nn.conv2d");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}

static inline Expr Sum(Expr data, Array<Integer> axis, bool keepdims, bool exclude) {
auto attrs = make_node<ReduceAttrs>();
attrs->axis = std::move(axis);
attrs->keepdims = keepdims;
attrs->exclude = exclude;
static const Op& op = Op::Get("sum");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

static inline Expr Reshape(Expr data, Array<Integer> newshape) {
auto attrs = make_node<ReshapeAttrs>();
attrs->newshape = std::move(newshape);
attrs->reverse = false;
static const Op& op = Op::Get("reshape");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

static inline Expr AvgPool2D(Expr data, Array<IndexExpr> pool_size, Array<IndexExpr> strides,
Array<IndexExpr> padding, std::string layout, bool ceil_mode,
bool count_include_pad) {
auto attrs = make_node<AvgPool2DAttrs>();
attrs->pool_size = std::move(pool_size);
attrs->strides = std::move(strides);
attrs->padding = std::move(padding);
attrs->layout = std::move(layout);
attrs->ceil_mode = ceil_mode;
attrs->count_include_pad = count_include_pad;
static const Op& op = Op::Get("nn.avg_pool2d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

static inline Expr Pad(Expr data, Array<Array<IndexExpr>> pad_width, double pad_value) {
auto attrs = make_node<PadAttrs>();
attrs->pad_value = pad_value;
attrs->pad_width = std::move(pad_width);
static const Op& op = Op::Get("nn.pad");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

static inline Expr Tile(Expr data, Array<Integer> reps) {
auto attrs = make_node<TileAttrs>();
attrs->reps = reps;
static const Op& op = Op::Get("tile");
return CallNode::make(op, {data}, Attrs(attrs), {});
}

Expr MakeConcatenate(Expr data, int axis);

Expr MakeStridedSlice(Expr data, Array<Integer> begin, Array<Integer> end, Array<Integer> strides);
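
The helpers above exist so that C++ passes, including the QNN conv2d lowering, can assemble Relay graphs directly. A rough Python analogue of the simplest strategy such a lowering could take (the function name and decomposition are illustrative, not the patch's actual algorithm; a real lowering may instead use the four-term expansion shown earlier):

```python
from tvm import relay

def lower_qnn_conv2d_sketch(data, kernel, input_zero_point, kernel_zero_point,
                            **conv_attrs):
    """Upcast to int32, subtract the zero points, then run a plain conv2d.

    This mirrors what the Conv2D/Sum/Reshape helpers make possible on the
    C++ side; it is a sketch, not the canonical lowering.
    """
    data32 = relay.cast(data, "int32")
    kernel32 = relay.cast(kernel, "int32")
    shifted_data = relay.subtract(
        data32, relay.const(input_zero_point, "int32"))
    shifted_kernel = relay.subtract(
        kernel32, relay.const(kernel_zero_point, "int32"))
    return relay.nn.conv2d(shifted_data, shifted_kernel,
                           out_dtype="int32", **conv_attrs)
```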