Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Relay] [Quantization] WIP - Prototyping the quantized convolution op #3367

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions include/tvm/relay/attrs/qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/relay/attrs/qnn.h
* \brief Auxiliary attributes for quantized nn (qnn) operators.
*/
#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_

#include <tvm/attrs.h>
#include <string>

namespace tvm {
namespace relay {

/*!
 * \brief Attributes for the quantized conv2d operator.
 *
 * Holds the usual conv2d attributes together with the zero points of the
 * input and kernel tensors.
 */
struct QConv2DAttrs : public tvm::AttrsNode<QConv2DAttrs> {
  // Traditional conv2d attributes.
  /*! \brief Strides of the convolution (default {1, 1}). */
  Array<IndexExpr> strides;
  /*! \brief Implicit zero padding applied on both sides (default {0, 0}). */
  Array<IndexExpr> padding;
  /*! \brief Dilation rate for dilated convolution (default {1, 1}). */
  Array<IndexExpr> dilation;
  /*! \brief Number of groups connecting inputs and outputs (default 1). */
  int groups;
  /*! \brief Number of output channels; null by default. */
  IndexExpr channels;
  /*! \brief Dimensions of the convolution window; null by default. */
  Array<IndexExpr> kernel_size;
  /*! \brief Dimension ordering of the input data (default "NCHW"). */
  std::string data_layout;
  /*! \brief Dimension ordering of the weight (default "OIHW"). */
  std::string kernel_layout;
  /*! \brief Dimension ordering of the output (default ""). */
  std::string out_layout;
  /*! \brief Output data type; null by default. */
  DataType out_dtype;

  // Quantization related attributes.
  /*! \brief Zero point of the input tensor (no default is declared). */
  int32_t input_zero_point;
  /*! \brief Zero point of the kernel tensor (no default is declared). */
  int32_t kernel_zero_point;

  TVM_DECLARE_ATTRS(QConv2DAttrs, "relay.attrs.QConv2DAttrs") {
    // Adjacent string literals are joined verbatim by the compiler, so each
    // continuation piece below starts with an explicit space to keep the
    // generated description readable.
    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the strides of the convolution.");
    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
        .describe("If padding is non-zero, then the input is implicitly zero-padded"
                  " on both sides for padding number of points");
    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the dilation rate to use for dilated convolution.");
    TVM_ATTR_FIELD(groups).set_default(1)
        .describe("Controls the connections between inputs and outputs."
                  " At groups=1, all inputs are convolved to all outputs."
                  " At groups=2, the operation becomes equivalent to having two convolution"
                  " layers side by side, each seeing half the input channels, and producing"
                  " half the output channels, and both subsequently concatenated.");
    TVM_ATTR_FIELD(channels)
        .describe("The number of output channels in the convolution."
                  " If it is not set, inferred by shape of the weight.")
        .set_default(NullValue<IndexExpr>());
    TVM_ATTR_FIELD(kernel_size)
        .describe("Specifies the dimensions of the convolution window.")
        .set_default(NullValue<Array<IndexExpr> >());
    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
                  " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
                  " dimensions respectively. Convolution is applied on the 'H' and"
                  " 'W' dimensions.");
    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
        .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
                  " 'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
                  " dimensions respectively.");
    TVM_ATTR_FIELD(out_layout).set_default("")
        .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
                  " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
                  " dimensions respectively. Default to be same as input layout.");
    TVM_ATTR_FIELD(out_dtype)
        .set_default(NullValue<DataType>())
        .describe("Output data type, set to explicit type under mixed precision setting");
    TVM_ATTR_FIELD(input_zero_point)
        .describe("The zero point of the input tensor.");
    TVM_ATTR_FIELD(kernel_zero_point)
        .describe("The zero point of the kernel tensor.");
  }
};


/*!
 * \brief Attributes for the requantize operator.
 *
 * Describes the scale and zero point of the input and output tensors along
 * with the knobs that select how the rescaling is computed.
 */
struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
  /*! \brief Scale of the input tensor (no default is declared). */
  double input_scale;
  /*! \brief Zero point of the input tensor (no default is declared). */
  int32_t input_zero_point;
  /*! \brief Scale of the output tensor (no default is declared). */
  double output_scale;
  /*! \brief Zero point of the output tensor (no default is declared). */
  int32_t output_zero_point;
  /*! \brief Whether integer computation handles the output scale (default false). */
  bool use_int_compute;
  /*! \brief Rounding direction for midway values (default "FE_UPWARD"). */
  std::string rounding_mode;
  /*! \brief Output data type; null by default. */
  DataType out_dtype;

  TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
    TVM_ATTR_FIELD(input_zero_point)
        .describe("The zero point of the input tensor.");
    TVM_ATTR_FIELD(output_zero_point)
        .describe("The zero point of the output tensor.");
    TVM_ATTR_FIELD(input_scale)
        .describe("The scale of the input tensor.");
    TVM_ATTR_FIELD(output_scale)
        .describe("The scale of the output tensor.");
    TVM_ATTR_FIELD(use_int_compute).set_default(false)
        .describe("When true, the integer computation is used to handle output scale");
    TVM_ATTR_FIELD(out_dtype)
        .set_default(NullValue<DataType>())
        .describe("Output data type, set to explicit type under mixed precision setting");
    // Adjacent string literals are joined verbatim; the leading spaces keep
    // the words (and the URL) from running into the preceding text.
    TVM_ATTR_FIELD(rounding_mode).set_default("FE_UPWARD")
        .describe("Defines the rounding direction when the value is midway between"
                  " two representable values. There are two supported modes - FE_UPWARD"
                  " or FE_AWAY_FROM_ZERO. More context can be found at"
                  " https://www.gnu.org/software/libc/manual/html_node/Rounding.html");
  }
};


} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
139 changes: 139 additions & 0 deletions include/tvm/relay/quantize_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/relay/quantize_util.h
* \brief Utility methods needed for quantized ops that can be shared
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nits -

s/needs/needed.

shared between frontends ?

*/

#ifndef TVM_QUANTIZE_UTIL_H
#define TVM_QUANTIZE_UTIL_H

#include <tvm/expr.h>
#include <cstdint>
#include <limits>
#include "./base.h"

namespace tvm {
namespace relay {

inline bool is_Int8(const DataType& dtype) {
return dtype == Int(8);
}

inline bool is_UInt8(const DataType& dtype) {
return dtype == UInt(8);
}


inline bool is_Int16(const DataType& dtype) {
return dtype == Int(16);
}

inline bool is_UInt16(const DataType& dtype) {
return dtype == UInt(16);
}

inline bool is_Int32(const DataType& dtype) {
return dtype == Int(32);
}

inline bool is_UInt32(const DataType& dtype) {
return dtype == UInt(32);
}



Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit, unnecessary new lines.

inline bool is_Float32(const DataType& dtype) {
return dtype == Float(32);
}

/*!
 * \brief Returns true when \p dtype is one of the types treated as quantized
 *        here: Int(8), UInt(8), Int(16) or UInt(16).
 */
inline bool is_quantized_type(const DataType& dtype) {
  // 8-bit candidates are checked first, then the 16-bit ones.
  if (is_Int8(dtype) || is_UInt8(dtype)) {
    return true;
  }
  return is_Int16(dtype) || is_UInt16(dtype);
}

/*!
 * \brief Operator kinds used to select the dtype validation rules in this header.
 *
 * As encoded by is_valid_quantized_op_input_type:
 *  - Quantize_Requantize accepts Float(32) or Int/UInt 8/16 inputs,
 *  - Dequantize accepts Int/UInt 8/16 inputs,
 *  - Requantize accepts Int(16) or Int(32) inputs.
 * NOTE(review): the precise operator semantics behind each kind are not
 * visible in this header -- confirm against the operator documentation.
 */
enum class QuantizeOpType : uint8_t {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some notes here on Quantize, Requantize and Quantize_Requantize might be appropriate. Alternatively a pointer to the python documentation might also be useful for the reader.

Quantize_Requantize,
Dequantize,
Requantize
};

/*!
 * \brief Checks whether \p in_dtype is an accepted input type for \p op_type.
 * \param op_type The operator kind being validated.
 * \param in_dtype The data type of the operator input.
 * \return true if the input type is accepted; false otherwise, including for
 *         any operator kind not listed below.
 */
inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type,
                                             const DataType &in_dtype) {
  if (op_type == QuantizeOpType::Quantize_Requantize) {
    return is_Float32(in_dtype) || is_quantized_type(in_dtype);
  }
  if (op_type == QuantizeOpType::Dequantize) {
    return is_quantized_type(in_dtype);
  }
  if (op_type == QuantizeOpType::Requantize) {
    return is_Int16(in_dtype) || is_Int32(in_dtype);
  }
  return false;
}

/*!
 * \brief Checks whether a data type is an accepted output type for \p op_type.
 * \param op_type The operator kind being validated.
 * \param in_dtype The data type of the operator output (the parameter keeps
 *        its historical name).
 * \return true if the output type is accepted; false otherwise, including for
 *         any operator kind not listed below.
 */
inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type,
                                              const DataType &in_dtype) {
  switch (op_type) {
    case QuantizeOpType::Quantize_Requantize:
      return is_quantized_type(in_dtype);
    case QuantizeOpType::Dequantize:
      return is_Float32(in_dtype);
    case QuantizeOpType::Requantize:
      // Requantize was previously unhandled here, so every output type was
      // rejected even though the input validator supports the op.
      // NOTE(review): assumes requantize emits one of the quantized types --
      // confirm the intended set of output dtypes.
      return is_quantized_type(in_dtype);
    default:
      return false;
  }
}

/*!
 * \brief Returns the smallest value representable by the integer type \p dtype.
 * \param dtype One of Int(8), UInt(8), Int(16), UInt(16), Int(32) or UInt(32).
 * \return The minimum of the matching fixed-width C++ integer type. For any
 *         other type a fatal error is logged; the trailing -1 is only reached
 *         if LOG(FATAL) returns.
 */
inline const int32_t get_qmin(const DataType& dtype) {
  if (is_Int8(dtype)) {
    return std::numeric_limits<int8_t>::min();
  }
  if (is_UInt8(dtype)) {
    return std::numeric_limits<uint8_t>::min();
  }
  if (is_Int16(dtype)) {
    return std::numeric_limits<int16_t>::min();
  }
  if (is_UInt16(dtype)) {
    return std::numeric_limits<uint16_t>::min();
  }
  if (is_Int32(dtype)) {
    return std::numeric_limits<int32_t>::min();
  }
  if (is_UInt32(dtype)) {
    return std::numeric_limits<uint32_t>::min();
  }
  LOG(FATAL) << "Type not supported\n";
  return -1;
}


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit, unnecessary new line.

/*!
 * \brief Returns the largest value representable by the integer type \p dtype.
 * \param dtype One of Int(8), UInt(8), Int(16), UInt(16) or Int(32).
 * \return The maximum of the matching fixed-width C++ integer type. A fatal
 *         error is logged for UInt(32), whose maximum does not fit in the
 *         int32_t return type, and for any unsupported type; the trailing -1
 *         is only reached if LOG(FATAL) returns.
 */
inline const int32_t get_qmax(const DataType& dtype) {
  if (is_Int8(dtype)) {
    return std::numeric_limits<int8_t>::max();
  } else if (is_UInt8(dtype)) {
    return std::numeric_limits<uint8_t>::max();
  } else if (is_Int16(dtype)) {
    return std::numeric_limits<int16_t>::max();
  } else if (is_UInt16(dtype)) {
    return std::numeric_limits<uint16_t>::max();
  } else if (is_Int32(dtype)) {
    return std::numeric_limits<int32_t>::max();
  } else if (is_UInt32(dtype)) {
    // 2^32 - 1 cannot be stored in int32_t. The old narrowing conversion
    // produced -1 on common platforms, which is indistinguishable from the
    // error sentinel and smaller than get_qmin's 0, so fail loudly instead.
    LOG(FATAL) << "Maximum of UInt(32) is not representable as int32_t\n";
    return -1;
  }
  LOG(FATAL) << "Type not supported\n";
  return -1;
}

} // namespace relay
} // namespace tvm
#endif //TVM_QUANTIZE_UTIL_H
1 change: 1 addition & 0 deletions python/tvm/relay/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .transform import *
from .algorithm import *
from . import nn
from . import qnn
from . import annotation
from . import image
from . import vision
Expand Down
21 changes: 21 additions & 0 deletions python/tvm/relay/op/qnn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=wildcard-import
"""Quantized neural network (qnn) related operators."""
from __future__ import absolute_import as _abs
from .qnn import *
# from . import _nn
20 changes: 20 additions & 0 deletions python/tvm/relay/op/qnn/_make.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constructor APIs"""
from ...._ffi.function import _init_api

# NOTE(review): presumably exposes the functions registered under the
# "relay.op.qnn._make" prefix as attributes of this module -- confirm against
# _ffi.function._init_api.
_init_api("relay.op.qnn._make", __name__)
Loading