
[BYOC][ACL] Add support for dense (fully connected) layer #6254

Merged · 2 commits · Aug 13, 2020
7 changes: 7 additions & 0 deletions docs/deploy/arm_compute_lib.rst
@@ -181,6 +181,13 @@ Operator support
|              |                                                                         |
|              | (only groups = 1 supported)                                             |
+--------------+-------------------------------------------------------------------------+
| nn.dense     | fp32:                                                                   |
|              |   Simple: nn.dense                                                      |
|              |   Composite: nn.dense, nn.bias_add?                                     |
+--------------+-------------------------------------------------------------------------+
| qnn.dense    | uint8:                                                                  |
|              |   Composite: qnn.dense, nn.bias_add?, qnn.requantize                    |
+--------------+-------------------------------------------------------------------------+
| nn.maxpool2d | fp32, uint8                                                             |
+--------------+-------------------------------------------------------------------------+
| reshape      | fp32, uint8                                                             |
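For context, a minimal sketch of offloading a graph that matches the fp32 composite pattern above, assuming the standard BYOC flow and the partition_for_arm_compute_lib helper exposed by tvm.relay.op.contrib.arm_compute_lib:

import numpy as np
import tvm
from tvm import relay
from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

# Build a graph matching the composite fp32 pattern: nn.dense -> nn.bias_add.
data = relay.var("data", shape=(1, 512), dtype="float32")
weight = relay.const(np.random.uniform(size=(10, 512)).astype("float32"))
bias = relay.const(np.random.uniform(size=(10,)).astype("float32"))
out = relay.nn.bias_add(relay.nn.dense(data, weight), bias)
mod = tvm.IRModule.from_expr(out)

# Regions supported by ACL are merged, annotated and split out for the
# external codegen; unsupported operators stay on the default target.
mod = partition_for_arm_compute_lib(mod)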
74 changes: 73 additions & 1 deletion python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -98,6 +98,33 @@ def qnn_conv_pattern():
            pattern, wildcard(), wildcard(), is_constant(), is_constant())
        return pattern

    def dense_pattern():
        """Create a dense (fully-connected) pattern.

        Returns
        -------
        pattern : dataflow_pattern.AltPattern
            Denotes the dense pattern.
        """
        pattern = is_op('nn.dense')(wildcard(), is_constant())
        pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
        return pattern

    def qnn_dense_pattern():
        """Create a quantized dense (fully-connected) pattern.

        Returns
        -------
        pattern : dataflow_pattern.AltPattern
            Denotes the qnn.dense pattern.
        """
        pattern = is_op('qnn.dense')(
            wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
        pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
        pattern = is_op('qnn.requantize')(
            pattern, wildcard(), wildcard(), is_constant(), is_constant())
        return pattern

    def check_conv(extract):
        """Check conv pattern is supported by ACL."""
        call = extract
@@ -114,8 +141,26 @@ def check_qnn_conv(extract):
            call = call.args[0]
        return qnn_conv2d(call.attrs, call.args)

    def check_dense(extract):
        """Check dense pattern is supported by ACL."""
        call = extract
Review comment (Contributor): After reading check_qnn_dense, do you need to check the out_dtype here?

Reply (@lhutton1, Contributor Author, Aug 13, 2020): The out_dtype attribute check is actually on the requantize node. The first node in the extract for fp32 would be nn.bias_add, which doesn't have the out_dtype attribute. The out_dtype is checked, though, in the dense function below. Hope that makes sense :)
        while call.op.name != "nn.dense":
            call = call.args[0]
        return dense(call.attrs, call.args)

    def check_qnn_dense(extract):
        """Check qnn.dense pattern is supported by ACL."""
        if extract.attrs.out_dtype != "uint8":
            return False
        call = extract
        while call.op.name != "qnn.dense":
            call = call.args[0]
        return qnn_dense(call.attrs, call.args)

    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv),
            ('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv),
            ('arm_compute_lib.dense', dense_pattern(), check_dense),
            ('arm_compute_lib.qnn_dense', qnn_dense_pattern(), check_qnn_dense)]
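As a reading aid, a rough sketch of how a pattern table like the one above is typically consumed downstream (pass names assume mainline TVM at the time of this PR; the ACL partition helper wraps a sequence along these lines):

import tvm
from tvm.relay import transform
from tvm.relay.op.contrib.register import get_pattern_table

# Given an existing tvm.IRModule 'mod' containing dense layers:
seq = tvm.transform.Sequential([
    # Fuse matched operator groups (e.g. nn.dense + nn.bias_add) into
    # composite functions named after the pattern table entries.
    transform.MergeComposite(get_pattern_table("arm_compute_lib")),
    # Mark composite functions and singly-registered ops for the ACL target.
    transform.AnnotateTarget("arm_compute_lib"),
    # Split annotated regions into external functions for the ACL codegen.
    transform.PartitionGraph(),
])
mod = seq(mod)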


def _register_external_op_helper(op_name, supported=True):
@@ -164,6 +209,33 @@ def qnn_conv2d(attrs, args):
    return True


@tvm.ir.register_op_attr("nn.dense", "target.arm_compute_lib")
def dense(attrs, args):
    """Check if the external ACL codegen for dense should be used."""
    data_typ = args[0].checked_type
    if data_typ.dtype != "float32":
        return False
    kernel_typ = args[1].checked_type
    if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "float32":
        return False
    if attrs.out_dtype != "float32" and attrs.out_dtype != "":
        return False
    return True


def qnn_dense(attrs, args):
    """Check if the external ACL codegen for qnn.dense should be used."""
    data_typ = args[0].checked_type
    if data_typ.dtype != "uint8":
        return False
    kernel_typ = args[1].checked_type
    if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "uint8":
        return False
    if attrs.out_dtype != "int32":
        return False
    return True


@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
def max_pool2d(attrs, args):
"""Check if the external ACL codegen for maxpool2d should be used."""
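To complement the predicates above, a hedged sketch of a quantized graph the qnn.dense composite pattern is meant to match (scales and zero-points below are placeholder values):

import numpy as np
from tvm import relay

data = relay.var("data", shape=(1, 512), dtype="uint8")
weight = relay.const(np.random.randint(0, 255, size=(10, 512)).astype("uint8"))
bias = relay.const(np.random.randint(0, 255, size=(10,)).astype("int32"))

# qnn.dense -> nn.bias_add -> qnn.requantize, mirroring the composite pattern.
dense = relay.qnn.op.dense(data, weight,
                           input_zero_point=relay.const(0, "int32"),
                           kernel_zero_point=relay.const(0, "int32"),
                           input_scale=relay.const(0.5, "float32"),
                           kernel_scale=relay.const(0.5, "float32"),
                           units=10, out_dtype="int32")
out = relay.nn.bias_add(dense, bias)
out = relay.qnn.op.requantize(out,
                              input_scale=relay.const(0.25, "float32"),
                              input_zero_point=relay.const(0, "int32"),
                              output_scale=relay.const(0.1, "float32"),
                              output_zero_point=relay.const(0, "int32"),
                              out_dtype="uint8")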
77 changes: 77 additions & 0 deletions src/relay/backend/contrib/arm_compute_lib/codegen.cc
@@ -61,6 +61,16 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
    const CallNode* requantize = nullptr;
  };

  /*!
   * \brief A series of operators that form a composite
   * dense layer. Supports both nn.dense and qnn.dense.
   */
  struct CompositeDenseNode {
    const CallNode* dense = nullptr;
    const CallNode* bias = nullptr;
    const CallNode* requantize = nullptr;
  };

  /*!
   * \brief Visit call nodes and generate appropriate JSON node.
   *
@@ -82,6 +92,8 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
    std::shared_ptr<JSONGraphNode> json_node;
    if (name == "arm_compute_lib.conv2d" || name == "arm_compute_lib.qnn_conv2d") {
      json_node = CreateCompositeConvJSONNode(cn);
    } else if (name == "arm_compute_lib.dense" || name == "arm_compute_lib.qnn_dense") {
      json_node = CreateCompositeDenseJSONNode(cn);
    } else {
      LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
    }
@@ -190,6 +202,71 @@
    }
    return json_node;
  }

  /*!
   * \brief Extract dense nodes from a composite function.
   *
   * \param cn The call node of the composite function.
   * \return Extracted composite dense nodes.
   */
  static CompositeDenseNode UnpackCompositeDense(const CallNode* cn) {
    CompositeDenseNode nodes{};
    const auto* fn = cn->op.as<FunctionNode>();
    CHECK(fn);

    // Traverse composite dense function from child to parent
    const auto* current_call = fn->body.as<CallNode>();
    if (backend::IsOp(current_call, "qnn.requantize")) {
      nodes.requantize = current_call;
      current_call = current_call->args[0].as<CallNode>();
    }
    if (backend::IsOp(current_call, "nn.bias_add")) {
      nodes.bias = current_call;
      current_call = current_call->args[0].as<CallNode>();
    }
    // Enforce a dense node exists at this point during traversal
    if (nodes.requantize) {
      CHECK(backend::IsOp(current_call, "qnn.dense"));
    } else {
      CHECK(backend::IsOp(current_call, "nn.dense"));
    }
    nodes.dense = current_call;
    return nodes;
  }

  /*!
   * \brief Create a JSON representation of a composite dense (fully-connected) operator.
   *
   * \param cn The call to be represented.
   * \return A JSON representation of a specific operator.
   */
  std::shared_ptr<JSONGraphNode> CreateCompositeDenseJSONNode(const CallNode* cn) {
    CompositeDenseNode nodes = UnpackCompositeDense(cn);
    std::string name = "nn.dense";

    // Inputs must be added in the same order they appear in the relay graph.
    std::vector<JSONGraphNodeEntry> inputs;
    inputs.push_back(VisitExpr(cn->args[0])[0]);
    inputs.push_back(VisitExpr(nodes.dense->args[1])[0]);
    if (nodes.requantize) {
      name = "qnn.dense";
      inputs.push_back(VisitExpr(nodes.dense->args[2])[0]);  // input zero-point
      inputs.push_back(VisitExpr(nodes.dense->args[3])[0]);  // weight zero-point
      inputs.push_back(VisitExpr(nodes.dense->args[4])[0]);  // input scale
      inputs.push_back(VisitExpr(nodes.dense->args[5])[0]);  // weight scale
    }
    if (nodes.bias) {
      inputs.push_back(VisitExpr(nodes.bias->args[1])[0]);
    }
    if (nodes.requantize) {
      inputs.push_back(VisitExpr(nodes.requantize->args[3])[0]);  // output scale
      inputs.push_back(VisitExpr(nodes.requantize->args[4])[0]);  // output zero-point
    }

    auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
    SetCallNodeAttribute(json_node, nodes.dense);
    return json_node;
  }
};

/*!
48 changes: 48 additions & 0 deletions src/runtime/contrib/arm_compute_lib/acl_runtime.cc
@@ -31,6 +31,7 @@
#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>

@@ -128,6 +129,9 @@ class ACLRuntime : public JSONRuntimeBase {
if ("nn.conv2d" == op_name || "qnn.conv2d" == op_name) {
CreateConvolution2DLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.dense" == op_name || "qnn.dense" == op_name) {
CreateFullyConnectedLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.max_pool2d" == op_name) {
CreatePoolingLayer(&layer_, node);
} else if ("reshape" == op_name) {
@@ -257,6 +261,50 @@ class ACLRuntime : public JSONRuntimeBase {
    layer->function = function;
  }

  /*!
   * \brief Create a fully connected (dense) layer.
   *
   * \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
   * \param node The JSON representation of the operator.
   * \param mm The memory manager through which the ACL fully connected layer can request
   * auxiliary memory from TVM.
   */
  void CreateFullyConnectedLayer(CachedLayer* layer, const JSONGraphNode& node,
                                 const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
    arm_compute::FullyConnectedLayerInfo fc_info;
    fc_info.set_weights_trained_layout(arm_compute::DataLayout::NHWC);

    // Collect inputs and outputs, handling both nn.dense and qnn.dense cases.
    std::vector<JSONGraphNodeEntry> inputs = node.GetInputs();
    size_t num_inputs = inputs.size();
    bool has_bias;
    if (node.GetOpName() == "qnn.dense") {
      CHECK(num_inputs >= 8U && num_inputs <= 9U)
          << "Quantized fully connected (dense) layer requires 9 inputs with a bias, 8 inputs "
             "without.";
      has_bias = num_inputs == 9;
      layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[0], &inputs[4], &inputs[2]));
      layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[1], &inputs[5], &inputs[3]));
      if (has_bias) {
        layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[6]));
      }
      layer->outputs.push_back(
          MakeACLTensorFromJSONNode(node, &inputs[6 + has_bias], &inputs[7 + has_bias]));
    } else {
      CHECK(num_inputs >= 2U && num_inputs <= 3U)
          << "Fully connected (dense) layer requires 3 inputs with a bias, 2 inputs without.";
      has_bias = num_inputs == 3;
      for (const auto& i : inputs) {
        layer->inputs.push_back(MakeACLTensorFromJSONEntry(i));
      }
      layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
    }

    auto function = std::make_shared<arm_compute::NEFullyConnectedLayer>(mm);
    function->configure(&layer->inputs[0], &layer->inputs[1],
                        has_bias ? &layer->inputs[2] : nullptr, &layer->outputs[0], fc_info);
    layer->function = function;
  }

  /*!
   * \brief Create a pooling layer.
   *
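To make the inputs[6 + has_bias] indexing easier to follow, here is the JSON input ordering for qnn.dense as the serializer emits it and the runtime consumes it (a Python reading aid, not TVM API; positions after the bias shift down by one when the optional bias is absent):

QNN_DENSE_INPUT_ORDER = [
    "data",               # inputs[0]
    "weight",             # inputs[1]
    "input zero-point",   # inputs[2]
    "weight zero-point",  # inputs[3]
    "input scale",        # inputs[4]
    "weight scale",       # inputs[5]
    "bias",               # inputs[6], present only when has_bias
    "output scale",       # inputs[6 + has_bias]
    "output zero-point",  # inputs[7 + has_bias]
]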
2 changes: 1 addition & 1 deletion tests/python/contrib/test_arm_compute_lib/test_conv2d.py
@@ -392,7 +392,7 @@ def test_qnn_conv2d():
"output scale": output_sc,
"output zero point": output_zp
}
verify(outputs, atol=1, rtol=0, params=params)
verify(outputs, atol=1, rtol=0, params=params, verify_saturation=True)


def test_codegen_qnn_conv2d():