From ada92f7f7d802983c303870b44883a372953d0a2 Mon Sep 17 00:00:00 2001
From: daquexian
Date: Fri, 14 Jun 2019 09:50:43 +0800
Subject: [PATCH] Polish

---
 common/common_bitpack.h          | 22 ------------
 dabnn/bitpack.h                  | 23 +++++++++----
 dabnn/net.cpp                    |  2 +-
 tests/bitpack_test.cpp           | 22 +-----------
 tools/onnx2bnn/OnnxConverter.cpp | 57 +++++++++-----------------------
 5 files changed, 33 insertions(+), 93 deletions(-)

diff --git a/common/common_bitpack.h b/common/common_bitpack.h
index 458e195..b6809cc 100644
--- a/common/common_bitpack.h
+++ b/common/common_bitpack.h
@@ -9,28 +9,6 @@
 
 #include
 
-inline void pack_128_fallback(const float *float_ptr, void *binary_ptr,
-                              size_t size) {
-    uint64_t *ui64_ptr = static_cast<uint64_t *>(binary_ptr);
-    const size_t UNIT_LEN = 64;
-    std::bitset<UNIT_LEN> bits1;
-    std::bitset<UNIT_LEN> bits2;
-    static_assert(sizeof(decltype(bits1.to_ullong())) * CHAR_BIT == 64,
-                  "bits.to_ullong() must return a 64-bit element");
-
-    FORZS(j, size, 128) {
-        FORZS(i, 128, 4) {
-            const auto t = i / 4;
-            bits1[t] = (*(float_ptr + j + i) > 0);
-            bits1[t + 32] = (*(float_ptr + j + i + 1) > 0);
-            bits2[t] = (*(float_ptr + j + i + 2) > 0);
-            bits2[t + 32] = (*(float_ptr + j + i + 3) > 0);
-        }
-        *ui64_ptr++ = bits1.to_ullong();
-        *ui64_ptr++ = bits2.to_ullong();
-    }
-}
-
 inline void pack_64_bitset(const float *fptr, uint64_t *buf) {
     const size_t UNIT_LEN = 64;
     std::bitset<UNIT_LEN> bits;
diff --git a/dabnn/bitpack.h b/dabnn/bitpack.h
index c1cd84c..b784ba5 100644
--- a/dabnn/bitpack.h
+++ b/dabnn/bitpack.h
@@ -26,7 +26,7 @@
 #include "mat.h"
 
 #ifdef __aarch64__
-inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_opt(const float *float_ptr, void *binary_ptr, size_t size) {
     /**
      * This is the optimized bit-packing.
      *
@@ -122,7 +122,7 @@ inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
         "v19", "v20", "v21", "v22", "v23", "x0");
 }
 
-inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_baseline(const float *float_ptr, void *binary_ptr, size_t size) {
     size_t nn_size = size >> 7;
 
     asm volatile(
@@ -210,19 +210,28 @@ inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
         "x0");
 }
 
-inline void pack_mat_128_2(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+inline void pack_mat_128_opt(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     assert(!binary_mat.empty());
-    pack_128_2(static_cast<float *>(float_mat.data), binary_mat.data,
+    pack_128_opt(static_cast<float *>(float_mat.data), binary_mat.data,
               float_mat.total());
 }
 
-inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+inline void pack_mat_128_baseline(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     assert(!binary_mat.empty());
-    pack_128(static_cast<float *>(float_mat.data), binary_mat.data,
+    pack_128_baseline(static_cast<float *>(float_mat.data), binary_mat.data,
             float_mat.total());
 }
+
+inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+    /**
+     * Delegate to the optimized implementation. The cost of the extra
+     * function call will be eliminated by the compiler, so there is no
+     * need to worry about it.
+     */
+    pack_mat_128_opt(float_mat, binary_mat);
+}
 #endif  // __aarch64__
 
 inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
@@ -251,7 +260,7 @@ inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
 #ifdef __aarch64__
     if (float_mat.c % 128 == 0) {
-        pack_mat_128_2(float_mat, binary_mat);
+        pack_mat_128_opt(float_mat, binary_mat);
     } else {
         pack_mat_64(float_mat, binary_mat);
     }
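A note on the packing contract these kernels share: every group of 64 floats is reduced to a single 64-bit word holding the binarized inputs. The scalar sketch below is illustrative only (the helper name is invented, not part of dabnn); it follows the straightforward bit order and the (x > 0) predicate of pack_64_bitset, whereas the removed pack_128_fallback above shows that the 128-wide path arranges bits differently within a 128-float group.

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative scalar reference (not part of dabnn): one 64-bit word per
    // 64 floats, bit i set iff the i-th float of the group is positive.
    // Assumes size is a multiple of 64, as pack_mat asserts for real inputs.
    inline std::vector<uint64_t> pack_64_reference(const float *data, size_t size) {
        std::vector<uint64_t> packed;
        packed.reserve(size / 64);
        for (size_t j = 0; j < size; j += 64) {
            std::bitset<64> bits;
            for (size_t i = 0; i < 64; i++) {
                bits[i] = data[j + i] > 0;
            }
            packed.push_back(bits.to_ullong());
        }
        return packed;
    }
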
diff --git a/dabnn/net.cpp b/dabnn/net.cpp
index fcdb3a8..5e032ab 100644
--- a/dabnn/net.cpp
+++ b/dabnn/net.cpp
@@ -94,7 +94,7 @@ void Net::prepare() {
                 add_mat(name, std::make_shared<bnn::Mat>(shape[0], shape[1],
                                                          shape[2], shape[3],
                                                          bnn::DataType::Bit, false));
-                pack_mat_128_2(*tmp, *mat_map_[name]);
+                pack_mat_128(*tmp, *mat_map_[name]);
             } else {
 #endif  // __aarch64__
                 add_mat(name, std::make_shared<bnn::Mat>(
diff --git a/tests/bitpack_test.cpp b/tests/bitpack_test.cpp
index dd7c97f..b0f4b03 100644
--- a/tests/bitpack_test.cpp
+++ b/tests/bitpack_test.cpp
@@ -21,7 +21,7 @@ TEST(bitpack, pack_mat_128) {
     const bnn::Mat a(AHEIGHT, AWIDTH, CHANNEL, a_data, bnn::DataType::Float);
     bnn::Mat a_binary(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
     bnn::Mat expected(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    pack_mat_128(a, a_binary);
+    pack_mat_128_opt(a, a_binary);
 
     baseline_pack_mat(a, expected);
 
@@ -59,26 +59,6 @@ TEST(bitpack, pack_mat_64) {
     }
 }
 
-#ifdef __aarch64__
-TEST(bitpack, pack_mat_fallback) {
-    const size_t AHEIGHT = 64;
-    const size_t AWIDTH = 64;
-    const size_t CHANNEL = 256;
-    const size_t ALEN = AHEIGHT * AWIDTH * CHANNEL;
-    float a_data[ALEN];
-    fill_rand_float(a_data, ALEN);
-
-    const bnn::Mat a(AHEIGHT, AWIDTH, CHANNEL, a_data, bnn::DataType::Float);
-    bnn::Mat a_binary(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    bnn::Mat expected(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    pack_mat_128(a, a_binary);
-
-    pack_128_fallback(a_data, expected.data, ALEN);
-
-    ASSERT_EQ(a_binary, expected);
-}
-#endif  // __aarch64__
-
 TEST(bitpack, addv_v7) {
     uint64_t data[2];
     fill_rand_uint64(data, 2);
diff --git a/tools/onnx2bnn/OnnxConverter.cpp b/tools/onnx2bnn/OnnxConverter.cpp
index 5a1b059..2eb2b5b 100644
--- a/tools/onnx2bnn/OnnxConverter.cpp
+++ b/tools/onnx2bnn/OnnxConverter.cpp
@@ -59,8 +59,8 @@ void OnnxConverter::AddBinConv(const std::string &input_name,
     const auto param = flatbnn::CreateBinConv2DDirect(
         builder_, bin_name.c_str(), weight_name.c_str(), nullptr, &pads,
         &strides, &dilations, output_name.c_str());
-    const auto layer = flatbnn::CreateLayer(
-        builder_, flatbnn::LayerType::BinConv2D, 0, param);
+    const auto layer =
+        flatbnn::CreateLayer(builder_, flatbnn::LayerType::BinConv2D, 0, param);
     const auto flat_tensor = flatbnn::CreateTensorDirect(
         builder_, flatbnn::DataType::Bit, &bin_weight.data, nullptr,
         &bin_weight.shape, weight_name.c_str());
@@ -145,18 +145,11 @@ OnnxConverter::BTensor OnnxConverter::bitpack(OnnxConverter::FTensor ftensor) {
     BNN_ASSERT(c % 64 == 0, ftensor.shape);
 
     vector<bin_t> packed_data;
-    // if (c % 128 == 0) {
-    if (false) {
-        const auto size = Shaper::total(ftensor.shape);
-        packed_data.resize(size / 64);
-        pack_128_fallback(&ftensor.data[0], &packed_data[0], size);
-    } else {
-        bin_t tmp;
+    bin_t tmp;
 
-        FORZS(i, Shaper::total(ftensor.shape), 64) {
-            pack_64_bitset(&ftensor.data[i], &tmp);
-            packed_data.push_back(tmp);
-        }
+    FORZS(i, Shaper::total(ftensor.shape), 64) {
+        pack_64_bitset(&ftensor.data[i], &tmp);
+        packed_data.push_back(tmp);
     }
     Shape shape = {ftensor.shape[0], ftensor.shape[1],
                    ftensor.shape[2],
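The retained loop above packs the weights 64 values at a time, one 64-bit word per 64 floats. The sketch below spells out the resulting buffer size; the helper and the NHWC-style shape are hypothetical and only meant to make the arithmetic concrete.

    #include <cstddef>
    #include <vector>

    // Rough size arithmetic (illustrative, not converter API): one uint64_t per
    // 64 floats, assuming the total element count is a multiple of 64, which the
    // BNN_ASSERT above guarantees via the channel count.
    inline size_t packed_word_count(const std::vector<size_t> &shape) {
        size_t total = 1;
        for (const auto d : shape) total *= d;
        return total / 64;  // e.g. {64, 3, 3, 256} -> 147456 floats -> 2304 words
    }
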
@@ -190,27 +183,6 @@ std::vector<std::string> OnnxConverter::split(
     return outputs;
 }
 
-vector<bin_t> bitpack(const float *data, Shape shape) {
-    static_assert(std::is_same<bin_t, uint64_t>::value,
-                  "bitpack requires bin_t is 64 bit");
-
-    auto c = Shaper::onnx_kc(shape);
-
-    BNN_ASSERT(c % 64 == 0, shape);
-
-    vector<bin_t> packed;
-
-    bin_t tmp;
-
-    FORZS(i, Shaper::total(shape), 64) {
-        pack_64_bitset(&data[i], &tmp);
-        packed.push_back(tmp);
-    }
-    BNN_ASSERT(false, "");
-
-    return packed;
-}
-
 void OnnxConverter::Convert(const ONNX_NAMESPACE::ModelProto &model_proto,
                             const std::string &filepath,
                             const OnnxConverter::Level level) {
@@ -220,18 +192,19 @@ void OnnxConverter::Convert(const ONNX_NAMESPACE::ModelProto &model_proto,
     // Please check out "dabnn_*" pases in
     // https://github.com/daquexian/onnx/blob/optimizer_for_bnn/onnx/optimizer/passes
     // for details.
-    vector<string> optimizers{"eliminate_nop_pad", "extract_constant_to_initializer",
-                                "dabnn_bconv_strict"};
+    vector<string> optimizers{"eliminate_nop_pad",
+                              "extract_constant_to_initializer",
+                              "dabnn_bconv_strict"};
     if (level == Level::kModerate || level == Level::kAggressive) {
         optimizers.push_back("dabnn_bconv_moderate");
     }
     if (level == Level::kAggressive) {
         optimizers.push_back("dabnn_bconv_aggressive");
     }
-    // model_proto is only used here. Please use the member variable model_proto_
-    // in the following code
-    model_proto_ = ONNX_NAMESPACE::optimization::Optimize(
-        model_proto, optimizers);
+    // model_proto is only used here. Please use the member variable
+    // model_proto_ in the following code
+    model_proto_ =
+        ONNX_NAMESPACE::optimization::Optimize(model_proto, optimizers);
 
     for (const auto &tensor : model_proto_.graph().initializer()) {
         if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
@@ -548,8 +521,8 @@ void OnnxConverter::CalculateCoeff(const ONNX_NAMESPACE::NodeProto &node,
         coeff_b_data.push_back(b.data[i] - scale.data[i] * mean.data[i] / tmp);
     }
     for (const auto &node2 : model_proto_.graph().node()) {
-        if (node2.domain() == "dabnn" && node2.op_type() == "Conv"
-            && node2.output(0) == node.input(0)) {
+        if (node2.domain() == "dabnn" && node2.op_type() == "Conv" &&
+            node2.output(0) == node.input(0)) {
             const auto &weight = onnx_float_tensors_[node2.input(1)];
             {
                 int channels = Shaper::onnx_kc(weight.shape);