Merge pull request #35 from JDAI-CV/polish
Polish codes
daquexian committed Jun 14, 2019
2 parents 99f0321 + ada92f7 commit b3b35de
Showing 5 changed files with 33 additions and 93 deletions.
22 changes: 0 additions & 22 deletions common/common_bitpack.h
@@ -9,28 +9,6 @@

#include <common/helper.h>

-inline void pack_128_fallback(const float *float_ptr, void *binary_ptr,
-                              size_t size) {
-    uint64_t *ui64_ptr = static_cast<uint64_t *>(binary_ptr);
-    const size_t UNIT_LEN = 64;
-    std::bitset<UNIT_LEN> bits1;
-    std::bitset<UNIT_LEN> bits2;
-    static_assert(sizeof(decltype(bits1.to_ullong())) * CHAR_BIT == 64,
-                  "bits.to_ullong() must return a 64-bit element");
-
-    FORZS(j, size, 128) {
-        FORZS(i, 128, 4) {
-            const auto t = i / 4;
-            bits1[t] = (*(float_ptr + j + i) > 0);
-            bits1[t + 32] = (*(float_ptr + j + i + 1) > 0);
-            bits2[t] = (*(float_ptr + j + i + 2) > 0);
-            bits2[t + 32] = (*(float_ptr + j + i + 3) > 0);
-        }
-        *ui64_ptr++ = bits1.to_ullong();
-        *ui64_ptr++ = bits2.to_ullong();
-    }
-}

inline void pack_64_bitset(const float *fptr, uint64_t *buf) {
const size_t UNIT_LEN = 64;
std::bitset<UNIT_LEN> bits;
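For context: the bitset-based packing kept in common_bitpack.h (pack_64_bitset, and the deleted pack_128_fallback that generalized it to 128 values) writes one bit per float, set when the value is positive. Below is a minimal sketch of that idea; pack_64_sketch is a hypothetical name and the body is inferred from the lines visible above, not copied from the repository.

#include <bitset>
#include <cstddef>
#include <cstdint>

// Sketch only: pack 64 floats into one uint64_t, one sign bit per value.
inline void pack_64_sketch(const float *fptr, std::uint64_t *buf) {
    const std::size_t UNIT_LEN = 64;
    std::bitset<UNIT_LEN> bits;
    for (std::size_t i = 0; i < UNIT_LEN; i++) {
        bits[i] = (fptr[i] > 0);  // bit i mirrors the sign of the i-th float
    }
    *buf = bits.to_ullong();
}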
23 changes: 16 additions & 7 deletions dabnn/bitpack.h
@@ -26,7 +26,7 @@
#include "mat.h"

#ifdef __aarch64__
-inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_opt(const float *float_ptr, void *binary_ptr, size_t size) {
/**
* This is the optimized bit-packing.
*
@@ -122,7 +122,7 @@ inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
"v19", "v20", "v21", "v22", "v23", "x0");
}

-inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_baseline(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

asm volatile(
@@ -210,19 +210,28 @@ inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
"x0");
}

-inline void pack_mat_128_2(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+inline void pack_mat_128_opt(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
assert(!binary_mat.empty());

-    pack_128_2(static_cast<float *>(float_mat.data), binary_mat.data,
+    pack_128_opt(static_cast<float *>(float_mat.data), binary_mat.data,
float_mat.total());
}

-inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+inline void pack_mat_128_baseline(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
assert(!binary_mat.empty());

-    pack_128_baseline(static_cast<float *>(float_mat.data), binary_mat.data,
+    pack_128_baseline(static_cast<float *>(float_mat.data), binary_mat.data,
float_mat.total());
}

+inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+    /**
+     * Delegate to the optimized implementation. The cost of the function
+     * call is eliminated by the compiler, so don't worry about it.
+     */
+    pack_mat_128_opt(float_mat, binary_mat);
+}
#endif // __aarch64__

inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
@@ -251,7 +260,7 @@ inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
#ifdef __aarch64__
if (float_mat.c % 128 == 0) {
-        pack_mat_128_2(float_mat, binary_mat);
+        pack_mat_128_opt(float_mat, binary_mat);
} else {
pack_mat_64(float_mat, binary_mat);
}
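Call sites stay the same after this commit: pack_mat remains the entry point and, on aarch64, routes the 128-channel case through pack_mat_128_opt. A hypothetical calling sketch follows; the Mat constructors mirror those used in tests/bitpack_test.cpp, and the include paths are assumptions.

#include <cstddef>
#include <vector>

#include "dabnn/bitpack.h"  // assumed include path
#include "dabnn/mat.h"      // assumed include path

void pack_example() {
    const std::size_t H = 32, W = 32, C = 128;  // C % 128 == 0 -> optimized path
    std::vector<float> data(H * W * C, 1.0f);
    const bnn::Mat input(H, W, C, data.data(), bnn::DataType::Float);
    bnn::Mat packed(H, W, C, bnn::DataType::Bit);
    // On aarch64, pack_mat dispatches to pack_mat_128_opt when the channel
    // count is a multiple of 128, and to pack_mat_64 otherwise.
    pack_mat(input, packed);
}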
2 changes: 1 addition & 1 deletion dabnn/net.cpp
@@ -94,7 +94,7 @@ void Net::prepare() {
add_mat(name, std::make_shared<Mat>(shape[0], shape[1],
shape[2], shape[3],
bnn::DataType::Bit, false));
-                pack_mat_128_2(*tmp, *mat_map_[name]);
+                pack_mat_128(*tmp, *mat_map_[name]);
} else {
#endif // __aarch64__
add_mat(name, std::make_shared<Mat>(
22 changes: 1 addition & 21 deletions tests/bitpack_test.cpp
@@ -21,7 +21,7 @@ TEST(bitpack, pack_mat_128) {
const bnn::Mat a(AHEIGHT, AWIDTH, CHANNEL, a_data, bnn::DataType::Float);
bnn::Mat a_binary(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
bnn::Mat expected(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    pack_mat_128(a, a_binary);
+    pack_mat_128_opt(a, a_binary);

baseline_pack_mat(a, expected);

@@ -59,26 +59,6 @@ TEST(bitpack, pack_mat_64) {
}
}

-#ifdef __aarch64__
-TEST(bitpack, pack_mat_fallback) {
-    const size_t AHEIGHT = 64;
-    const size_t AWIDTH = 64;
-    const size_t CHANNEL = 256;
-    const size_t ALEN = AHEIGHT * AWIDTH * CHANNEL;
-    float a_data[ALEN];
-    fill_rand_float(a_data, ALEN);
-
-    const bnn::Mat a(AHEIGHT, AWIDTH, CHANNEL, a_data, bnn::DataType::Float);
-    bnn::Mat a_binary(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    bnn::Mat expected(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
-    pack_mat_128(a, a_binary);
-
-    pack_128_fallback(a_data, expected.data, ALEN);
-
-    ASSERT_EQ(a_binary, expected);
-}
-#endif // __aarch64__

TEST(bitpack, addv_v7) {
uint64_t data[2];
fill_rand_uint64(data, 2);
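The pack_mat_128 test above is truncated by the diff view; its remaining lines plausibly mirror the removed fallback test, comparing the optimized packing against baseline_pack_mat. A sketch of that testing pattern (an illustration only, not the repository's exact code; it assumes the includes already present in tests/bitpack_test.cpp plus <vector>):

TEST(bitpack, pack_mat_128_sketch) {
    const size_t AHEIGHT = 64;
    const size_t AWIDTH = 64;
    const size_t CHANNEL = 256;
    const size_t ALEN = AHEIGHT * AWIDTH * CHANNEL;
    std::vector<float> a_data(ALEN);  // heap storage instead of a large stack array
    fill_rand_float(a_data.data(), ALEN);

    const bnn::Mat a(AHEIGHT, AWIDTH, CHANNEL, a_data.data(), bnn::DataType::Float);
    bnn::Mat a_binary(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);
    bnn::Mat expected(AHEIGHT, AWIDTH, CHANNEL, bnn::DataType::Bit);

    pack_mat_128_opt(a, a_binary);   // NEON-optimized path
    baseline_pack_mat(a, expected);  // portable reference from the test helpers

    ASSERT_EQ(a_binary, expected);
}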
57 changes: 15 additions & 42 deletions tools/onnx2bnn/OnnxConverter.cpp
@@ -59,8 +59,8 @@ void OnnxConverter::AddBinConv(const std::string &input_name,
const auto param = flatbnn::CreateBinConv2DDirect(
builder_, bin_name.c_str(), weight_name.c_str(), nullptr, &pads,
&strides, &dilations, output_name.c_str());
-    const auto layer = flatbnn::CreateLayer(
-        builder_, flatbnn::LayerType::BinConv2D, 0, param);
+    const auto layer =
+        flatbnn::CreateLayer(builder_, flatbnn::LayerType::BinConv2D, 0, param);
const auto flat_tensor = flatbnn::CreateTensorDirect(
builder_, flatbnn::DataType::Bit, &bin_weight.data, nullptr,
&bin_weight.shape, weight_name.c_str());
@@ -145,18 +145,11 @@ OnnxConverter::BTensor OnnxConverter::bitpack(OnnxConverter::FTensor ftensor) {
BNN_ASSERT(c % 64 == 0, ftensor.shape);

vector<bin_t> packed_data;
-    // if (c % 128 == 0) {
-    if (false) {
-        const auto size = Shaper::total(ftensor.shape);
-        packed_data.resize(size / 64);
-        pack_128_fallback(&ftensor.data[0], &packed_data[0], size);
-    } else {
-        bin_t tmp;
+    bin_t tmp;

-        FORZS(i, Shaper::total(ftensor.shape), 64) {
-            pack_64_bitset(&ftensor.data[i], &tmp);
-            packed_data.push_back(tmp);
-        }
+    FORZS(i, Shaper::total(ftensor.shape), 64) {
+        pack_64_bitset(&ftensor.data[i], &tmp);
+        packed_data.push_back(tmp);
+    }

Shape shape = {ftensor.shape[0], ftensor.shape[1], ftensor.shape[2],
Expand Down Expand Up @@ -190,27 +183,6 @@ std::vector<OnnxConverter::BTensor> OnnxConverter::split(
return outputs;
}

-vector<bin_t> bitpack(const float *data, Shape shape) {
-    static_assert(std::is_same<bin_t, uint64_t>::value,
-                  "bitpack requires bin_t is 64 bit");
-
-    auto c = Shaper::onnx_kc(shape);
-
-    BNN_ASSERT(c % 64 == 0, shape);
-
-    vector<bin_t> packed;
-
-    bin_t tmp;
-
-    FORZS(i, Shaper::total(shape), 64) {
-        pack_64_bitset(&data[i], &tmp);
-        packed.push_back(tmp);
-    }
-    BNN_ASSERT(false, "");
-
-    return packed;
-}

void OnnxConverter::Convert(const ONNX_NAMESPACE::ModelProto &model_proto,
const std::string &filepath,
const OnnxConverter::Level level) {
@@ -220,18 +192,19 @@ void OnnxConverter::Convert(const ONNX_NAMESPACE::ModelProto &model_proto,
// Please check out "dabnn_*" pases in
// https://github.com/daquexian/onnx/blob/optimizer_for_bnn/onnx/optimizer/passes
// for details.
vector<string> optimizers{"eliminate_nop_pad", "extract_constant_to_initializer",
"dabnn_bconv_strict"};
vector<string> optimizers{"eliminate_nop_pad",
"extract_constant_to_initializer",
"dabnn_bconv_strict"};
if (level == Level::kModerate || level == Level::kAggressive) {
optimizers.push_back("dabnn_bconv_moderate");
}
if (level == Level::kAggressive) {
optimizers.push_back("dabnn_bconv_aggressive");
}
-    // model_proto is only used here. Please use the member variable model_proto_
-    // in the following code
-    model_proto_ = ONNX_NAMESPACE::optimization::Optimize(
-        model_proto, optimizers);
+    // model_proto is only used here. Please use the member variable
+    // model_proto_ in the following code
+    model_proto_ =
+        ONNX_NAMESPACE::optimization::Optimize(model_proto, optimizers);

for (const auto &tensor : model_proto_.graph().initializer()) {
if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
@@ -548,8 +521,8 @@ void OnnxConverter::CalculateCoeff(const ONNX_NAMESPACE::NodeProto &node,
coeff_b_data.push_back(b.data[i] - scale.data[i] * mean.data[i] / tmp);
}
for (const auto &node2 : model_proto_.graph().node()) {
if (node2.domain() == "dabnn" && node2.op_type() == "Conv"
&& node2.output(0) == node.input(0)) {
if (node2.domain() == "dabnn" && node2.op_type() == "Conv" &&
node2.output(0) == node.input(0)) {
const auto &weight = onnx_float_tensors_[node2.input(1)];
{
int channels = Shaper::onnx_kc(weight.shape);
