Skip to content

Commit

Permalink
Separate data size and shape. Add bitpack wrt eff_bits and binarized …
Browse files Browse the repository at this point in the history
…weight by it
  • Loading branch information
daquexian committed Aug 20, 2019
1 parent ef1852e commit 5268538
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 16 deletions.
16 changes: 14 additions & 2 deletions common/common_bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,23 @@

#include <common/helper.h>

inline void pack_64_bitset(const float *fptr, uint64_t *buf) {
inline void pack_64_bitset(const float *fptr, uint64_t *buf,
const size_t eff_bits = 64) {
/**
* The eff_bits is to support non-128-multiple channels.
 * In this case, we need to pad the tensor to make the
* channel aligned with 128.
*/
const size_t UNIT_LEN = 64;
BNN_ASSERT(eff_bits < UNIT_LEN,
"The eff_bits must be smaller than UNIT_LEN (64)");
std::bitset<UNIT_LEN> bits;
for (size_t i = 0; i < UNIT_LEN; i++) {
bits[i] = (*(fptr + i) > 0);
if (i < eff_bits) {
bits[i] = (*(fptr + i) > 0);
} else {
bits[i] = 0;
}
}
static_assert(sizeof(decltype(bits.to_ullong())) * CHAR_BIT == 64,
"bits.to_ullong() must return a 64-bit element");
Expand Down
2 changes: 2 additions & 0 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
inline void pack_128_opt(const float *float_ptr, void *binary_ptr,
size_t size) {
/**
* size: the number of __elements__ needed to be packed.
*
* This is the optimized bit-packing.
*
* sri is the "shift-right-and-overwrite" instruction.
Expand Down
8 changes: 4 additions & 4 deletions dabnn/layers/BinConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight,
const auto trans_weight_mat_name = "trans_" + weight;
// transpose the weight for bgemm
const int m = weight_mat->n;
const int k = weight_mat->h * weight_mat->w * weight_mat->c;
transposed_weight_mat =
std::make_shared<Mat>(weight_mat->n, weight_mat->h, weight_mat->w,
weight_mat->elem_c, DataType::Bit, false);
BNN_ASSERT(weight_mat->total() % m == 0, "");
const int k = weight_mat->total() / m;
transposed_weight_mat = std::make_shared<Mat>(
m, k * 64, DataType::Bit, false);
auto *trans_data_ptr =
static_cast<uint64_t *>(transposed_weight_mat->data);
auto *data_ptr = static_cast<uint64_t *>(weight_mat->data);
Expand Down
11 changes: 7 additions & 4 deletions dabnn/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,21 +79,24 @@ void Net::prepare() {
shaper.AddShape(name, shape);

#ifdef __aarch64__
// TODO: Move it to binconv.cpp
// 1. More correct
// 2. Don't need to maintain the same shape
if (Shaper::c(shape) % 128 == 0) {
// Re-arrange the bit order
const auto len = shaper.total(shape);
// Re-arrange the bit order for the optimized bit-packing
const auto len = tensor->bin_data()->size();
const auto tmp = std::make_shared<Mat>(
shape[0], shape[1], shape[2], shape[3],
bnn::DataType::Float, false);
auto *float_data = static_cast<float *>(tmp->data);
FORZ(i, len / 64) {
FORZ(i, len) {
std::bitset<64> bs(*(data + i));
FORZ(j, 64) { float_data[i * 64 + j] = bs[j] ? 1 : -1; }
}

add_mat(name, std::make_shared<Mat>(shape[0], shape[1],
shape[2], shape[3],
bnn::DataType::Bit, false));
bnn::DataType::Bit, len, false));
pack_mat_128(*tmp, *mat_map_[name]);
} else {
#endif // __aarch64__
Expand Down
22 changes: 16 additions & 6 deletions tools/onnx2bnn/OnnxConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,31 @@ void OnnxConverter::AddConv(const string &input_name,

/*
* Bitpack a bnn tensor, input_channels should be the last dimension
* The data size of the packed tensor may be different from
* Shaper::total(tensor.shape) / 64, since every HWC will be padded
* so that they are aligned to 128.
*/
OnnxConverter::BTensor OnnxConverter::bitpack(OnnxConverter::FTensor ftensor) {
static_assert(std::is_same<bin_t, uint64_t>::value,
"bitpack requires bin_t is 64 bit");

auto c = Shaper::kc(ftensor.shape);

BNN_ASSERT(c % 64 == 0, ftensor.shape);
const auto N = Shaper::kn(ftensor.shape);
const auto HWC = Shaper::total(ftensor.shape) / N;

vector<bin_t> packed_data;
bin_t tmp;

FORZS(i, Shaper::total(ftensor.shape), 64) {
pack_64_bitset(&ftensor.data[i], &tmp);
packed_data.push_back(tmp);
FORZ(n, N) {
FORZS(i, HWC, 128) {
const size_t eff_bits = std::max<size_t>(HWC - i, 128);
pack_64_bitset(&ftensor.data[i], &tmp,
std::min<size_t>(eff_bits, 64));
packed_data.push_back(tmp);
pack_64_bitset(
&ftensor.data[i + 64], &tmp,
std::min<size_t>(std::max<size_t>(0, eff_bits - 64), 64));
packed_data.push_back(tmp);
}
}

Shape shape = {ftensor.shape[0], ftensor.shape[1], ftensor.shape[2],
Expand Down

0 comments on commit 5268538

Please sign in to comment.