Skip to content

Commit

Permalink
Separate data size and shape. Add bitpack wrt eff_bits and binarized …
Browse files Browse the repository at this point in the history
…weight by it
  • Loading branch information
daquexian committed Aug 20, 2019
1 parent ef1852e commit 5268538
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 16 deletions.
16 changes: 14 additions & 2 deletions common/common_bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,23 @@

#include <common/helper.h>

inline void pack_64_bitset(const float *fptr, uint64_t *buf) {
inline void pack_64_bitset(const float *fptr, uint64_t *buf,
const size_t eff_bits = 64) {
/**
* The eff_bits is to support non-128-multiple channels.
 * In this case, we need to pad the tensor to make the
* channel aligned with 128.
*/
const size_t UNIT_LEN = 64;
BNN_ASSERT(eff_bits < UNIT_LEN,
"The eff_bits must be smaller than UNIT_LEN (64)");
std::bitset<UNIT_LEN> bits;
for (size_t i = 0; i < UNIT_LEN; i++) {
bits[i] = (*(fptr + i) > 0);
if (i < eff_bits) {
bits[i] = (*(fptr + i) > 0);
} else {
bits[i] = 0;
}
}
static_assert(sizeof(decltype(bits.to_ullong())) * CHAR_BIT == 64,
"bits.to_ullong() must return a 64-bit element");
Expand Down
2 changes: 2 additions & 0 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
inline void pack_128_opt(const float *float_ptr, void *binary_ptr,
size_t size) {
/**
* size: the number of __elements__ needed to be packed.
*
* This is the optimized bit-packing.
*
* sri is the "shift-right-and-overwrite" instruction.
Expand Down
8 changes: 4 additions & 4 deletions dabnn/layers/BinConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight,
const auto trans_weight_mat_name = "trans_" + weight;
// transpose the weight for bgemm
const int m = weight_mat->n;
const int k = weight_mat->h * weight_mat->w * weight_mat->c;
transposed_weight_mat =
std::make_shared<Mat>(weight_mat->n, weight_mat->h, weight_mat->w,
weight_mat->elem_c, DataType::Bit, false);
BNN_ASSERT(weight_mat->total() % m == 0, "");
const int k = weight_mat->total() / m;
transposed_weight_mat = std::make_shared<Mat>(
m, k * 64, DataType::Bit, false);
auto *trans_data_ptr =
static_cast<uint64_t *>(transposed_weight_mat->data);
auto *data_ptr = static_cast<uint64_t *>(weight_mat->data);
Expand Down
11 changes: 7 additions & 4 deletions dabnn/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,21 +79,24 @@ void Net::prepare() {
shaper.AddShape(name, shape);

#ifdef __aarch64__
// TODO: Move it to binconv.cpp
// 1. More correct
// 2. Don't need to maintain the same shape
if (Shaper::c(shape) % 128 == 0) {
// Re-arrange the bit order
const auto len = shaper.total(shape);
// Re-arrange the bit order for the optimized bit-packing
const auto len = tensor->bin_data()->size();
const auto tmp = std::make_shared<Mat>(
shape[0], shape[1], shape[2], shape[3],
bnn::DataType::Float, false);
auto *float_data = static_cast<float *>(tmp->data);
FORZ(i, len / 64) {
FORZ(i, len) {
std::bitset<64> bs(*(data + i));
FORZ(j, 64) { float_data[i * 64 + j] = bs[j] ? 1 : -1; }
}

add_mat(name, std::make_shared<Mat>(shape[0], shape[1],
shape[2], shape[3],
bnn::DataType::Bit, false));
bnn::DataType::Bit, len, false));
pack_mat_128(*tmp, *mat_map_[name]);
} else {
#endif // __aarch64__
Expand Down
22 changes: 16 additions & 6 deletions tools/onnx2bnn/OnnxConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,31 @@ void OnnxConverter::AddConv(const string &input_name,

/*
* Bitpack a bnn tensor, input_channels should be the last dimension
* The data size of the packed tensor may be different from
* Shaper::total(tensor.shape) / 64, since every HWC will be padded
* so that they are aligned to 128.
*/
OnnxConverter::BTensor OnnxConverter::bitpack(OnnxConverter::FTensor ftensor) {
static_assert(std::is_same<bin_t, uint64_t>::value,
"bitpack requires bin_t is 64 bit");

auto c = Shaper::kc(ftensor.shape);

BNN_ASSERT(c % 64 == 0, ftensor.shape);
const auto N = Shaper::kn(ftensor.shape);
const auto HWC = Shaper::total(ftensor.shape) / N;

vector<bin_t> packed_data;
bin_t tmp;

FORZS(i, Shaper::total(ftensor.shape), 64) {
pack_64_bitset(&ftensor.data[i], &tmp);
packed_data.push_back(tmp);
FORZ(n, N) {
FORZS(i, HWC, 128) {
const size_t eff_bits = std::max<size_t>(HWC - i, 128);
pack_64_bitset(&ftensor.data[i], &tmp,
std::min<size_t>(eff_bits, 64));
packed_data.push_back(tmp);
pack_64_bitset(
&ftensor.data[i + 64], &tmp,
std::min<size_t>(std::max<size_t>(0, eff_bits - 64), 64));
packed_data.push_back(tmp);
}
}

Shape shape = {ftensor.shape[0], ftensor.shape[1], ftensor.shape[2],
Expand Down

0 comments on commit 5268538

Please sign in to comment.