Skip to content

Commit

Permalink
1. Apply new bitpack
Browse files Browse the repository at this point in the history
2. Fix the conflict between the sign-bit packing convention (bit is 1 if the value is negative) and the padding convention (padding bits are 0) by applying a bitwise "not" to the packed element
3. Update binrep
4. misc changes
  • Loading branch information
daquexian committed May 17, 2019
1 parent f65fd3e commit 2618327
Show file tree
Hide file tree
Showing 12 changed files with 121 additions and 126 deletions.
16 changes: 16 additions & 0 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@
#include <dabnn/mat.h>
#include <dabnn/net.h>

// Benchmarks the 64-wide bit-packing kernel on a small (1, 32, 32, 128) Mat.
static void BM_pack_mat_64_small(benchmark::State &state) {
    const bnn::Mat float_mat(1, 32, 32, 128, bnn::DataType::Float, 0);
    bnn::Mat binary_mat(1, 32, 32, 128, bnn::DataType::Bit, 0);
    for (auto _ : state) {
        pack_mat_64(float_mat, binary_mat);
    }
}

// Benchmarks the 128-wide bit-packing kernel on a small (1, 32, 32, 128) Mat.
static void BM_pack_mat_128_small(benchmark::State &state) {
    const bnn::Mat float_mat(1, 32, 32, 128, bnn::DataType::Float, 0);
    bnn::Mat binary_mat(1, 32, 32, 128, bnn::DataType::Bit, 0);
    for (auto _ : state) {
        pack_mat_128(float_mat, binary_mat);
    }
}

static void BM_pack_mat_64(benchmark::State &state) {
const bnn::Mat a(1, 64, 64, 128, bnn::DataType::Float);
bnn::Mat b(1, 64, 64, 128, bnn::DataType::Bit);
Expand Down
9 changes: 4 additions & 5 deletions binaries/run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
#include <algorithm>
#include <chrono>

#include <common/argh.h>
#include <common/flatbuffers_helper.h>
#include <dabnn/net.h>

int main(int argc, char **argv) {
(void)argc;
argh::parser cmdl(argc, argv);
google::InitGoogleLogging(argv[0]);
FLAGS_v = 1;
cmdl("v", 1) >> FLAGS_v;
FLAGS_alsologtostderr = true;
// FLAGS_logbuflevel = -1;

float *input = new float[3 * 224 * 224];
FORZ(i, 3 * 224 * 224) { input[i] = 1; }

// const std::string blob_name = "125";
auto net1 = bnn::Net::create();
net1->optimize = true;
net1->run_fconv = true;
Expand All @@ -31,7 +31,6 @@ int main(int argc, char **argv) {
FORZ(i, N) {
LOG(INFO) << "------";
net1->run(input);
// LOG(INFO) << "hh";
}
const auto t2 = Clock::now();
css blob_name = argv[2];
Expand All @@ -45,7 +44,7 @@ int main(int argc, char **argv) {
if (blob1->data_type == bnn::DataType::Float) {
LOG(INFO) << static_cast<float *>(blob1->data)[i];
} else {
LOG(INFO) << binrep(static_cast<uint64_t *>(blob1->data)[i]);
LOG(INFO) << binrep(static_cast<uint64_t *>(blob1->data) + i, 64, false);
}
}
LOG(INFO) << "Time: "
Expand Down
2 changes: 1 addition & 1 deletion common/baseline.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <bitset>

#include <common/helper.h>
#include <dabnn/bitpack.h>
#include <common/common_bitpack.h>
#include <dabnn/mat.h>

inline int bitcount(uint64_t x) {
Expand Down
20 changes: 20 additions & 0 deletions common/common_bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <cstdint>

#include <common/helper.h>
#include <dabnn/mat.h>

inline void pack_128_fallback(const float *float_ptr, void *binary_ptr,
size_t size) {
Expand Down Expand Up @@ -183,4 +184,23 @@ inline void pack_64_bitfield(const float *fptr, uint64_t *buf) {
*buf = u.u64;
}

// Packs a float Mat into a binary (DataType::Bit) Mat, consuming 64 floats
// per output uint64_t via pack_64_bitfield.
//
// float_mat:  input Mat holding float data.
// binary_mat: output Mat; must have binary_mat.c == float_mat.c / 64.
inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    // Each (n, h) row is packed as one contiguous run of w * c floats, so
    // that run must be a positive multiple of 64.
    BNN_ASSERT(
        float_mat.w * float_mat.c > 0 && float_mat.w * float_mat.c % 64 == 0,
        float_mat.w * float_mat.c);
    BNN_ASSERT(float_mat.c / 64 == binary_mat.c && float_mat.c % 64 == 0, "");

    FORZ(n, float_mat.n) {
        FORZ(h, float_mat.h) {
            // Walk the row in 64-float groups; each group fills one word.
            auto *fptr = float_mat.point<float>(n, h, 0);
            auto *bptr = binary_mat.point<uint64_t>(n, h, 0);
            FORZ(i, float_mat.w * float_mat.c / 64) {
                pack_64_bitfield(fptr, bptr);
                fptr += 64;
                bptr++;
            }
        }
    }
}

#endif /* COMMON_BITPACK_H */
30 changes: 13 additions & 17 deletions common/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ inline float random_float() {
static std::normal_distribution<float> distr;

float rand_float = distr(eng) / 10;
if (rand_float == 0) {
return random_float();
}
// LOG(INFO) << "Random float: " << rand_float;

return rand_float;
Expand Down Expand Up @@ -113,27 +116,20 @@ inline void fill_rand_uint64(uint64_t *data, size_t num) {
FORZ(i, num) { *(data + i) = random_uint64(); }
}

/// Renders the raw bytes of `a` (in memory order) as space-separated
/// binary octets, followed by a newline.
template <typename T>
std::string binrep(const T &a) {
    const char *bytes = reinterpret_cast<const char *>(&a);
    std::stringstream out;
    for (size_t i = 0; i < sizeof(a); ++i) {
        out << std::bitset<CHAR_BIT>(bytes[i]) << ' ';
    }
    out << '\n';
    return out.str();
}

template <typename T>
std::string binrep(const T &a, const size_t size) {
const char *beg = reinterpret_cast<const char *>(&a);
/**
 * Renders `size` bytes starting at `a` as space-separated binary octets.
 *
 * @param a       start of the byte range to render
 * @param size    number of bytes to render
 * @param reverse when true, emit bytes last-to-first, which makes the output
 *                human-readable (most significant byte first) on
 *                little-endian machines
 * @return the rendered string; each octet is followed by one space
 */
inline std::string binrep(const void *a, const size_t size, bool reverse) {
    const char *beg = static_cast<const char *>(a);
    const char *end = beg + size;

    std::stringstream ss;

    if (reverse) {
        // *--end reads the last byte not yet emitted, walking backwards.
        while (beg != end) ss << std::bitset<CHAR_BIT>(*--end) << ' ';
    } else {
        while (beg != end) ss << std::bitset<CHAR_BIT>(*beg++) << ' ';
    }
    return ss.str();
}

Expand Down
113 changes: 22 additions & 91 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,94 +16,6 @@
#include <glog/logging.h>
#include "mat.h"

// Packs a float Mat into a binary (DataType::Bit) Mat, consuming 64 floats
// per output uint64_t via pack_64_bitfield.
//
// float_mat:  input Mat holding float data.
// binary_mat: output Mat; must have binary_mat.c == float_mat.c / 64.
inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    // Each (n, h) row is packed as one contiguous run of w * c floats, so
    // that run must be a positive multiple of 64.
    BNN_ASSERT(
        float_mat.w * float_mat.c > 0 && float_mat.w * float_mat.c % 64 == 0,
        float_mat.w * float_mat.c);
    BNN_ASSERT(float_mat.c / 64 == binary_mat.c && float_mat.c % 64 == 0, "");

    FORZ(n, float_mat.n) {
        FORZ(h, float_mat.h) {
            // Walk the row in 64-float groups; each group fills one word.
            auto *fptr = float_mat.point<float>(n, h, 0);
            auto *bptr = binary_mat.point<uint64_t>(n, h, 0);
            FORZ(i, float_mat.w * float_mat.c / 64) {
                pack_64_bitfield(fptr, bptr);
                fptr += 64;
                bptr++;
            }
        }
    }
}

// Packs `size` floats (size must be a multiple of 128) into bits using
// AArch64 NEON SRI (shift-right-and-insert) to merge sign bits lane-wise.
//
// NOTE(review): v24-v31 are consumed by the sri instructions below but are
// never loaded anywhere in this function -- they hold whatever the caller
// left in them, so the output looks wrong. Confirm before using this kernel.
// NOTE(review): the clobber list only declares v0-v19, yet v20-v23 (and
// v24-v31) are read/written; missing clobbers can silently corrupt caller
// state. Also note there is no final "not" here, unlike pack_128_2, so the
// bit convention differs -- TODO confirm which convention callers expect.
inline void pack_128_3(const float *float_ptr, void *binary_ptr, size_t size) {
    size_t nn_size = size >> 7;

    asm volatile(
        "0: \n"
        "prfm pldl1keep, [%0] \n"
        "ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [%0], #64 \n"
        "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%0], #64 \n"
        // Fold the sign bits of v1-v4 into v0, one bit position at a time.
        "sri v0.4s, v1.4s, #1 \n"
        "sri v0.4s, v2.4s, #1 \n"
        "sri v0.4s, v3.4s, #1 \n"
        "sri v0.4s, v4.4s, #1 \n"

        "ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0], #64 \n"
        "prfm pldl1keep, [%0, #64] \n"
        "ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0], #64 \n"
        "sri v0.4s, v5.4s, #1 \n"
        "sri v0.4s, v6.4s, #1 \n"
        "sri v0.4s, v7.4s, #1 \n"
        "sri v0.4s, v8.4s, #1 \n"

        // Loop counter decrement; the bne at the end branches on this.
        "subs %2, %2, #1 \n"

        "ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%0], #64 \n"
        "prfm pldl1keep, [%0, #64] \n"
        "ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%0], #64 \n"

        "sri v0.4s, v9.4s, #2 \n"
        "sri v0.4s, v10.4s, #2 \n"
        "sri v0.4s, v11.4s, #2 \n"
        "sri v0.4s, v12.4s, #2 \n"

        "sri v0.4s, v13.4s, #1 \n"
        "sri v0.4s, v14.4s, #1 \n"
        "sri v0.4s, v15.4s, #1 \n"
        "sri v0.4s, v16.4s, #1 \n"

        // NOTE(review): this reload overwrites v8-v15, whose sign bits were
        // already merged above -- presumably intentional register reuse.
        "ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0], #64 \n"
        "prfm pldl1keep, [%0, #64] \n"
        "ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0], #64 \n"
        "sri v0.4s, v17.4s, #1 \n"
        "sri v0.4s, v18.4s, #1 \n"
        "sri v0.4s, v19.4s, #1 \n"
        "sri v0.4s, v20.4s, #1 \n"

        "sri v0.4s, v21.4s, #2 \n"
        "sri v0.4s, v22.4s, #2 \n"
        "sri v0.4s, v23.4s, #2 \n"
        // NOTE(review): v24-v31 below were never loaded in this function.
        "sri v0.4s, v24.4s, #2 \n"

        "sri v0.4s, v25.4s, #4 \n"
        "sri v0.4s, v26.4s, #4 \n"
        "sri v0.4s, v27.4s, #4 \n"
        "sri v0.4s, v28.4s, #4 \n"

        "sri v0.4s, v29.4s, #8 \n"
        "sri v0.4s, v31.4s, #8 \n"
        "sri v0.4s, v30.4s, #16 \n"

        "st1 {v0.4s}, [%1], #16 \n"
        "bne 0b \n"
        : "+r"(float_ptr),   // %0
          "+r"(binary_ptr),  // %1
          "+r"(nn_size)      // %2
        :
        : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
          "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
          "v19", "x0");
}
inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

Expand Down Expand Up @@ -163,6 +75,17 @@ inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
"sri v2.4s, v3.4s, #8 \n"
"sri v0.4s, v2.4s, #16 \n"

// Sign-bit-based bit-packing was introduced after the first version
// of dabnn was published. The sign bit is 1 when x < 0 and 0 when x > 0,
// which is the opposite of the convention used before --- bit set to 1 if
// x > 0 and 0 if x < 0.
// So, for compatibility, we add a "not" instruction here.
// We could save this instruction by introducing a "version" field in the
// dabnn model format and forcing users to upgrade.
// Note: if this instruction is removed, the padding value of binary
// convolution must also be changed from 0 (-1 in xnor) to -1 (1 in xnor).
"not v0.16b, v0.16b \n"

"st1 {v0.4s}, [%1], #16 \n"
"bne 0b \n"
: "+r"(float_ptr), // %0
Expand All @@ -171,8 +94,9 @@ inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
"v19", "x0");
"v19", "v20", "v21", "v22", "v23", "x0");
}

inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

Expand Down Expand Up @@ -261,17 +185,24 @@ inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
"x0");
}

inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
// Packs a float Mat into a binary Mat with the 128-wide NEON kernel
// pack_128_2 (one output bit per input float).
inline void pack_mat_128_2(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    // BNN_ASSERT for consistency with the other pack_* helpers; a plain
    // assert() is compiled out in NDEBUG builds.
    BNN_ASSERT(!float_mat.empty(), "");
    BNN_ASSERT(!binary_mat.empty(), "");
    // pack_128_2 consumes the input in whole 128-float groups (size >> 7),
    // so a non-multiple would silently drop the trailing elements.
    BNN_ASSERT(float_mat.total() % 128 == 0, float_mat.total());

    pack_128_2(static_cast<float *>(float_mat.data), binary_mat.data,
               float_mat.total());
}

// Packs a float Mat into a binary Mat with the 128-wide NEON kernel
// pack_128 (one output bit per input float).
inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    // BNN_ASSERT for consistency with the other pack_* helpers; a plain
    // assert() is compiled out in NDEBUG builds.
    BNN_ASSERT(!float_mat.empty(), "");
    BNN_ASSERT(!binary_mat.empty(), "");
    // pack_128 consumes the input in whole 128-float groups (size >> 7),
    // so a non-multiple would silently drop the trailing elements.
    BNN_ASSERT(float_mat.total() % 128 == 0, float_mat.total());

    pack_128(static_cast<float *>(float_mat.data), binary_mat.data,
             float_mat.total());
}

inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
if (float_mat.c % 128 == 0) {
pack_mat_128(float_mat, binary_mat);
pack_mat_128_2(float_mat, binary_mat);
} else {
pack_mat_64(float_mat, binary_mat);
}
Expand Down
2 changes: 1 addition & 1 deletion dabnn/layers/BinConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ std::string BinConv::to_str() const {
std::stringstream ss;
ss << type_ << ", ";
PNT_TO(ss, input_mat->h, input_mat->w, input_mat->elem_c, weight_mat->h,
weight_mat->w, weight_mat->n);
weight_mat->w, weight_mat->n, pad_h, pad_w);

return ss.str();
}
Expand Down
9 changes: 8 additions & 1 deletion dabnn/layers/Binarize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,15 @@
#include "Binarize.h"

#include <dabnn/bitpack.h>
#include <dabnn/net.h>

namespace bnn {
void Binarize::forward_impl() const { ::pack_mat_64(*input_mat, *output_mat); }
void Binarize::forward_impl() const {
if (net_.lock()->new_bitpack) {
::pack_mat(*input_mat, *output_mat);
} else {
::pack_mat_64(*input_mat, *output_mat);
}
}

} // namespace bnn
5 changes: 3 additions & 2 deletions dabnn/mat.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,9 @@ inline std::ostream &operator<<(std::ostream &os, const Mat &mat) {
os << "n: " << mat.n << ", width: " << mat.w << ", height: " << mat.h
<< ", channels: " << mat.c << std::endl;
if (mat.data_type == DataType::Bit) {
return os << binrep(*static_cast<char *>(mat.data),
std::min(mat.total(), size_t{10}) * mat.elemsize);
return os << binrep(static_cast<char *>(mat.data),
std::min(mat.total(), size_t{10}) * mat.elemsize,
true);
} else {
for (size_t i = 0;
i < std::min(static_cast<decltype(mat.total())>(10), mat.total());
Expand Down
Loading

0 comments on commit 2618327

Please sign in to comment.