Commit: reformat

daquexian committed Jun 14, 2019
1 parent b3b35de commit 5665e16
Showing 11 changed files with 61 additions and 62 deletions.
13 changes: 7 additions & 6 deletions dabnn/bconv.h
@@ -7,7 +7,7 @@
 #include <arm_neon.h>
 #endif // __ARM_NEON
 
-#if not defined (__aarch64__)
+#if not defined(__aarch64__)
 #include <common/baseline.h>
 #endif
 #include <common/helper.h>
@@ -850,7 +850,7 @@ inline void unpack_output(float *b, float *a, int width, int height,
 
 #undef A
 }
-#endif // __aarch64__
+#endif  // __aarch64__
 
 inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
                            Mat &top_blob, const int stride) {
@@ -924,9 +924,10 @@ inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
         unpack_output(packed_output, static_cast<float *>(top_blob.data),
                       top_blob.w, top_blob.h, top_blob.c);
     }
-#else // __aarch64__
-    baseline_bconv(bottom_blob, weight, 3, 3, 0, 0, stride, stride, 1, 1, top_blob.c, top_blob);
-#endif // __aarch64__
+#else  // __aarch64__
+    baseline_bconv(bottom_blob, weight, 3, 3, 0, 0, stride, stride, 1, 1,
+                   top_blob.c, top_blob);
+#endif  // __aarch64__
 }
 
 #ifdef __aarch64__
@@ -1717,6 +1718,6 @@ inline void bnn::bconv_1x1_64(const Mat &bottom_blob, const Mat &weight,
         }
     }
 }
-#endif // __aarch64__
+#endif  // __aarch64__
 
 #endif
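Everything in this file funnels into xnor/xor plus popcount on bit-packed operands. As a scalar model of the quantity accumulated per output element (a sketch assuming the usual +1/-1 sign encoding, not dabnn's actual API):

#include <cstdint>

// Sketch: the +1/-1 dot product behind one binary-conv output element,
// for two 64-bit sign-packed words. Matching bits contribute +1 and
// differing bits -1, so the sum is 64 - 2 * popcount(a ^ w).
// __builtin_popcountll is the GCC/Clang builtin.
inline int32_t binary_dot_64(uint64_t a, uint64_t w) {
    return 64 - 2 * static_cast<int32_t>(__builtin_popcountll(a ^ w));
}

The xnor formulation counts matches directly; since popcount(~(a ^ w)) = 64 - popcount(a ^ w), choosing xnor over xor only shifts the convention (see the "not" discussion in bitpack.h below), not the math.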
16 changes: 8 additions & 8 deletions dabnn/bgemm.h
@@ -16,8 +16,8 @@
 #else
 #define P 4
 #define R 4
-#endif // __aarch64__
-#endif // __ARM_NEON
+#endif  // __aarch64__
+#endif  // __ARM_NEON
 
 #define A(i, j) a[(j)*lda + (i)] // A(y, x)
 #define B(i, j) b[(j)*ldb + (i)] // B(y, x)
@@ -425,7 +425,7 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
         "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
         "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
         "v28", "v29", "v30");
-#else // __aarch64__
+#else  // __aarch64__
     // C: 4x4(float 32, 4x1=4), A: 4*K(4regs), B: K*4(4regs)
     // q0~q3 contains C, q4~q7 contains 4*128 of B, q8~q11 contains 128*4 of A
@@ -473,7 +473,7 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
         "vzip.u32 q13, q15 \n"
         "vzip.u32 q12, q13 \n"
         "vadd.u32 q0, q0, q12 \n"
-
+
         "veor.u8 q12, q5, q8 \n"
         "veor.u8 q13, q5, q9 \n"
         "veor.u8 q14, q5, q10 \n"
@@ -502,7 +502,7 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
         "vzip.u32 q13, q15 \n"
         "vzip.u32 q12, q13 \n"
         "vadd.u32 q1, q1, q12 \n"
-
+
         "veor.u8 q12, q6, q8 \n"
         "veor.u8 q13, q6, q9 \n"
         "veor.u8 q14, q6, q10 \n"
@@ -531,7 +531,7 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
         "vzip.u32 q13, q15 \n"
         "vzip.u32 q12, q13 \n"
         "vadd.u32 q2, q2, q12 \n"
-
+
         "subs %0, %0, #1 \n"
 
         "veor.u8 q12, q7, q8 \n"
@@ -578,8 +578,8 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
           "+r"(b), // %2
           "+r"(a) // %3
         :
-        : "cc", "memory", "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-        );
+        : "cc", "memory", "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+          "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
 #endif // __aarch64__
 }
 #endif // __ARM_NEON
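For orientation, a scalar sketch of what the 4x4 micro-kernel computes, assuming the column-major panel layout implied by the A(i, j) and B(i, j) macros; this is an illustration, not the shipped kernel:

#include <cstdint>

// Scalar model of the NEON micro-kernel above: a 4xK panel of packed A
// against a Kx4 panel of packed B, accumulating xor-popcounts into a
// 4x4 block of C (the assembly keeps C in q0~q3, or v-registers on
// AArch64, and streams A and B through the remaining registers).
inline void micro_kernel_ref(int64_t kc, float *c, const uint64_t *a,
                             const uint64_t *b) {
    for (int64_t p = 0; p < kc; p++) {
        for (int j = 0; j < 4; j++) {      // column of the B panel
            for (int i = 0; i < 4; i++) {  // row of the A panel
                c[j * 4 + i] +=
                    __builtin_popcountll(a[p * 4 + i] ^ b[p * 4 + j]);
            }
        }
    }
}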
38 changes: 21 additions & 17 deletions dabnn/bitpack.h
@@ -1,12 +1,12 @@
 // Copyright 2019 JD.com Inc. JD AI
 //
-// The step of bit-packing packs N 32-bit float/integer to an N-bit 
-// operand according their signs. For example, performing bit-packing 
-// on 128 float numbers produces a 128-bit operand. xnor/xor is only 
+// The step of bit-packing packs N 32-bit float/integer to an N-bit
+// operand according their signs. For example, performing bit-packing
+// on 128 float numbers produces a 128-bit operand. xnor/xor is only
 // enabled on these packed operands.
 //
-// The method in this file is usually for the packing of input. The 
-// packing of weight has been performed offline in the step of 
+// The method in this file is usually for the packing of input. The
+// packing of weight has been performed offline in the step of
 // onnx2bnn.
 
 #ifndef BITPACK_H
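A scalar sketch of the packing that header comment describes: 64 floats collapse into one 64-bit word, one sign bit each. The bit order here is an arbitrary choice for illustration; the NEON routines below use their own order, as their comments note.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch: sign-pack `size` floats (assumed to be a multiple of 64)
// into 64-bit words, reusing the IEEE 754 sign bit as the optimized
// version does. Not dabnn's API.
inline void pack_64_sketch(const float *float_ptr, uint64_t *binary_ptr,
                           size_t size) {
    for (size_t i = 0; i < size; i += 64) {
        uint64_t word = 0;
        for (size_t j = 0; j < 64; j++) {
            uint32_t bits;
            std::memcpy(&bits, &float_ptr[i + j], sizeof(bits));
            word |= static_cast<uint64_t>(bits >> 31) << j;  // sign bit only
        }
        *binary_ptr++ = word;
    }
}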
@@ -26,13 +26,14 @@
 #include "mat.h"
 
 #ifdef __aarch64__
-inline void pack_128_opt(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_opt(const float *float_ptr, void *binary_ptr,
+                         size_t size) {
     /**
      * This is the optimized bit-packing.
      *
      * sri is the "shift-right-and-overwrite" instruction.
     * By this instruction, we directly leveraging the existing
-     * sign bits in 32-bit operands (both IEEE 754 float and 
+     * sign bits in 32-bit operands (both IEEE 754 float and
      * 32-bit integer).
      * Note that the order of bits in the output operand is not
      * the consistent with the order of input operands. Fortunately,
@@ -107,8 +108,9 @@ inline void pack_128_opt(const float *float_ptr, void *binary_ptr, size_t size)
         // So for the compatibility we add a "not" instruction here.
         // Maybe we can save this instruction by introducing "version" for
         // dabnn model and force users to upgrade.
-        // Note: If this line is removed, the padding value of binary convolution
-        // should also be changed from 0 (-1 in xnor) to -1 (1 in xnor)
+        // Note: If this line is removed, the padding value of binary
+        // convolution should also be changed from 0 (-1 in xnor) to -1 (1 in
+        // xnor)
         "not v0.16b, v0.16b \n"
 
         "st1 {v0.4s}, [%1], #16 \n"
@@ -118,11 +120,12 @@
           "+r"(nn_size) // %2
         :
         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
-        "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
-        "v19", "v20", "v21", "v22", "v23", "x0");
+          "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
+          "v19", "v20", "v21", "v22", "v23", "x0");
 }
 
-inline void pack_128_baseline(const float *float_ptr, void *binary_ptr, size_t size) {
+inline void pack_128_baseline(const float *float_ptr, void *binary_ptr,
+                              size_t size) {
     size_t nn_size = size >> 7;
 
     asm volatile(
@@ -214,14 +217,15 @@ inline void pack_mat_128_opt(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     assert(!binary_mat.empty());
 
     pack_128_opt(static_cast<float *>(float_mat.data), binary_mat.data,
-            float_mat.total());
+                 float_mat.total());
 }
 
-inline void pack_mat_128_baseline(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
+inline void pack_mat_128_baseline(const bnn::Mat &float_mat,
+                                  bnn::Mat &binary_mat) {
     assert(!binary_mat.empty());
 
     pack_128_baseline(static_cast<float *>(float_mat.data), binary_mat.data,
-            float_mat.total());
+                      float_mat.total());
 }
 
 inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
@@ -232,7 +236,7 @@ inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
      */
     pack_mat_128_opt(float_mat, binary_mat);
 }
-#endif // __aarch64__
+#endif  // __aarch64__
 
 inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     /**
@@ -266,7 +270,7 @@ inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
     }
 #else
     pack_mat_64(float_mat, binary_mat);
-#endif // __aarch64__
+#endif  // __aarch64__
 }
 
 #endif /* BITPACK_H */
6 changes: 3 additions & 3 deletions dabnn/layers/Affine.cpp
@@ -22,8 +22,8 @@ inline void affine_inplace(bnn::Mat &data, const bnn::Mat &a,
     }
 }
 
-inline void affine(const bnn::Mat &data, const bnn::Mat &a,
-                   const bnn::Mat &b, bnn::Mat &output) {
+inline void affine(const bnn::Mat &data, const bnn::Mat &a, const bnn::Mat &b,
+                   bnn::Mat &output) {
     FORZ(n, data.n) {
         FORZ(h, data.h) {
             const auto *ptr = data.point<float>(n, h, 0);
@@ -43,7 +43,7 @@ void Affine::forward_impl() const {
 #ifdef BNN_CHECK_CONSISTENCY
     affine(*data_mat, *a_mat, *b_mat, *output_mat);
 #else
-        affine_inplace(*data_mat, *a_mat, *b_mat);
+    affine_inplace(*data_mat, *a_mat, *b_mat);
 #endif
 }
 
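Both paths compute the same channel-wise transform, y = a[c] * x + b[c] (an Affine layer is typically the folded form of batch norm); BNN_CHECK_CONSISTENCY routes through the copying variant so the in-place result can be checked against it. A minimal sketch under an assumed channel-last float layout, not the Mat-based loops above:

#include <cstddef>

// Sketch: y = a[c] * x + b[c] over `pixels` positions with `channels`
// channels each (channel-last layout assumed for illustration).
inline void affine_sketch(const float *x, const float *a, const float *b,
                          float *y, size_t pixels, size_t channels) {
    for (size_t i = 0; i < pixels; i++) {
        for (size_t c = 0; c < channels; c++) {
            y[i * channels + c] = a[c] * x[i * channels + c] + b[c];
        }
    }
}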
8 changes: 4 additions & 4 deletions dabnn/layers/AvePool.cpp
@@ -79,7 +79,7 @@ void ave_pool_2x2_s2(const bnn::Mat &input, bnn::Mat &output) {
         }
     }
 }
-#endif // __ARM_NEON
+#endif  // __ARM_NEON
 
 void ave_pool_fallback(const bnn::Mat &input, const size_t pad_h,
                        const size_t pad_w, const size_t stride_h,
@@ -156,9 +156,9 @@ void AvePool::forward_impl() const {
                           kernel_h, kernel_w, *output_mat);
     }
 #else
-    ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
-                      kernel_h, kernel_w, *output_mat);
-#endif // __ARM_NEON
+    ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w, kernel_h,
+                      kernel_w, *output_mat);
+#endif  // __ARM_NEON
 }
 
 }  // namespace bnn
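For reference, the specialized ave_pool_2x2_s2 path averages non-overlapping 2x2 windows at stride 2. A minimal scalar sketch, assuming a single-channel float plane rather than the real bnn::Mat layout:

#include <cstddef>

// Sketch: 2x2, stride-2 average pooling on an in_w x in_h float plane;
// the output is floor(in_w/2) x floor(in_h/2).
inline void ave_pool_2x2_s2_sketch(const float *in, size_t in_w, size_t in_h,
                                   float *out) {
    for (size_t y = 0; y + 1 < in_h; y += 2) {
        for (size_t x = 0; x + 1 < in_w; x += 2) {
            const float *row0 = in + y * in_w + x;
            const float *row1 = row0 + in_w;
            *out++ = (row0[0] + row0[1] + row1[0] + row1[1]) * 0.25f;
        }
    }
}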
10 changes: 5 additions & 5 deletions dabnn/layers/BinConv.cpp
@@ -30,9 +30,9 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight,
     }
     const auto col_mat_name = "col_mat";
     if (mat_map.find(col_mat_name) == mat_map.end()) {
-        const auto len = output_mat->h * output_mat->w * weight_mat->h * weight_mat->w * input_mat->elem_c;
-        mat_map[col_mat_name] =
-            std::make_shared<Mat>(len, bnn::DataType::Bit);
+        const auto len = output_mat->h * output_mat->w * weight_mat->h *
+                         weight_mat->w * input_mat->elem_c;
+        mat_map[col_mat_name] = std::make_shared<Mat>(len, bnn::DataType::Bit);
     }
 
     padded_mat = mat(pad_name);
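The rewrapped len expression is the usual im2col sizing: every output pixel needs one kernel-sized patch of packed input. For scale, with a hypothetical 28x28 output, a 3x3 weight, and an input whose packed representation has elem_c = 2 (an assumed value for illustration), len = 28 * 28 * 3 * 3 * 2 = 14112 bit-packed elements for col_mat.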
@@ -110,8 +110,8 @@ void BinConv::forward_impl() const {
                            static_cast<float *>(output_mat->data), m);
         } else {
             baseline_bconv(*input_mat, *weight_mat, weight_mat->h,
-                           weight_mat->w, pad_h, pad_w, stride_h, stride_w, 1, 1,
-                           output_mat->c, *output_mat);
+                           weight_mat->w, pad_h, pad_w, stride_h, stride_w, 1,
+                           1, output_mat->c, *output_mat);
         }
     } else {
         baseline_bconv(*input_mat, *weight_mat, weight_mat->h, weight_mat->w,
4 changes: 1 addition & 3 deletions dabnn/layers/Binarize.cpp
@@ -6,8 +6,6 @@
 #include <dabnn/net.h>
 
 namespace bnn {
-void Binarize::forward_impl() const {
-    ::pack_mat(*input_mat, *output_mat);
-}
+void Binarize::forward_impl() const { ::pack_mat(*input_mat, *output_mat); }
 
 }  // namespace bnn
11 changes: 6 additions & 5 deletions dabnn/layers/MaxPool.cpp
@@ -198,7 +198,8 @@ void maxpool3x3(const bnn::Mat &input, bnn::Mat &output, const int stride_h = 1,
                   "+r"(output_ptr), // %9
                   "+r"(nn) // %10
                 :
-                : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8");
+                : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6",
+                  "q7", "q8");
 #endif
         }
     }
@@ -269,8 +270,8 @@ MaxPool::MaxPool(NetCP net, const std::string &name, css input, css output,
 void MaxPool::forward_impl() const {
 #ifdef __ARM_NEON
     if (kernel_h == 3 && kernel_w == 3) {
-        // std::numeric_limits<float>::min() is the closest value to 0, so we uses
-        // -max()
+        // std::numeric_limits<float>::min() is the closest value to 0, so we
+        // uses -max()
         pad(*input_mat, pad_h, pad_w, *padded_mat,
             -std::numeric_limits<float>::max());
         maxpool3x3(*padded_mat, *output_mat, stride_h, stride_w);
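The reflowed comment points at a real pitfall: for floating-point types, std::numeric_limits<float>::min() is the smallest positive normal value, not the most negative float, so it cannot act as the identity element when padding before a max-pool. A self-contained illustration (not dabnn code):

#include <limits>

static_assert(std::numeric_limits<float>::min() > 0.0f,
              "min() is the smallest positive normal, not 'most negative'");
// -std::numeric_limits<float>::max() (or lowest(), since C++11) is the
// correct stand-in for negative infinity when padding before a max-pool.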
@@ -283,8 +284,8 @@ void MaxPool::forward_impl() const {
                           kernel_h, kernel_w, *output_mat);
     }
 #else
-    max_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
-                      kernel_h, kernel_w, *output_mat);
+    max_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w, kernel_h,
+                      kernel_w, *output_mat);
 #endif  // __aarch64__
 }
 
7 changes: 2 additions & 5 deletions dabnn/layers/Pad.cpp
@@ -2,13 +2,10 @@
 
 #include "Pad.h"
 
-#include <dabnn/pad.h>
 #include <dabnn/net.h>
+#include <dabnn/pad.h>
 
 namespace bnn {
-void Pad::forward_impl() const {
-    pad(*input_mat, pad_h, pad_w, *output_mat);
-}
+void Pad::forward_impl() const { pad(*input_mat, pad_h, pad_w, *output_mat); }
 
 }  // namespace bnn
-
6 changes: 2 additions & 4 deletions dabnn/layers/Relu.cpp
@@ -20,9 +20,7 @@ void Relu::forward_impl() const {
     }
 #else
     float *ptr = static_cast<float *>(*data_mat);
-    FORZ(i, data_mat->total()) {
-        *ptr = std::max(*ptr, 0.f);
-    }
-#endif // __ARM_NEON
+    FORZ(i, data_mat->total()) { *ptr = std::max(*ptr, 0.f); }
+#endif  // __ARM_NEON
 }
 }  // namespace bnn
4 changes: 2 additions & 2 deletions dabnn/net.cpp
@@ -96,14 +96,14 @@ void Net::prepare() {
                                 bnn::DataType::Bit, false));
                     pack_mat_128(*tmp, *mat_map_[name]);
                 } else {
-#endif // __aarch64__
+#endif  // __aarch64__
                     add_mat(name, std::make_shared<Mat>(
                                       shape[0], shape[1], shape[2], shape[3],
                                       const_cast<uint64_t *>(data),
                                       bnn::DataType::Bit, false));
 #ifdef __aarch64__
                 }
-#endif // __aarch64__
+#endif  // __aarch64__
             } else if (tensor->data_type() == flatbnn::DataType::Float32) {
                 Shaper::Shape shape(tensor->shape()->begin(),
                                     tensor->shape()->end());
