Skip to content

Commit

Permalink
Merge pull request #29 from JDAI-CV/non-android
Browse files Browse the repository at this point in the history
Compile on non-Android aarch64 devices and non-aarch64 devices
  • Loading branch information
daquexian committed May 28, 2019
2 parents 2be7b13 + c039558 commit 2acd1a3
Show file tree
Hide file tree
Showing 14 changed files with 92 additions and 12 deletions.
7 changes: 7 additions & 0 deletions .daq_pm/configs/bconv_test_x86
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# It is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_bconv_test_x86
target bconv_test
cmake_options -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBNN_BUILD_MAIN_LIB=ON -GNinja
binary tests/bconv_test
2 changes: 1 addition & 1 deletion .daq_pm/configs/net_test
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ name binary-nn
type cpp
build_dir build_net_test
target net_test
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release -GNinja
binary ~/adb_push_and_run.sh tests/net_test
7 changes: 7 additions & 0 deletions .daq_pm/configs/net_test_x86
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# It is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_net_test_x86
target net_test
cmake_options -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release -DBNN_BUILD_MAIN_LIB=ON -GNinja
binary tests/net_test
15 changes: 9 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ option(BNN_SYSTEM_PROTOBUF "Use system protobuf to build onnx2bnn" ON)
option(BNN_BUILD_PYTHON "Build onnx2bnn python interface" OFF)
option(BNN_USE_MSVC_STATIC_RUNTIME "Link onnx2bnn to msvc static runtime" ON)

if (${CMAKE_SYSTEM_NAME} STREQUAL "Android")
set(BNN_BUILD_ANDROID ON)
else()
set(BNN_BUILD_ANDROID OFF)
message(STATUS "Target architecture: ${CMAKE_SYSTEM_PROCESSOR}")
if (NOT DEFINED BNN_BUILD_MAIN_LIB)
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
set(BNN_BUILD_MAIN_LIB ON)
else()
set(BNN_BUILD_MAIN_LIB OFF)
endif()
endif()
message(STATUS "BNN_BUILD_MAIN_LIB: ${BNN_BUILD_MAIN_LIB}")

include(cmake/utils.cmake)
bnn_add_msvc_runtime_flag()
Expand All @@ -42,7 +46,7 @@ if (${BNN_NET_BENCHMARK})
add_compile_options("-DBNN_BENCHMARK")
endif()

if (${BNN_BUILD_ANDROID})
if (BNN_BUILD_MAIN_LIB)
set(CMAKE_CXX_STANDARD 17)

set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
Expand All @@ -64,7 +68,6 @@ if (${BNN_BUILD_ANDROID})
configure_benchmark()
add_subdirectory(benchmark)
endif()

else()
set(CMAKE_CXX_STANDARD 11)

Expand Down
3 changes: 2 additions & 1 deletion cmake/system.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2019 JD.com Inc. JD AI

if (${CMAKE_SYSTEM_NAME} STREQUAL "Android")
if ((NOT CMAKE_SYSTEM_NAME STREQUAL CMAKE_HOST_SYSTEM_NAME) OR
(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR))
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
Expand Down
15 changes: 15 additions & 0 deletions dabnn/bconv.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@
#include <arm_neon.h>
#endif // __ARM_NEON

#if not defined (__aarch64__)
#include <common/baseline.h>
#endif
#include <common/helper.h>
#include <dabnn/im2col.h>
#include "mat.h"

namespace bnn {
#ifdef __aarch64__
inline void bconv_1x1_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
inline void bconv_1x1_128(const Mat &bottom_blob, const Mat &weight,
Expand All @@ -20,8 +24,10 @@ inline void bconv_1x1_256(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
inline void bconv_1x1_512(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
#endif
inline void bconv_3x3(const Mat &bottom_blob, const Mat &weight, Mat &top_blob,
const int stride = 1);
#ifdef __aarch64__
inline void bconv_3x3_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride = 1);
inline void bconv_3x3_64_fallback(const Mat &bottom_blob, const Mat &weight,
Expand All @@ -44,8 +50,10 @@ inline void bconv_3x3_128_internal_s1(const uint64_t *bottom_ptr, const int b_w,
inline void bconv_3x3_128_internal_fallback(
const uint64_t *bottom_ptr, const int b_w, const uint64_t *weight_ptr,
float *top_ptr, const int top_h, const int top_w, const int stride = 1);
#endif
} // namespace bnn

#ifdef __aarch64__
inline void bnn::bconv_3x3_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride) {
bconv_3x3_64_opt4(bottom_blob, weight, top_blob, 0, stride);
Expand Down Expand Up @@ -819,9 +827,11 @@ inline void unpack_output(float *b, float *a, int width, int height,

#undef A
}
#endif // __aarch64__

inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride) {
#ifdef __aarch64__
// TODO: more elegant way
static uint64_t packed_weight[999999];
static uint64_t packed_input[9999999];
Expand Down Expand Up @@ -884,8 +894,12 @@ inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
unpack_output(packed_output, static_cast<float *>(top_blob.data),
top_blob.w, top_blob.h, top_blob.c);
}
#else // __aarch64__
baseline_bconv(bottom_blob, weight, 3, 3, 0, 0, stride, stride, 1, 1, top_blob.c, top_blob);
#endif // __aarch64__
}

#ifdef __aarch64__
inline void bnn::bconv_1x1_512(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob) {
FORZS(th, top_blob.h, 2) {
Expand Down Expand Up @@ -1673,5 +1687,6 @@ inline void bnn::bconv_1x1_64(const Mat &bottom_blob, const Mat &weight,
}
}
}
#endif // __aarch64__

#endif
13 changes: 11 additions & 2 deletions dabnn/bgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#define min(i, j) ((i) < (j) ? (i) : (j))

#if __aarch64__
inline void pack_a(const int kc, const uint64_t *a, const int lda,
uint64_t *a_to);
inline void pack_b(const int kc, const uint64_t *b, const int ldb,
Expand All @@ -28,13 +29,15 @@ inline void inner_kernel(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc,
const int first_time);
#endif // __aarch64__
inline void bgemm_naive(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc);

inline void bgemm(const int m, const int n, const int k, const uint64_t *a,
int lda, const uint64_t *b, const int ldb, float *c,
const int ldc) {
#if __aarch64__
int kc = 32;
int mc = 32;
int i, q, qb, ib;
Expand All @@ -48,8 +51,12 @@ inline void bgemm(const int m, const int n, const int k, const uint64_t *a,
i == 0);
}
}
#else
bgemm_naive(m, n, k, a, lda, b, ldb, c, ldc);
#endif // __aarch64__
}

#if __aarch64__
inline void inner_kernel(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc,
Expand Down Expand Up @@ -414,9 +421,11 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
"v28", "v29", "v30");
}
#endif // __aarch64__

inline void bgemm_naive(int m, int n, int k, uint64_t *a, int lda, uint64_t *b,
int ldb, float *c, int ldc) {
inline void bgemm_naive(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc) {
FORZ(i, m) {
FORZ(j, n) {
FORZ(h, k) {
Expand Down
6 changes: 6 additions & 0 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <glog/logging.h>
#include "mat.h"

#ifdef __aarch64__
inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

Expand Down Expand Up @@ -198,6 +199,7 @@ inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
pack_128(static_cast<float *>(float_mat.data), binary_mat.data,
float_mat.total());
}
#endif // __aarch64__

inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(
Expand All @@ -220,11 +222,15 @@ inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {

inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
#ifdef __aarch64__
if (float_mat.c % 128 == 0) {
pack_mat_128_2(float_mat, binary_mat);
} else {
pack_mat_64(float_mat, binary_mat);
}
#else
pack_mat_64(float_mat, binary_mat);
#endif // __aarch64__
}

#endif /* BITPACK_H */
7 changes: 7 additions & 0 deletions dabnn/layers/AvePool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

namespace bnn {

#ifdef __aarch64__
void ave_pool_2x2_s2(const bnn::Mat &input, bnn::Mat &output) {
FORZ(h, output.h) {
FORZ(w, output.w) {
Expand Down Expand Up @@ -48,6 +49,7 @@ void ave_pool_2x2_s2(const bnn::Mat &input, bnn::Mat &output) {
}
}
}
#endif // __aarch64__

void ave_pool_fallback(const bnn::Mat &input, const size_t pad_h,
const size_t pad_w, const size_t stride_h,
Expand Down Expand Up @@ -114,6 +116,7 @@ AvePool::AvePool(NetCP net, const std::string &name, css input, css output,
}

void AvePool::forward_impl() const {
#ifdef __aarch64__
if (stride_h == 2 && stride_w == 2 && kernel_h == 2 && kernel_w == 2 &&
input_mat->c % 4 == 0) {
pad(*input_mat, pad_h, pad_w, *padded_mat);
Expand All @@ -122,6 +125,10 @@ void AvePool::forward_impl() const {
ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
kernel_h, kernel_w, *output_mat);
}
#else
ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
kernel_h, kernel_w, *output_mat);
#endif // __aarch64__
}

} // namespace bnn
8 changes: 8 additions & 0 deletions dabnn/layers/BinConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight,
}

bool BinConv::direct_conv_compatible() const {
#ifdef __aarch64__
if (weight_mat->h == 3 && weight_mat->w == 3 && input_mat->c == 1 &&
stride_h == stride_w) {
return true;
Expand All @@ -78,10 +79,17 @@ bool BinConv::direct_conv_compatible() const {
return true;
}
return false;
#else
return false;
#endif
}

bool BinConv::gemm_compatible() const {
#ifdef __aarch64__
return weight_mat->h * weight_mat->n * weight_mat->c % 2 == 0;
#else
return false;
#endif
}

void BinConv::forward_impl() const {
Expand Down
6 changes: 6 additions & 0 deletions dabnn/layers/MaxPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

namespace bnn {

#ifdef __aarch64__
void maxpool2x2(const bnn::Mat &input, bnn::Mat &output, const int stride_h = 1,
const int stride_w = 1) {
FORZ(h, output.h) {
Expand Down Expand Up @@ -126,6 +127,7 @@ void maxpool3x3(const bnn::Mat &input, bnn::Mat &output, const int stride_h = 1,
}
}
}
#endif // __aarch64__

MaxPool::MaxPool(NetCP net, const std::string &name, css input, css output,
int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h,
Expand All @@ -150,6 +152,7 @@ MaxPool::MaxPool(NetCP net, const std::string &name, css input, css output,
padded_mat = mat_map[pad_name];
}
void MaxPool::forward_impl() const {
#ifdef __aarch64__
// std::numeric_limits<float>::min() is the closest value to 0, so we use
// -max()
pad(*input_mat, pad_h, pad_w, *padded_mat,
Expand All @@ -164,6 +167,9 @@ void MaxPool::forward_impl() const {
} else {
std::invalid_argument("Not supported max_pool");
}
#else
std::invalid_argument("Not supported max_pool");
#endif // __aarch64__
}

std::string MaxPool::to_str() const {
Expand Down
7 changes: 7 additions & 0 deletions dabnn/layers/Relu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

namespace bnn {
void Relu::forward_impl() const {
#if __ARM_NEON
float32x4_t _zero = vdupq_n_f32(0.f);
float *ptr = static_cast<float *>(*data_mat);
FORZ(i, data_mat->total() / 4) {
Expand All @@ -17,5 +18,11 @@ void Relu::forward_impl() const {

ptr += 4;
}
#else
float *ptr = static_cast<float *>(*data_mat);
FORZ(i, data_mat->total()) {
*ptr = std::max(*ptr, 0.f);
}
#endif // __ARM_NEON
}
} // namespace bnn
4 changes: 4 additions & 0 deletions dabnn/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void Net::prepare() {

shaper.AddShape(name, shape);

#ifdef __aarch64__
if (Shaper::c(shape) % 128 == 0) {
// Re-arrange the bit order
const auto len = shaper.total(shape);
Expand All @@ -95,11 +96,14 @@ void Net::prepare() {
bnn::DataType::Bit, false));
pack_mat_128_2(*tmp, *mat_map_[name]);
} else {
#endif // __aarch64__
add_mat(name, std::make_shared<Mat>(
shape[0], shape[1], shape[2], shape[3],
const_cast<uint64_t *>(data),
bnn::DataType::Bit, false));
#ifdef __aarch64__
}
#endif // __aarch64__
} else if (tensor->data_type() == flatbnn::DataType::Float32) {
Shaper::Shape shape(tensor->shape()->begin(),
tensor->shape()->end());
Expand Down
4 changes: 2 additions & 2 deletions tests/bconv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ TEST(bconv_test, bconv_test_3x3_64) {

bnn::Mat c(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
c.fill<float>(0);
bnn::bconv_3x3_64(padded, b, c);
bnn::bconv_3x3(padded, b, c);

bnn::Mat expected(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
expected.fill<float>(0);
Expand Down Expand Up @@ -229,7 +229,7 @@ TEST(bconv_test, bconv_test_3x3_64_s2) {

bnn::Mat c(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
c.fill<float>(0);
bnn::bconv_3x3_64(padded, b, c, 2);
bnn::bconv_3x3(padded, b, c, 2);

bnn::Mat expected(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
expected.fill<float>(0);
Expand Down

0 comments on commit 2acd1a3

Please sign in to comment.