Skip to content

Commit

Permalink
Merge pull request #29 from JDAI-CV/non-android
Browse files Browse the repository at this point in the history
Compile on non-Android aarch64 devices and non-aarch64 devices
  • Loading branch information
daquexian committed May 28, 2019
2 parents 2be7b13 + c039558 commit 2acd1a3
Show file tree
Hide file tree
Showing 14 changed files with 92 additions and 12 deletions.
7 changes: 7 additions & 0 deletions .daq_pm/configs/bconv_test_x86
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# It is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_bconv_test_x86
target bconv_test
cmake_options -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBNN_BUILD_MAIN_LIB=ON -GNinja
binary tests/bconv_test
2 changes: 1 addition & 1 deletion .daq_pm/configs/net_test
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ name binary-nn
type cpp
build_dir build_net_test
target net_test
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release -GNinja
binary ~/adb_push_and_run.sh tests/net_test
7 changes: 7 additions & 0 deletions .daq_pm/configs/net_test_x86
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# It is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_net_test_x86
target net_test
cmake_options -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release -DBNN_BUILD_MAIN_LIB=ON -GNinja
binary tests/net_test
15 changes: 9 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ option(BNN_SYSTEM_PROTOBUF "Use system protobuf to build onnx2bnn" ON)
option(BNN_BUILD_PYTHON "Build onnx2bnn python interface" OFF)
option(BNN_USE_MSVC_STATIC_RUNTIME "Link onnx2bnn to msvc static runtime" ON)

if (${CMAKE_SYSTEM_NAME} STREQUAL "Android")
set(BNN_BUILD_ANDROID ON)
else()
set(BNN_BUILD_ANDROID OFF)
message(STATUS "Target architecture: ${CMAKE_SYSTEM_PROCESSOR}")
if (NOT DEFINED BNN_BUILD_MAIN_LIB)
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
set(BNN_BUILD_MAIN_LIB ON)
else()
set(BNN_BUILD_MAIN_LIB OFF)
endif()
endif()
message(STATUS "BNN_BUILD_MAIN_LIB: ${BNN_BUILD_MAIN_LIB}")

include(cmake/utils.cmake)
bnn_add_msvc_runtime_flag()
Expand All @@ -42,7 +46,7 @@ if (${BNN_NET_BENCHMARK})
add_compile_options("-DBNN_BENCHMARK")
endif()

if (${BNN_BUILD_ANDROID})
if (BNN_BUILD_MAIN_LIB)
set(CMAKE_CXX_STANDARD 17)

set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
Expand All @@ -64,7 +68,6 @@ if (${BNN_BUILD_ANDROID})
configure_benchmark()
add_subdirectory(benchmark)
endif()

else()
set(CMAKE_CXX_STANDARD 11)

Expand Down
3 changes: 2 additions & 1 deletion cmake/system.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2019 JD.com Inc. JD AI

if (${CMAKE_SYSTEM_NAME} STREQUAL "Android")
if ((NOT CMAKE_SYSTEM_NAME STREQUAL CMAKE_HOST_SYSTEM_NAME) OR
(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR))
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
Expand Down
15 changes: 15 additions & 0 deletions dabnn/bconv.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@
#include <arm_neon.h>
#endif // __ARM_NEON

#if not defined (__aarch64__)
#include <common/baseline.h>
#endif
#include <common/helper.h>
#include <dabnn/im2col.h>
#include "mat.h"

namespace bnn {
#ifdef __aarch64__
inline void bconv_1x1_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
inline void bconv_1x1_128(const Mat &bottom_blob, const Mat &weight,
Expand All @@ -20,8 +24,10 @@ inline void bconv_1x1_256(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
inline void bconv_1x1_512(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob);
#endif
inline void bconv_3x3(const Mat &bottom_blob, const Mat &weight, Mat &top_blob,
const int stride = 1);
#ifdef __aarch64__
inline void bconv_3x3_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride = 1);
inline void bconv_3x3_64_fallback(const Mat &bottom_blob, const Mat &weight,
Expand All @@ -44,8 +50,10 @@ inline void bconv_3x3_128_internal_s1(const uint64_t *bottom_ptr, const int b_w,
inline void bconv_3x3_128_internal_fallback(
const uint64_t *bottom_ptr, const int b_w, const uint64_t *weight_ptr,
float *top_ptr, const int top_h, const int top_w, const int stride = 1);
#endif
} // namespace bnn

#ifdef __aarch64__
inline void bnn::bconv_3x3_64(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride) {
bconv_3x3_64_opt4(bottom_blob, weight, top_blob, 0, stride);
Expand Down Expand Up @@ -819,9 +827,11 @@ inline void unpack_output(float *b, float *a, int width, int height,

#undef A
}
#endif // __aarch64__

inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob, const int stride) {
#ifdef __aarch64__
// TODO: more elegant way
static uint64_t packed_weight[999999];
static uint64_t packed_input[9999999];
Expand Down Expand Up @@ -884,8 +894,12 @@ inline void bnn::bconv_3x3(const Mat &bottom_blob, const Mat &weight,
unpack_output(packed_output, static_cast<float *>(top_blob.data),
top_blob.w, top_blob.h, top_blob.c);
}
#else // __aarch64__
baseline_bconv(bottom_blob, weight, 3, 3, 0, 0, stride, stride, 1, 1, top_blob.c, top_blob);
#endif // __aarch64__
}

#ifdef __aarch64__
inline void bnn::bconv_1x1_512(const Mat &bottom_blob, const Mat &weight,
Mat &top_blob) {
FORZS(th, top_blob.h, 2) {
Expand Down Expand Up @@ -1673,5 +1687,6 @@ inline void bnn::bconv_1x1_64(const Mat &bottom_blob, const Mat &weight,
}
}
}
#endif // __aarch64__

#endif
13 changes: 11 additions & 2 deletions dabnn/bgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#define min(i, j) ((i) < (j) ? (i) : (j))

#if __aarch64__
inline void pack_a(const int kc, const uint64_t *a, const int lda,
uint64_t *a_to);
inline void pack_b(const int kc, const uint64_t *b, const int ldb,
Expand All @@ -28,13 +29,15 @@ inline void inner_kernel(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc,
const int first_time);
#endif // __aarch64__
inline void bgemm_naive(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc);

inline void bgemm(const int m, const int n, const int k, const uint64_t *a,
int lda, const uint64_t *b, const int ldb, float *c,
const int ldc) {
#if __aarch64__
int kc = 32;
int mc = 32;
int i, q, qb, ib;
Expand All @@ -48,8 +51,12 @@ inline void bgemm(const int m, const int n, const int k, const uint64_t *a,
i == 0);
}
}
#else
bgemm_naive(m, n, k, a, lda, b, ldb, c, ldc);
#endif // __aarch64__
}

#if __aarch64__
inline void inner_kernel(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc,
Expand Down Expand Up @@ -414,9 +421,11 @@ inline void micro_kernel(int64_t kc, float *c, const uint64_t *a,
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
"v28", "v29", "v30");
}
#endif // __aarch64__

inline void bgemm_naive(int m, int n, int k, uint64_t *a, int lda, uint64_t *b,
int ldb, float *c, int ldc) {
inline void bgemm_naive(const int m, const int n, const int k,
const uint64_t *a, const int lda, const uint64_t *b,
const int ldb, float *c, const int ldc) {
FORZ(i, m) {
FORZ(j, n) {
FORZ(h, k) {
Expand Down
6 changes: 6 additions & 0 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <glog/logging.h>
#include "mat.h"

#ifdef __aarch64__
inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

Expand Down Expand Up @@ -198,6 +199,7 @@ inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
pack_128(static_cast<float *>(float_mat.data), binary_mat.data,
float_mat.total());
}
#endif // __aarch64__

inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(
Expand All @@ -220,11 +222,15 @@ inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {

inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
#ifdef __aarch64__
if (float_mat.c % 128 == 0) {
pack_mat_128_2(float_mat, binary_mat);
} else {
pack_mat_64(float_mat, binary_mat);
}
#else
pack_mat_64(float_mat, binary_mat);
#endif // __aarch64__
}

#endif /* BITPACK_H */
7 changes: 7 additions & 0 deletions dabnn/layers/AvePool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

namespace bnn {

#ifdef __aarch64__
void ave_pool_2x2_s2(const bnn::Mat &input, bnn::Mat &output) {
FORZ(h, output.h) {
FORZ(w, output.w) {
Expand Down Expand Up @@ -48,6 +49,7 @@ void ave_pool_2x2_s2(const bnn::Mat &input, bnn::Mat &output) {
}
}
}
#endif // __aarch64__

void ave_pool_fallback(const bnn::Mat &input, const size_t pad_h,
const size_t pad_w, const size_t stride_h,
Expand Down Expand Up @@ -114,6 +116,7 @@ AvePool::AvePool(NetCP net, const std::string &name, css input, css output,
}

void AvePool::forward_impl() const {
#ifdef __aarch64__
if (stride_h == 2 && stride_w == 2 && kernel_h == 2 && kernel_w == 2 &&
input_mat->c % 4 == 0) {
pad(*input_mat, pad_h, pad_w, *padded_mat);
Expand All @@ -122,6 +125,10 @@ void AvePool::forward_impl() const {
ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
kernel_h, kernel_w, *output_mat);
}
#else
ave_pool_fallback(*input_mat, pad_h, pad_w, stride_h, stride_w,
kernel_h, kernel_w, *output_mat);
#endif // __aarch64__
}

} // namespace bnn
8 changes: 8 additions & 0 deletions dabnn/layers/BinConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ BinConv::BinConv(NetCP net, const std::string &name, css input, css weight,
}

bool BinConv::direct_conv_compatible() const {
#ifdef __aarch64__
if (weight_mat->h == 3 && weight_mat->w == 3 && input_mat->c == 1 &&
stride_h == stride_w) {
return true;
Expand All @@ -78,10 +79,17 @@ bool BinConv::direct_conv_compatible() const {
return true;
}
return false;
#else
return false;
#endif
}

bool BinConv::gemm_compatible() const {
#ifdef __aarch64__
return weight_mat->h * weight_mat->n * weight_mat->c % 2 == 0;
#else
return false;
#endif
}

void BinConv::forward_impl() const {
Expand Down
6 changes: 6 additions & 0 deletions dabnn/layers/MaxPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

namespace bnn {

#ifdef __aarch64__
void maxpool2x2(const bnn::Mat &input, bnn::Mat &output, const int stride_h = 1,
const int stride_w = 1) {
FORZ(h, output.h) {
Expand Down Expand Up @@ -126,6 +127,7 @@ void maxpool3x3(const bnn::Mat &input, bnn::Mat &output, const int stride_h = 1,
}
}
}
#endif // __aarch64__

MaxPool::MaxPool(NetCP net, const std::string &name, css input, css output,
int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h,
Expand All @@ -150,6 +152,7 @@ MaxPool::MaxPool(NetCP net, const std::string &name, css input, css output,
padded_mat = mat_map[pad_name];
}
void MaxPool::forward_impl() const {
#ifdef __aarch64__
// std::numeric_limits<float>::min() is the closest value to 0, so we use
// -max()
pad(*input_mat, pad_h, pad_w, *padded_mat,
Expand All @@ -164,6 +167,9 @@ void MaxPool::forward_impl() const {
} else {
std::invalid_argument("Not supported max_pool");
}
#else
std::invalid_argument("Not supported max_pool");
#endif // __aarch64__
}

std::string MaxPool::to_str() const {
Expand Down
7 changes: 7 additions & 0 deletions dabnn/layers/Relu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

namespace bnn {
void Relu::forward_impl() const {
#if __ARM_NEON
float32x4_t _zero = vdupq_n_f32(0.f);
float *ptr = static_cast<float *>(*data_mat);
FORZ(i, data_mat->total() / 4) {
Expand All @@ -17,5 +18,11 @@ void Relu::forward_impl() const {

ptr += 4;
}
#else
float *ptr = static_cast<float *>(*data_mat);
FORZ(i, data_mat->total()) {
*ptr = std::max(*ptr, 0.f);
}
#endif // __ARM_NEON
}
} // namespace bnn
4 changes: 4 additions & 0 deletions dabnn/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void Net::prepare() {

shaper.AddShape(name, shape);

#ifdef __aarch64__
if (Shaper::c(shape) % 128 == 0) {
// Re-arrange the bit order
const auto len = shaper.total(shape);
Expand All @@ -95,11 +96,14 @@ void Net::prepare() {
bnn::DataType::Bit, false));
pack_mat_128_2(*tmp, *mat_map_[name]);
} else {
#endif // __aarch64__
add_mat(name, std::make_shared<Mat>(
shape[0], shape[1], shape[2], shape[3],
const_cast<uint64_t *>(data),
bnn::DataType::Bit, false));
#ifdef __aarch64__
}
#endif // __aarch64__
} else if (tensor->data_type() == flatbnn::DataType::Float32) {
Shaper::Shape shape(tensor->shape()->begin(),
tensor->shape()->end());
Expand Down
4 changes: 2 additions & 2 deletions tests/bconv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ TEST(bconv_test, bconv_test_3x3_64) {

bnn::Mat c(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
c.fill<float>(0);
bnn::bconv_3x3_64(padded, b, c);
bnn::bconv_3x3(padded, b, c);

bnn::Mat expected(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
expected.fill<float>(0);
Expand Down Expand Up @@ -229,7 +229,7 @@ TEST(bconv_test, bconv_test_3x3_64_s2) {

bnn::Mat c(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
c.fill<float>(0);
bnn::bconv_3x3_64(padded, b, c, 2);
bnn::bconv_3x3(padded, b, c, 2);

bnn::Mat expected(CHEIGHT, CWIDTH, NUM_OUTPUT, bnn::DataType::Float);
expected.fill<float>(0);
Expand Down

0 comments on commit 2acd1a3

Please sign in to comment.