Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ Tensor ] Refactor blas/math related files into cpu backend considering arch-dep @open sesame 10/02 13:19 #2549

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Applications/AlexNet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
10 changes: 4 additions & 6 deletions Applications/Android/NNDetector/app/src/main/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,15 @@ include $(CLEAR_VARS)
NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/include/nntrainer
SIMPLESHOT_DIR = .


LOCAL_ARM_NEON := true
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib -DARM=1
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ -DARM=1
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions -fopenmp -static-openmp -DARM=1
LOCAL_CFLAGS += -pthread -fexceptions -fopenmp -static-openmp
LOCAL_LDFLAGS += -fexceptions -fopenmp -static-openmp
LOCAL_MODULE_TAGS := optional
LOCAL_ARM_MODE := arm
LOCAL_MODULE := simpleshot_jni
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp -ljnigraphics
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp -ljnigraphics -DARM=1

LOCAL_SRC_FILES := simpleshot.cpp simpleshot_jni.cpp dataloader.cpp image.cpp
LOCAL_SHARED_LIBRARIES := ccapi-nntrainer nntrainer
Expand Down
3 changes: 3 additions & 0 deletions Applications/Custom/LayerClient/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
${ML_API_COMMON_INCLUDES}
Expand Down
3 changes: 3 additions & 0 deletions Applications/LLaMA/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Layers/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Multi_input/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/PicoGPT/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/ProductRatings/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer/include \
$(NNTRAINER_ROOT)/nntrainer/layers \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor
Expand Down
3 changes: 3 additions & 0 deletions Applications/ReinforcementLearning/DeepQ/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Resnet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/models \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
Expand Down
3 changes: 3 additions & 0 deletions Applications/VGG/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv2/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv3/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
5 changes: 4 additions & 1 deletion debian/nntrainer-dev.install
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
/usr/include/nntrainer/short_tensor.h
/usr/include/nntrainer/float_tensor.h
/usr/include/nntrainer/tensor_wrap_specs.h
/usr/include/nntrainer/blas_interface.h
/usr/include/nntrainer/fallback_internal.h
/usr/include/nntrainer/cblas_interface.h
/usr/include/nntrainer/x86_compute_backend.h
/usr/include/nntrainer/cpu_backend.h
/usr/include/nntrainer/var_grad.h
/usr/include/nntrainer/weight.h
# todo: update dataset headers
Expand Down
6 changes: 6 additions & 0 deletions jni/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ and_conf.set('VERSION_MAJOR', nntrainer_version_split[0])
and_conf.set('VERSION_MINOR', nntrainer_version_split[1])
and_conf.set('VERSION_MICRO', nntrainer_version_split[2])

arch = host_machine.cpu_family()
and_conf.set('ARM', 1)
if arch == 'arm'
and_conf.set('ARMV7', 1)
endif

if get_option('enable-capi').enabled()
and_conf.set('MESON_CAPI_NNTRAINER_SRCS', ' '.join(capi_src))
and_conf.set('MESON_CAPI_NNTRAINER_INCS', ' '.join(capi_inc_abs))
Expand Down
21 changes: 15 additions & 6 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,23 @@ warning_c_flags = [
'-Wno-error=varargs'
]

arch = host_machine.cpu_family()
if arch == 'arm' or arch == 'aarch64' or get_option('platform') == 'android'
message('Build for ARM architecture')
extra_defines += '-DARM=1'
if arch == 'arm'
extra_defines += '-DARMV7=1'
endif
elif arch == 'x86' or arch == 'x86_64'
message('Build for X86 architecture')
if get_option('enable-fp16')
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
extra_defines += '-DX86=1'
endif

if get_option('enable-fp16')
arch = host_machine.cpu_family()
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
Expand Down Expand Up @@ -110,11 +124,6 @@ if get_option('enable-fp16')
if cc.version().version_compare('>=12.1.0')
message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
extra_defines += '-DENABLE_FP16=1'
if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/acti_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#define __ACTI_FUNC_H__
#ifdef __cplusplus

#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>

namespace nntrainer {

Expand Down
5 changes: 2 additions & 3 deletions nntrainer/layers/activation_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include <vector>

#include <activation_layer.h>
#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
Expand All @@ -32,8 +32,7 @@

namespace nntrainer {
ActivationLayer::ActivationLayer() :
Layer(),
activation_props(new PropTypes(props::Activation())) {
Layer(), activation_props(new PropTypes(props::Activation())) {
acti_func.setActiFunc(ActivationType::ACT_NONE);
}

Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/conv2d_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#include <limits>
#include <string>

#include <blas_interface.h>
#include <conv2d_layer.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <lazy_tensor.h>
#include <nntr_threads.h>
Expand Down
Loading
Loading