From aa62bd50e0eb6224476d81853fd6c461f154fd77 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:26:55 +0800 Subject: [PATCH 1/9] update --- .github/workflows/cmake-darwin.yml | 22 ++++++++ CMakeLists.txt | 15 +++-- src/libllm/CMakeLists.txt | 70 +++++++++++++++--------- src/libllm/cpu/kernel/kernel.cc | 24 ++++++-- src/libllm/cpu/kernel/kernel_fallback.cc | 19 ++++++- src/libllm/cpu/kernel/sgemv.h | 4 +- src/libllm/cpu/kernel/skernel.h | 7 +++ src/libllm/cpu/kernel/test.cc | 6 +- src/libllm/dtype.cc | 8 --- src/libllm/lut/path_darwin.cc | 62 +++++++++++++++++++++ src/libllm/lut/platform.h | 2 - src/libllm/lut/platform_linux.cc | 17 ------ src/libllm/lut/zip_file.cc | 2 + src/libllm/state_map.cc | 3 - 14 files changed, 192 insertions(+), 69 deletions(-) create mode 100644 .github/workflows/cmake-darwin.yml create mode 100644 src/libllm/lut/path_darwin.cc diff --git a/.github/workflows/cmake-darwin.yml b/.github/workflows/cmake-darwin.yml new file mode 100644 index 0000000..c6e80c1 --- /dev/null +++ b/.github/workflows/cmake-darwin.yml @@ -0,0 +1,22 @@ +name: macOS + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: macos-13-xlarge + steps: + - uses: actions/checkout@v3 + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + - name: Test + run: ctest --verbose -C ${{env.BUILD_TYPE}} --test-dir ${{github.workspace}}/build/src/libllm diff --git a/CMakeLists.txt b/CMakeLists.txt index 046d931..2cce5e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,12 +40,19 @@ set(MKL_PREFIX "/opt/intel/mkl" CACHE STRING "Prefix for MKL headers and librari #add_link_options(-fsanitize=address) if(WIN32) - add_definitions( "/D_CRT_SECURE_NO_WARNINGS /DCATCH_AMALGAMATED_CUSTOM_MAIN /DCATCH_CONFIG_PREFIX_ALL" ) + add_definitions( "/D_CRT_SECURE_NO_WARNINGS /DCATCH_AMALGAMATED_CUSTOM_MAIN /DCATCH_CONFIG_PREFIX_ALL" ) endif(WIN32) if(UNIX) - add_definitions( "-DCATCH_AMALGAMATED_CUSTOM_MAIN -DCATCH_CONFIG_PREFIX_ALL" ) - set(CMAKE_CXX_FLAGS "-O3 -g") - set(CMAKE_C_FLAGS "-O3 -g") + add_definitions( "-DCATCH_AMALGAMATED_CUSTOM_MAIN -DCATCH_CONFIG_PREFIX_ALL -D_FILE_OFFSET_BITS=64" ) + set(CMAKE_CXX_FLAGS "-O0 -g") + set(CMAKE_C_FLAGS "-O0 -g") endif(UNIX) +if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") + add_compile_definitions(LIBLLM_ARCH_X86_64) +endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") +if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") + add_compile_definitions(LIBLLM_ARCH_AARCH64) +endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") + add_subdirectory("src/libllm") diff --git a/src/libllm/CMakeLists.txt b/src/libllm/CMakeLists.txt index f594ee4..e3e8e82 100644 --- a/src/libllm/CMakeLists.txt +++ b/src/libllm/CMakeLists.txt @@ -20,8 +20,6 @@ set(lut_SOURCES "lut/zip_file.cc") set(libllm_SOURCES - "cpu/kernel/kernel_avx2.cc" - "cpu/kernel/kernel_avx512.cc" "cpu/kernel/kernel_fallback.cc" "cpu/kernel/kernel.cc" "cpu/kernel/util.cc" @@ -80,7 +78,7 @@ set(llm_SOURCES "dialog_manager.cc" "llm_main.cc") -set(libllm_INCDIR ".." "../../third_party") +set(libllm_INCDIR ".." "../../third_party" ${OpenMP_CXX_INCLUDE_DIRS}) if (WITH_CUDA) set(libllm_INCDIR ${libllm_INCDIR} ${CUDAToolkit_INCLUDE_DIRS}) @@ -118,38 +116,58 @@ if (WITH_CUDA) "lut/internal/log.cc") endif() +# OS specific code if(WIN32) - set_source_files_properties( - "cpu/kernel/kernel_avx512.cc" - PROPERTIES COMPILE_FLAGS /arch:AVX512) - set_source_files_properties( - "cpu/kernel/kernel_avx2.cc" - PROPERTIES COMPILE_FLAGS /arch:AVX2) set(libllm_SOURCES ${libllm_SOURCES} "lut/path_windows.cc" "lut/platform_windows.cc" "lut/shared_library_windows.cc") -endif(WIN32) - +endif() if(UNIX) - set_source_files_properties( - "cpu/kernel/kernel_avx512.cc" - PROPERTIES COMPILE_FLAGS "-mavx512f") - set_source_files_properties( - "cpu/kernel/kernel_avx2.cc" - PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -mf16c") set(libllm_SOURCES ${libllm_SOURCES} - "lut/path_linux.cc" "lut/platform_linux.cc" "lut/shared_library_linux.cc") -endif(UNIX) +endif() +if(UNIX AND APPLE) + set(libllm_SOURCES + ${libllm_SOURCES} + "lut/path_darwin.cc") +endif() +if(UNIX AND NOT APPLE) + set(libllm_SOURCES + ${libllm_SOURCES} + "lut/path_linux.cc") +endif() + +# CPU specific code +if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(libllm_SOURCES + ${libllm_SOURCES} + "cpu/kernel/kernel_avx2.cc" + "cpu/kernel/kernel_avx512.cc") + if(WIN32) + set_source_files_properties( + "cpu/kernel/kernel_avx512.cc" + PROPERTIES COMPILE_FLAGS /arch:AVX512) + set_source_files_properties( + "cpu/kernel/kernel_avx2.cc" + PROPERTIES COMPILE_FLAGS /arch:AVX2) + endif(WIN32) + if(UNIX) + set_source_files_properties( + "cpu/kernel/kernel_avx512.cc" + PROPERTIES COMPILE_FLAGS "-mavx512f") + set_source_files_properties( + "cpu/kernel/kernel_avx2.cc" + PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -mf16c") + endif(UNIX) +endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") add_library(lut STATIC ${lut_SOURCES}) set_target_properties(lut PROPERTIES CXX_VISIBILITY_PRESET hidden) -target_include_directories(lut PRIVATE ".." "../../third_party/") - +target_include_directories(lut PRIVATE ${libllm_INCDIR}) set(libllm_LIBADD lut @@ -165,14 +183,14 @@ target_include_directories(libllm_static PRIVATE ${libllm_INCDIR}) add_library(libllm SHARED $) target_link_libraries(libllm ${libllm_LIBADD} ) set_property(TARGET libllm PROPERTY OUTPUT_NAME llm) -if(UNIX) +if(UNIX AND NOT APPLE) target_link_options(libllm PUBLIC "-Wl,--no-undefined") -endif(UNIX) +endif() add_library(catch2 STATIC "../../third_party/catch2/catch_amalgamated.cpp") add_executable(unittest ${unittest_SOURCES}) -target_include_directories(unittest PRIVATE .. "../../third_party/") -target_link_libraries(unittest libllm_static lut catch2) +target_include_directories(unittest PRIVATE ${libllm_INCDIR}) +target_link_libraries(unittest libllm_static lut catch2 OpenMP::OpenMP_CXX) add_executable(llm ${llm_SOURCES}) target_include_directories(llm PRIVATE ..) @@ -180,7 +198,7 @@ target_link_libraries(llm libllm lut) if (WITH_CUDA) add_library(llmextcublas SHARED ${llmextcublas_SOURCES}) - target_include_directories(llmextcublas PRIVATE .. "../../third_party/") + target_include_directories(llmextcublas PRIVATE ${libllm_INCDIR}) target_link_libraries(llmextcublas lut CUDA::cublas) if(UNIX) target_link_options(llmextcublas PUBLIC "-Wl,--no-undefined") diff --git a/src/libllm/cpu/kernel/kernel.cc b/src/libllm/cpu/kernel/kernel.cc index c60eeb3..6617948 100644 --- a/src/libllm/cpu/kernel/kernel.cc +++ b/src/libllm/cpu/kernel/kernel.cc @@ -40,7 +40,8 @@ namespace kernel { enum class CPUMathBackend { DEFAULT, AVX2, - AVX512 + AVX512, + ASIMDHP }; CPUMathBackend findBestCpuMathBackend() { @@ -49,19 +50,30 @@ CPUMathBackend findBestCpuMathBackend() { bool isaAvx512f = ruapu_supports("avx512f") > 0; bool isaF16c = ruapu_supports("f16c") > 0; +#ifdef LIBLLM_ARCH_X86_64 LOG(INFO) << lut::sprintf( "ISA support: AVX2=%d F16C=%d AVX512F=%d", isaAvx2, isaF16c, isaAvx512f); +#endif // LIBLLM_ARCH_X86_64 +#ifdef LIBLLM_ARCH_X86_64 if (isaAvx512f && isaF16c) { LOG(INFO) << "Use Avx512 backend."; return CPUMathBackend::AVX512; - } else if (isaAvx2 && isaF16c) { + } + + if (isaAvx2 && isaF16c) { LOG(INFO) << "Use Avx2 backend."; return CPUMathBackend::AVX2; - } else { - LOG(FATAL) << "CPU not supported (AVX2 and F16C is required)."; - NOT_IMPL(); } +#endif // LIBLLM_ARCH_X86_64 + +#ifdef LIBLLM_ARCH_AARCH64 + LOG(INFO) << "Use default backend."; + return CPUMathBackend::DEFAULT; +#endif // LIBLLM_ARCH_AARCH64 + + LOG(FATAL) << "CPU not supported."; + NOT_IMPL(); } // instance of Api. @@ -106,6 +118,7 @@ void Api::init() { _instance = new Api(); switch (findBestCpuMathBackend()) { +#ifdef LIBLLM_ARCH_X86_64 case CPUMathBackend::AVX512: _instance->_sgemm = std::make_unique(); _instance->_sgemmOmp = std::make_unique(); @@ -120,6 +133,7 @@ void Api::init() { _instance->_q4dequant = std::make_unique(); _instance->_cvtHalfToFloat = std::make_unique(); break; +#endif // LIBLLM_ARCH_X86_64 case CPUMathBackend::DEFAULT: _instance->_sgemm = std::make_unique(); _instance->_sgemmOmp = std::make_unique(); diff --git a/src/libllm/cpu/kernel/kernel_fallback.cc b/src/libllm/cpu/kernel/kernel_fallback.cc index 5ea1158..891c9aa 100644 --- a/src/libllm/cpu/kernel/kernel_fallback.cc +++ b/src/libllm/cpu/kernel/kernel_fallback.cc @@ -105,12 +105,29 @@ void SAxpyFallbackKernel::apply(int64_t n, float a, PCFp32 x, PFp32 y) { const float *px = x; float *py = y; for (int i = 0; i < n; ++i) { - *py = a * *px; + *py += a * *px; ++px; ++py; } } +void SAxpyFallbackKernel::applyColumn(const SGEMVArgs &args, int column, float *y) { + apply(args.N, args.x[column], args.A + column * args.lda, y); +} + +float SDotFallbackKernel::apply(int64_t n, const float *x, const float *y) { + float sum = 0; + for (int64_t i = 0; i < n; ++i) { + sum += x[i] * y[i]; + } + + return sum; +} + +float SDotFallbackKernel::applyRow(const SGEMVArgs &args, int row) { + return apply(args.N, args.A + row * args.lda, args.x); +} + void CvtHalfToFloatFallbackKernel::apply(int64_t n, PCFp16 x, PFp32 y) { for (int i = 0; i < n; ++i) { y[i] = lut::cvtsh_ss(x[i]); diff --git a/src/libllm/cpu/kernel/sgemv.h b/src/libllm/cpu/kernel/sgemv.h index b527923..86e5dd7 100644 --- a/src/libllm/cpu/kernel/sgemv.h +++ b/src/libllm/cpu/kernel/sgemv.h @@ -48,10 +48,10 @@ class SGEMVImpl : public SGEMV { typedef SGEMVImpl SGEMVImplAvx512; typedef SGEMVImpl SGEMVImplAvx2; -typedef SGEMVImpl SGEMVImplDefault; +typedef SGEMVImpl SGEMVImplDefault; typedef SGEMVImpl SGEMVImplAvx512OMP; typedef SGEMVImpl SGEMVImplAvx2OMP; -typedef SGEMVImpl SGEMVImplDefaultOMP; +typedef SGEMVImpl SGEMVImplDefaultOMP; } // namespace kernel } // namespace cpu diff --git a/src/libllm/cpu/kernel/skernel.h b/src/libllm/cpu/kernel/skernel.h index 3e9d7e8..b4964d0 100644 --- a/src/libllm/cpu/kernel/skernel.h +++ b/src/libllm/cpu/kernel/skernel.h @@ -83,6 +83,13 @@ struct SDotAvx2Kernel { static float applyRow(const SGEMVArgs &args, int row); }; +struct SDotFallbackKernel { + typedef float ValueType; + + static float apply(int64_t n, const float *x, const float *y); + static float applyRow(const SGEMVArgs &args, int row); +}; + } // namespace kernel } // namespace cpu } // namespace op diff --git a/src/libllm/cpu/kernel/test.cc b/src/libllm/cpu/kernel/test.cc index 1ce7b3d..68291c0 100644 --- a/src/libllm/cpu/kernel/test.cc +++ b/src/libllm/cpu/kernel/test.cc @@ -164,6 +164,9 @@ void refSgemm( } } + +#ifdef LIBLLM_ARCH_X86_64 + CATCH_TEST_CASE("test q4 dequantization", "[lymath][dequant][q4]") { constexpr int DIM = DequantMinElemPerThread * 2 + GroupSizeQ4; @@ -269,6 +272,7 @@ CATCH_TEST_CASE("test q4 dot kernels apply row", "[lymath][dot][q4]") { float a = DotQ4Avx2Kernel::apply(NUM_COL * 2, x2.data(), {A.data(), scaleA.data(), zeroA.data()}, 0); CATCH_REQUIRE(isClose(a, a0 + a1)); } +#endif // LIBLLM_ARCH_X86_64 CATCH_TEST_CASE("test lymath_q4gemm", "[lymath][api][q4]") { testGemmQ4(true, 1, 32, 128); @@ -359,7 +363,7 @@ void testHalfToFloat(int n) { random.fill(lut::makeSpan(yr)); std::transform(yr.begin(), yr.end(), x.begin(), lut::cvtss_sh); - CvtHalfToFloatAvx2OMP().apply(n, x.data(), y.data()); + convertHalfToFloat(n, x.data(), y.data()); CATCH_REQUIRE(isClose(yr, y, 1e-4, 1e-3)); } diff --git a/src/libllm/dtype.cc b/src/libllm/dtype.cc index 38cf9de..ce1ad59 100644 --- a/src/libllm/dtype.cc +++ b/src/libllm/dtype.cc @@ -70,14 +70,6 @@ DType DType::getTypeImpl() { #endif -template DType DType::getTypeImpl(); -template DType DType::getTypeImpl(); -template DType DType::getTypeImpl(); -template DType DType::getTypeImpl(); -template DType DType::getTypeImpl(); -template DType DType::getTypeImpl(); - - int64_t DType::getTotalSize(int64_t numel) const { switch (_dtype) { case DType::kFloat: diff --git a/src/libllm/lut/path_darwin.cc b/src/libllm/lut/path_darwin.cc new file mode 100644 index 0000000..9d1b416 --- /dev/null +++ b/src/libllm/lut/path_darwin.cc @@ -0,0 +1,62 @@ +// The MIT License (MIT) +// +// Copyright (c) 2023 Xiaoyang Chen +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software +// and associated documentation files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#include "libllm/lut/path.h" + +#include +#include +#include +#include "libllm/lut/error.h" +#include "libllm/lut/log.h" +#include "libllm/lut/strings.h" + +namespace lut { + +Path Path::currentExecutablePath() { + char path[PATH_MAX + 1]; + uint32_t size = sizeof(path); + int ret = _NSGetExecutablePath(path, &size); + if (ret) { + throw lut::AbortedError("failed to call _NSGetExecutablePath()"); + } + + return Path(path); +} + +Path Path::currentModulePath() { + Dl_info info; + int success = dladdr(reinterpret_cast(¤tModulePath), &info); + CHECK(success); + + return Path(info.dli_fname); +} + +bool Path::isabs() const { + if (_path.size() == 0) return false; + if (_path[0] == '/') return true; + + return false; +} + +std::string Path::normPath(const std::string &path) { + return path; +} + + +} // namespace lut diff --git a/src/libllm/lut/platform.h b/src/libllm/lut/platform.h index 3d3dfb8..c0a83f9 100644 --- a/src/libllm/lut/platform.h +++ b/src/libllm/lut/platform.h @@ -35,8 +35,6 @@ namespace lut { -bool isAvx512Available(); -bool isAvx2Available(); void *alloc32ByteAlignedMem(int64_t nbytes); void free32ByteAlignedMem(void *); const char *getPathDelim(); diff --git a/src/libllm/lut/platform_linux.cc b/src/libllm/lut/platform_linux.cc index a5fb667..a25072d 100644 --- a/src/libllm/lut/platform_linux.cc +++ b/src/libllm/lut/platform_linux.cc @@ -23,23 +23,6 @@ namespace lut { -void initCpuInfo() { -#if !defined(__clang__) || __clang_major__ >= 6 - __builtin_cpu_init(); -#endif -} - -bool isAvx512Available() { - initCpuInfo(); - return __builtin_cpu_supports("avx512f") != 0; -} - -bool isAvx2Available() { - initCpuInfo(); - return __builtin_cpu_supports("avx2") != 0; -} - - void *alloc32ByteAlignedMem(int64_t size) { if (size % 32 != 0) { size += (32 - size % 32); diff --git a/src/libllm/lut/zip_file.cc b/src/libllm/lut/zip_file.cc index 0617aaa..e51a011 100644 --- a/src/libllm/lut/zip_file.cc +++ b/src/libllm/lut/zip_file.cc @@ -27,6 +27,8 @@ #ifdef _MSC_VER #define FSEEK64 _fseeki64 +#elif _FILE_OFFSET_BITS == 64 +#define FSEEK64 fseeko #else #define FSEEK64 fseeko64 #endif // _MSC_VER diff --git a/src/libllm/state_map.cc b/src/libllm/state_map.cc index 7ae3bb7..600e772 100644 --- a/src/libllm/state_map.cc +++ b/src/libllm/state_map.cc @@ -120,8 +120,5 @@ bool StateMap::hasValue(const std::string &name) const { return _intDict.find(name) != _intDict.end(); } -template int StateMap::getValue(const std::string &name) const; -template void StateMap::putValue(const std::string &name, int value); -template bool StateMap::hasValue(const std::string &name) const; } From d8878141924f3052a406c08c4ed2948b6231bf74 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:36:02 +0800 Subject: [PATCH 2/9] update --- .github/workflows/cmake-darwin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-darwin.yml b/.github/workflows/cmake-darwin.yml index c6e80c1..0026fb6 100644 --- a/.github/workflows/cmake-darwin.yml +++ b/.github/workflows/cmake-darwin.yml @@ -11,7 +11,7 @@ env: jobs: build: - runs-on: macos-13-xlarge + runs-on: macos-14 steps: - uses: actions/checkout@v3 - name: Configure CMake From 37941ea2b15bb62ef7d77df9dd0236497e0adc79 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:41:00 +0800 Subject: [PATCH 3/9] update --- .github/workflows/cmake-darwin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-darwin.yml b/.github/workflows/cmake-darwin.yml index 0026fb6..f5a7e3e 100644 --- a/.github/workflows/cmake-darwin.yml +++ b/.github/workflows/cmake-darwin.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: CXX=$(brew --prefix llvm@15)/bin/clang cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test From 8c313c2e7433c594684c751322966edd10743f98 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:42:31 +0800 Subject: [PATCH 4/9] update --- .github/workflows/cmake-darwin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-darwin.yml b/.github/workflows/cmake-darwin.yml index f5a7e3e..6995607 100644 --- a/.github/workflows/cmake-darwin.yml +++ b/.github/workflows/cmake-darwin.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Configure CMake - run: CXX=$(brew --prefix llvm@15)/bin/clang cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: OpenMP_ROOT=$(brew --prefix)/opt/libomp cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test From a558c5e6b9442d0acb6c510a4480afcf903d584a Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:43:57 +0800 Subject: [PATCH 5/9] update --- .github/workflows/cmake-darwin.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cmake-darwin.yml b/.github/workflows/cmake-darwin.yml index 6995607..20a7c4d 100644 --- a/.github/workflows/cmake-darwin.yml +++ b/.github/workflows/cmake-darwin.yml @@ -14,6 +14,8 @@ jobs: runs-on: macos-14 steps: - uses: actions/checkout@v3 + - name: Install openmp + run: brew install libomp - name: Configure CMake run: OpenMP_ROOT=$(brew --prefix)/opt/libomp cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build From 5f86a6139ca8c0e8ade2b4f9b30e41156b9c5f49 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:49:38 +0800 Subject: [PATCH 6/9] update --- .github/workflows/cmake-windows.yml | 2 +- CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml index 263e1f1..9f54d36 100644 --- a/.github/workflows/cmake-windows.yml +++ b/.github/workflows/cmake-windows.yml @@ -19,4 +19,4 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test - run: ctest --verbose -C ${{env.BUILD_TYPE}} --test-dir ${{github.workspace}}/build/src/libllm + run: ${{github.workspace}}/build/src/libllm/unittest diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cce5e3..a8f82b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,8 +44,8 @@ if(WIN32) endif(WIN32) if(UNIX) add_definitions( "-DCATCH_AMALGAMATED_CUSTOM_MAIN -DCATCH_CONFIG_PREFIX_ALL -D_FILE_OFFSET_BITS=64" ) - set(CMAKE_CXX_FLAGS "-O0 -g") - set(CMAKE_C_FLAGS "-O0 -g") + set(CMAKE_CXX_FLAGS "-O3 -g") + set(CMAKE_C_FLAGS "-O3 -g") endif(UNIX) if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") From 358683246e21c7928588e99fabd9e6e7cb935c80 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:52:08 +0800 Subject: [PATCH 7/9] update --- .github/workflows/cmake-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml index 9f54d36..ee0127a 100644 --- a/.github/workflows/cmake-windows.yml +++ b/.github/workflows/cmake-windows.yml @@ -19,4 +19,4 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test - run: ${{github.workspace}}/build/src/libllm/unittest + run: ${{github.workspace}}\build\src\libllm\unittest.exe From 0950ef6d48ee6e239c2d057d76097d062fba05c3 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 17:54:51 +0800 Subject: [PATCH 8/9] update --- .github/workflows/cmake-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml index ee0127a..0bad8c7 100644 --- a/.github/workflows/cmake-windows.yml +++ b/.github/workflows/cmake-windows.yml @@ -19,4 +19,4 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test - run: ${{github.workspace}}\build\src\libllm\unittest.exe + run: ${{github.workspace}}\build\src\libllm\${{env.BUILD_TYPE}}\unittest.exe From 32b8e8e64fa6d4244609bbea50e345aeaa6ab7b4 Mon Sep 17 00:00:00 2001 From: Xiaoyang Chen Date: Wed, 6 Mar 2024 20:38:07 +0800 Subject: [PATCH 9/9] update --- CMakeLists.txt | 11 +++++++---- src/libllm/CMakeLists.txt | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8f82b7..b72012a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,11 +48,14 @@ if(UNIX) set(CMAKE_C_FLAGS "-O3 -g") endif(UNIX) -if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") +message("CMAKE_HOST_SYSTEM_PROCESSOR=" ${CMAKE_HOST_SYSTEM_PROCESSOR}) + +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") add_compile_definitions(LIBLLM_ARCH_X86_64) -endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") -if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") + set(LIBLLM_KERNEL_X86_64 ON) +endif() +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)") add_compile_definitions(LIBLLM_ARCH_AARCH64) -endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") +endif() add_subdirectory("src/libllm") diff --git a/src/libllm/CMakeLists.txt b/src/libllm/CMakeLists.txt index e3e8e82..c5f0190 100644 --- a/src/libllm/CMakeLists.txt +++ b/src/libllm/CMakeLists.txt @@ -142,7 +142,7 @@ if(UNIX AND NOT APPLE) endif() # CPU specific code -if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") +if(LIBLLM_KERNEL_X86_64) set(libllm_SOURCES ${libllm_SOURCES} "cpu/kernel/kernel_avx2.cc" @@ -163,7 +163,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") "cpu/kernel/kernel_avx2.cc" PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -mf16c") endif(UNIX) -endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") +endif() add_library(lut STATIC ${lut_SOURCES}) set_target_properties(lut PROPERTIES CXX_VISIBILITY_PRESET hidden)