Skip to content

Commit

Permalink
Fix TFloat builds for Apple M1
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Jul 13, 2021
1 parent 01ae69e commit a09531a
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ noinst_LTLIBRARIES += libtesseract_native.la
libtesseract_native_la_CXXFLAGS = -O3 -ffast-math
if MARCH_NATIVE_OPT
libtesseract_native_la_CXXFLAGS += -march=native -mtune=native
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
endif
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp

if HAVE_AVX
Expand Down
1 change: 1 addition & 0 deletions src/arch/dotproduct.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
// Uses Intel SSE intrinsics to access the SIMD instruction set.
TFloat DotProductSSE(const TFloat *u, const TFloat *v, int n);

// Uses the vDSP routines (vDSP_dotpr / vDSP_dotprD) to compute the dot product.
// The definition is only compiled when HAVE_FRAMEWORK_ACCELERATE is defined.
TFloat DotProductAccelerate(const TFloat *u, const TFloat *v, int n);
} // namespace tesseract.

#endif // TESSERACT_ARCH_DOTPRODUCT_H_
13 changes: 11 additions & 2 deletions src/arch/intsimdmatrixneon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#if defined(__ARM_NEON)

# include "intsimdmatrix.h"
# include "tfloat.h"

# include <algorithm>
# include <cstdint>
Expand All @@ -27,6 +28,12 @@

namespace tesseract {

#if defined(FAST_FLOAT)

const IntSimdMatrix *IntSimdMatrix::intSimdMatrixNEON = nullptr;

#else

// Number of outputs held in each register. (Actually, we use a
// pair of 4x32 registers, so 8 x 32 bit ints).
constexpr int kNumOutputsPerRegister = 8;
Expand Down Expand Up @@ -186,7 +193,7 @@ static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *
num_out & (kNumOutputsPerRegister - 1));
}

static const IntSimdMatrix intSimdMatrix = {
static const IntSimdMatrix simdMatrix = {
// Function.
matrixDotVector,
// Number of 32 bit outputs held in each register.
Expand All @@ -199,7 +206,9 @@ static const IntSimdMatrix intSimdMatrix = {
kNumInputsPerGroup
};

const IntSimdMatrix *IntSimdMatrix::intSimdMatrixNEON = &intSimdMatrix;
const IntSimdMatrix *IntSimdMatrix::intSimdMatrixNEON = &simdMatrix;

#endif // FAST_FLOAT

} // namespace tesseract.

Expand Down
10 changes: 7 additions & 3 deletions src/arch/simddetect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,14 @@ bool SIMDDetect::sse_available_;
#endif

#if defined(HAVE_FRAMEWORK_ACCELERATE)
static double DotProductAccelerate(const double* u, const double* v, int n) {
double total = 0.0;
// Computes the dot product of u and v over n elements by delegating to vDSP,
// reading both inputs with a stride of 1, and returns the accumulated value.
TFloat DotProductAccelerate(const TFloat* u, const TFloat* v, int n) {
TFloat total = 0;
const int stride = 1;
#if defined(FAST_FLOAT)
// NOTE(review): presumably TFloat is float in FAST_FLOAT builds, matching the
// single-precision vDSP_dotpr signature -- confirm against tfloat.h.
vDSP_dotpr(u, stride, v, stride, &total, n);
#else
// NOTE(review): presumably TFloat is double here, matching vDSP_dotprD.
vDSP_dotprD(u, stride, v, stride, &total, n);
#endif
return total;
}
#endif
Expand Down Expand Up @@ -138,7 +142,7 @@ SIMDDetect::SIMDDetect() {
SetDotProduct(DotProductGeneric);
const char* dotproduct_env = getenv("DOTPRODUCT");
if (dotproduct_env != nullptr) {
dotproduct = env;
dotproduct = dotproduct_env;
Update();
if (strcmp(dotproduct_env, "native") == 0) {
SetDotProduct(DotProductNative);
Expand Down
24 changes: 23 additions & 1 deletion unittest/dotproduct_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class DotProductTest : public ::testing::Test {
std::locale::global(std::locale(""));
}
void RunTest(TFloat (*f)(const TFloat *u, const TFloat *v, int n));
static const size_t multiplications = 500000000;
static const size_t multiplications = 5000000000U;
static const size_t n = 40;
//static const size_t n = 1000000;
TFloat u[n];
Expand Down Expand Up @@ -118,4 +118,26 @@ TEST_F(DotProductTest, FMA) {
#endif
}

#if defined(HAVE_FRAMEWORK_ACCELERATE)
// Runs the shared RunTest check on DotProductAccelerate. Only built when
// HAVE_FRAMEWORK_ACCELERATE is defined.
TEST_F(DotProductTest, Accelerate) {
RunTest(DotProductAccelerate);
}
#endif

// NOTE(review): the NEON test below is compiled out by "#if 0"; presumably it
// is waiting for a DotProductNEON implementation -- confirm before enabling.
#if 0
// Tests that the NEON implementation gets the same result as the vanilla.
TEST_F(DotProductTest, NEON) {
#if defined(HAVE_NEON)
// Skip at run time when the CPU reports no NEON support.
if (!SIMDDetect::IsNEONAvailable()) {
GTEST_LOG_(INFO) << "No NEON found! Not tested!";
GTEST_SKIP();
}
RunTest(DotProductNEON);
#else
// Built without HAVE_NEON: log the reason and skip.
GTEST_LOG_(INFO) << "NEON unsupported! Not tested!";
GTEST_SKIP();
#endif
}
#endif

} // namespace tesseract

0 comments on commit a09531a

Please sign in to comment.