Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-11905: [C++] Fix SIMD detection on macOS #9657

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 79 additions & 26 deletions cpp/src/arrow/util/cpu_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,19 @@
#include "arrow/result.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/optional.h"
#include "arrow/util/string.h"

namespace arrow {
namespace internal {

namespace {

using std::max;

static constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
static constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
static constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M
constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M

#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
Expand All @@ -74,6 +80,23 @@ int64_t _xgetbv(int xcr) {
}
#endif

#ifdef __APPLE__
// Query the sysctl entry `name` via sysctlbyname() into a zero-initialized
// 64-bit integer.
//
// Returns the value read when the call succeeds, and util::nullopt when it
// fails. A failure is logged as a warning unless errno is one of the codes
// listed below for a sysctl entry that does not exist.
util::optional<int64_t> IntegerSysCtlByName(const char* name) {
  int64_t value = 0;
  size_t value_size = sizeof(int64_t);
  if (sysctlbyname(name, &value, &value_size, nullptr, 0) != 0) {
    // Snapshot errno once so the check and the message use the same code.
    const int err = errno;
    // ENOENT is the official errno value for non-existing sysctl's,
    // but EINVAL and ENOTSUP have been seen in the wild.
    const bool entry_missing = (err == ENOENT || err == EINVAL || err == ENOTSUP);
    if (!entry_missing) {
      auto st = IOErrorFromErrno(err, "sysctlbyname failed for '", name, "'");
      ARROW_LOG(WARNING) << st.ToString();
    }
    return util::nullopt;
  }
  return value;
}
#endif

#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
// Unlike x86 with '__cpuid', Arm64 has no direct instruction to get the cache size.
// Instead, get the Arm64 cache size by reading
// '/sys/devices/system/cpu/cpu0/cache/index*/size'.
Expand All @@ -82,11 +105,11 @@ int64_t _xgetbv(int xcr) {
// index1: L1 Icache
// index2: L2 cache
// index3: L3 cache
static const char* kL1CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index0/size";
static const char* kL2CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index2/size";
static const char* kL3CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index3/size";
const char* kL1CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index0/size";
const char* kL2CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index2/size";
const char* kL3CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index3/size";

static int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
char* content = nullptr;
char* last_char = nullptr;
size_t file_len = 0;
Expand Down Expand Up @@ -125,10 +148,8 @@ static int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1
}
#endif

namespace arrow {
namespace internal {

static struct {
#if !defined(_WIN32) && !defined(__APPLE__)
struct {
std::string name;
int64_t flag;
} flag_mappings[] = {
Expand All @@ -145,10 +166,7 @@ static struct {
{"asimd", CpuInfo::ASIMD},
#endif
};
static const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);

#ifndef _WIN32
namespace {
const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);

// Helper function to parse for hardware flags.
// values contains a list of space-separated flags. check to see if the flags we
Expand All @@ -163,8 +181,6 @@ int64_t ParseCPUFlags(const std::string& values) {
}
return flags;
}

} // namespace
#endif

#ifdef _WIN32
Expand Down Expand Up @@ -294,6 +310,8 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
}
#endif

} // namespace

CpuInfo::CpuInfo()
: hardware_flags_(0),
num_cores_(1),
Expand Down Expand Up @@ -330,6 +348,36 @@ void CpuInfo::Init() {
if (QueryPerformanceFrequency(&performance_frequency)) {
max_mhz = static_cast<float>(performance_frequency.QuadPart);
}
#elif defined(__APPLE__)
// On macOS, get CPU information from system information base
struct SysCtlCpuFeature {
const char* name;
int64_t flag;
};
std::vector<SysCtlCpuFeature> features = {
#if defined(__aarch64__)
// ARM64 (note that this is exposed under Rosetta as well)
{"hw.optional.neon", ASIMD},
#else
// x86
{"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
{"hw.optional.avx1_0", AVX},
{"hw.optional.avx2_0", AVX2},
{"hw.optional.bmi1", BMI1},
{"hw.optional.bmi2", BMI2},
{"hw.optional.avx512f", AVX512F},
{"hw.optional.avx512cd", AVX512CD},
{"hw.optional.avx512dq", AVX512DQ},
{"hw.optional.avx512bw", AVX512BW},
{"hw.optional.avx512vl", AVX512VL},
#endif
};
for (const auto& feature : features) {
auto v = IntegerSysCtlByName(feature.name);
if (v.value_or(0)) {
hardware_flags_ |= feature.flag;
}
}
#else
// Read from /proc/cpuinfo
std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
Expand Down Expand Up @@ -365,15 +413,20 @@ void CpuInfo::Init() {
#endif

#ifdef __APPLE__
// On Mac OS X use sysctl() to get the cache sizes
size_t len = sizeof(int64_t);
int64_t data[1];
sysctlbyname("hw.l1dcachesize", data, &len, NULL, 0);
cache_sizes_[0] = data[0];
sysctlbyname("hw.l2cachesize", data, &len, NULL, 0);
cache_sizes_[1] = data[0];
sysctlbyname("hw.l3cachesize", data, &len, NULL, 0);
cache_sizes_[2] = data[0];
// On macOS, get cache size from system information base
SetDefaultCacheSize();
auto c = IntegerSysCtlByName("hw.l1dcachesize");
if (c.has_value()) {
cache_sizes_[0] = *c;
}
c = IntegerSysCtlByName("hw.l2cachesize");
if (c.has_value()) {
cache_sizes_[1] = *c;
}
c = IntegerSysCtlByName("hw.l3cachesize");
if (c.has_value()) {
cache_sizes_[2] = *c;
}
#elif _WIN32
if (!RetrieveCacheSize(cache_sizes_)) {
SetDefaultCacheSize();
Expand Down
5 changes: 3 additions & 2 deletions python/pyarrow/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import os
import subprocess
import sys

import pytest

Expand Down Expand Up @@ -69,10 +70,10 @@ def test_runtime_info():
info = pa.runtime_info()
assert info.simd_level == 'none', info.simd_level
assert info.detected_simd_level == f{info.detected_simd_level!r},\
assert info.detected_simd_level == {info.detected_simd_level!r},\
info.detected_simd_level
"""
subprocess.check_call(["python", "-c", code], env=env)
subprocess.check_call([sys.executable, "-c", code], env=env)


@pytest.mark.parametrize('klass', [
Expand Down