Skip to content

Commit

Permalink
make the interface friendlier for future adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
picnixz committed Oct 6, 2024
1 parent 5006686 commit 3c0b4f1
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 61 deletions.
22 changes: 16 additions & 6 deletions Include/internal/pycore_cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,25 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include <stdbool.h>

typedef struct {
bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi;
bool done;
} _py_cpu_simd_flags;
/* Streaming SIMD Extensions */
uint8_t sse: 1;
uint8_t sse2: 1;
uint8_t sse3: 1;
uint8_t sse41: 1; // SSE4.1
uint8_t sse42: 1; // SSE4.2

/* Advanced Vector Extensions */
uint8_t avx: 1;
uint8_t avx2: 1;
uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions

uint8_t done; // indicate whether the structure was filled or not
} py_cpu_simd_flags;

/* Detect the available SIMD features on this machine. */
extern void
_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags);
_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags);

#ifdef __cplusplus
}
Expand Down
154 changes: 99 additions & 55 deletions Python/cpuinfo.c
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
/*
* Naive CPU SIMD features detection.
* Python CPU SIMD features detection.
*
* See Modules/black2module.c.
* See https://en.wikipedia.org/wiki/CPUID for details.
*/

#include "Python.h"
#include "pycore_cpuinfo.h"

#include <stdbool.h>
#define CPUID_REG(ARG) ARG

/*
* For simplicity, we only enable SIMD instructions for Intel CPUs,
* even though we could support ARM NEON and POWER.
*/
#if defined(__x86_64__) && defined(__GNUC__)
# include <cpuid.h>
#elif defined(_M_X64)
# include <intrin.h>
#else
# undef CPUID_REG
# define CPUID_REG(ARG) Py_UNUSED(ARG)
#endif

// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
Expand All @@ -24,6 +31,15 @@
# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
#endif

/*
* The macros below describe masks to apply on CPUID output registers.
*
* Each macro is of the form [REGISTER][PAGE]_[FEATURE] where
*
* - REGISTER is either EBX, ECX or EDX,
* - PAGE is either 1 or 7 depending, and
* - FEATURE is an SIMD instruction set.
*/
#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25
#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26
#define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0
Expand All @@ -33,78 +49,106 @@
#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5
#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1

void
_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags)
{
if (flags->done) {
return;
}
#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 0 : 1

int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
/*
* Indicate whether the CPUID input EAX=1 may be needed to
* detect SIMD basic features (e.g., SSE).
*/
#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS)
# define MAY_DETECT_CPUID_SIMD_FEATURES
#endif

/*
* Indicate whether the CPUID input EAX=7 may be needed to
* detect SIMD extended features (e.g., AVX2 or AVX-512).
*/
#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \
|| defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS)
# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
#endif

static inline void
get_cpuid_info(int32_t level /* input eax */,
int32_t count /* input ecx */,
int32_t *CPUID_REG(eax),
int32_t *CPUID_REG(ebx),
int32_t *CPUID_REG(ecx),
int32_t *CPUID_REG(edx))
{
#if defined(__x86_64__) && defined(__GNUC__)
__cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
__cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
__cpuid_count(level, count, *eax, *ebx, *ecx, *edx);
#elif defined(_M_X64)
int info1[4] = {0};
__cpuidex(info1, 1, 0);
eax1 = info1[0];
ebx1 = info1[1];
ecx1 = info1[2];
edx1 = info1[3];

int info7[4] = {0};
__cpuidex(info7, 7, 0);
eax7 = info7[0];
ebx7 = info7[1];
ecx7 = info7[2];
edx7 = info7[3];
#else
// use (void) expressions to avoid warnings
(void) eax1; (void) ebx1; (void) ecx1; (void) edx1;
(void) eax7; (void) ebx7; (void) ecx7; (void) edx7;
int32_t info[4] = {0};
__cpuidex(info, level, count);
*eax = info[0];
*ebx = info[1];
*ecx = info[2];
*edx = info[3];
#endif
}

/* Processor Info and Feature Bits (EAX=1, ECX=0). */
static inline void
detect_cpu_simd_features(py_cpu_simd_flags *flags)
{
int32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx);
#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS
flags->sse = (edx1 & EDX1_SSE) != 0;
#else
flags->sse = false;
flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE);
#endif
#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
flags->sse2 = (edx1 & EDX1_SSE2) != 0;
#else
flags->sse2 = false;
flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2);
#endif
#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
flags->sse3 = (ecx1 & ECX1_SSE3) != 0;
#else
flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3);
#endif
flags->sse3 = false;
#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0;
#else
flags->sse41 = false;
flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1);
#endif
#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0;
#else
flags->sse42 = false;
flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2);
#endif
#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
flags->avx = (ecx1 & ECX1_AVX) != 0;
#else
flags->avx = false;
flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX);
#endif
}

/* Extended feature bits (EAX=7, ECX=0). */
static inline void
detect_cpu_simd_extended_features(py_cpu_simd_flags *flags)
{
int32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx);
#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
flags->avx2 = (ebx7 & EBX7_AVX2) != 0;
#else
flags->avx2 = false;
flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2);
#endif
#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0;
#else
flags->avx512vbmi = false;
flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI);
#endif
}

flags->done = true;
void
_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags)
{
if (flags->done) {
return;
}
#ifdef MAY_DETECT_CPUID_SIMD_FEATURES
detect_cpu_simd_features(flags);
#else
flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0;
flags->avx = 0;
#endif
#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
detect_cpu_simd_extended_features(flags);
#else
flags->avx2 = flags->avx512vbmi = 0;
#endif
flags->done = 1;
}

0 comments on commit 3c0b4f1

Please sign in to comment.