Skip to content

Commit

Permalink
separate SSE and SSE2
Browse files Browse the repository at this point in the history
  • Loading branch information
recp committed Mar 31, 2024
1 parent e3ed983 commit 32a477e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
14 changes: 12 additions & 2 deletions include/cglm/simd/intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

#if defined( _MSC_VER )
# if (defined(_M_AMD64) || defined(_M_X64)) || _M_IX86_FP == 2
# ifndef __SSE__
# define __SSE__
# endif
# ifndef __SSE2__
# define __SSE2__
# endif
Expand All @@ -24,15 +27,22 @@
# endif
#endif

#if defined( __SSE__ ) || defined( __SSE2__ )
#if defined(__SSE__)
# include <xmmintrin.h>
# include <emmintrin.h>
# define CGLM_SSE_FP 1
# ifndef CGLM_SIMD_x86
# define CGLM_SIMD_x86
# endif
#endif

#if defined(__SSE2__)
# include <emmintrin.h>
# define CGLM_SSE2_FP 1
# ifndef CGLM_SIMD_x86
# define CGLM_SIMD_x86
# endif
#endif

#if defined(__SSE3__)
# include <pmmintrin.h>
# ifndef CGLM_SIMD_x86
Expand Down
23 changes: 17 additions & 6 deletions include/cglm/simd/x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#define glmm_set1(x) _mm_set1_ps(x)
#define glmm_128 __m128

#ifdef CGLM_USE_INT_DOMAIN
#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
_MM_SHUFFLE(z, y, x, w)))
Expand Down Expand Up @@ -56,16 +56,27 @@

/* Note that `0x80000000` corresponds to `INT_MIN` for a 32-bit int. */
#define GLMM_NEGZEROf ((int)0x80000000) /* 0x80000000 ---> -0.0f */
#define GLMM_POSZEROf ((int)0x00000000) /* 0x00000000 ---> +0.0f */

#define GLMM__SIGNMASKf(X, Y, Z, W) \
#if defined(__SSE2__)
# define GLMM__SIGNMASKf(X, Y, Z, W) \
_mm_castsi128_ps(_mm_set_epi32(X, Y, Z, W))
/* _mm_set_ps(X, Y, Z, W); */
#else
# define GLMM__SIGNMASKf(X, Y, Z, W) _mm_set_ps(X, Y, Z, W)
#endif

#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(GLMM_POSZEROf, GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_NEGZEROf)
#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_NEGZEROf, GLMM_POSZEROf)
#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_POSZEROf, GLMM_NEGZEROf)

#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(0, GLMM_NEGZEROf, 0, GLMM_NEGZEROf)
#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, GLMM_NEGZEROf, 0)
#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf)
/* fasth math prevents -0.0f to work */
#if defined(__SSE2__)
# define glmm_float32x4_SIGNMASK_NEG _mm_castsi128_ps(_mm_set1_epi32(GLMM_NEGZEROf)) /* _mm_set1_ps(-0.0f) */
#else
# define glmm_float32x4_SIGNMASK_NEG _mm_set1_ps(GLMM_NEGZEROf)
#endif

#define glmm_float32x4_SIGNMASK_NEG _mm_castsi128_ps(_mm_set1_epi32(GLMM_NEGZEROf)) /* _mm_set1_ps(-0.0f) */
#define glmm_float32x8_SIGNMASK_NEG _mm256_castsi256_ps(_mm256_set1_epi32(GLMM_NEGZEROf))

static inline
Expand Down

0 comments on commit 32a477e

Please sign in to comment.