Skip to content

Commit

Permalink
kram - simd - a few more ops
Browse files Browse the repository at this point in the history
  • Loading branch information
alecazam committed Oct 12, 2024
1 parent 926a335 commit 3a1b00a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
12 changes: 12 additions & 0 deletions libkram/vectormath/float234.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ SIMD_CALL float2 lerp(float2 x, float2 y, float2 t) { return x + t*(y - x); }
// Per-lane linear interpolation: each lane of t weights the step from x toward y.
SIMD_CALL float3 lerp(float3 x, float3 y, float3 t) {
    const float3 delta = y - x;
    return x + t * delta;
}
// Per-lane linear interpolation: each lane of t weights the step from x toward y.
SIMD_CALL float4 lerp(float4 x, float4 y, float4 t) {
    const float4 delta = y - x;
    return x + t * delta;
}

// Linear interpolation from x toward y where one scalar weight t is applied to every lane.
SIMD_CALL float2 lerp(float2 x, float2 y, float t) {
    const float2 span = y - x;
    return x + t * span;
}
// Linear interpolation from x toward y where one scalar weight t is applied to every lane.
SIMD_CALL float3 lerp(float3 x, float3 y, float t) {
    const float3 span = y - x;
    return x + t * span;
}
// Linear interpolation from x toward y where one scalar weight t is applied to every lane.
SIMD_CALL float4 lerp(float4 x, float4 y, float t) {
    const float4 span = y - x;
    return x + t * span;
}


// dot
// Dot product: multiply lane-wise, then sum the lanes via reduce_add.
SIMD_CALL float dot(float2 x, float2 y) {
    const float2 products = x * y;
    return reduce_add(products);
}
// Dot product: multiply lane-wise, then sum the lanes via reduce_add.
SIMD_CALL float dot(float3 x, float3 y) {
    const float3 products = x * y;
    return reduce_add(products);
}
Expand Down Expand Up @@ -829,6 +834,13 @@ SIMD_CALL float3 operator*(quatf q, float3 v) {
return v + qv.w * t + cross(qv.xyz, t);
}

// Exact comparison (no tolerance) of the underlying vectors of two quaternions.
// The lane-wise == result is collapsed to a single bool with all().
SIMD_CALL bool equal(quatf x, quatf y) {
    const auto laneMask = (x.v == y.v);
    return all(laneMask);
}
// Operator form of the exact quaternion comparison: compares the underlying
// vectors lane-wise and reduces the result with all().
SIMD_CALL bool operator==(quatf x, quatf y) {
    const auto sameLanes = (x.v == y.v);
    return all(sameLanes);
}

// Declaration only (no body here).
// NOTE(review): presumably builds the 4x4 matrix equivalent of quaternion q - confirm against the definition.
float4x4 float4x4m(quatf q);

// how many quatf ops are needed?
Expand Down
34 changes: 34 additions & 0 deletions libkram/vectormath/int234.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ namespace SIMD_NAMESPACE {

macroVector4TypesStorageRenames(int, int)

// Widens an int2 to an int4: x fills the xy lanes, the remaining lanes stay 0.
SIMD_CALL int4 zeroext(int2 x) {
    int4 widened = 0;
    widened.xy = x;
    return widened;
}
// Widens an int3 to an int4: x fills the xyz lanes, the w lane stays 0.
SIMD_CALL int4 zeroext(int3 x) {
    int4 widened = 0;
    widened.xyz = x;
    return widened;
}

//-----------------------------------
// implementation - only simd arch-specific code

Expand All @@ -45,6 +52,20 @@ SIMD_CALL bool all(int4 x) {
return vminvq_u32(x) & 0x80000000;
}

// Horizontal sum of both lanes (NEON).
SIMD_CALL int reduce_add(int2 x) {
    // pairwise add of (x, x) leaves x+y in both lanes
    const int2 pairSum = vpadd_s32(x, x);
    return pairSum.x; // scalar total taken from lane 0
}
// Horizontal sum of all four lanes (NEON), done as two pairwise-add steps.
SIMD_CALL int reduce_add(int4 x) {
    // step 1: lanes become x+y, z+w, x+y, z+w
    const int4 pairs = vpaddq_s32(x, x);
    // step 2: every lane becomes x+y+z+w
    const int4 total = vpaddq_s32(pairs, pairs);
    return total.x; // scalar total taken from lane 0
}
// Horizontal sum of three lanes (NEON): pad to int4 with a zero lane,
// then reuse the 4-lane reduction.
SIMD_CALL int reduce_add(int3 x) {
    const int4 padded = zeroext(x);
    return reduce_add(padded);
}

#endif // SIMD_NEON

// These take in int types, this is what comparison gens from a < b, etc.
Expand All @@ -63,6 +84,19 @@ SIMD_CALL bool all(int2 x) {
// True when the sign bit is set in all four lanes (SSE).
SIMD_CALL bool all(int4 x) {
    // movemask packs one sign bit per lane into the low 4 bits
    const int signBits = _mm_movemask_ps((__m128)x);
    return signBits == 0xf;
}

// TODO: need SSE ops for these reductions
// Horizontal sum of all four lanes: add the low and high halves lane-wise,
// then add the two remaining lanes.
SIMD_CALL int reduce_add(int4 x) {
    const int2 lowHalf = x.lo;
    const int2 highHalf = x.hi;
    const int2 halfSum = lowHalf + highHalf;
    return halfSum.x + halfSum.y;
}
// Horizontal sum of both lanes using plain scalar addition.
SIMD_CALL int reduce_add(int2 x) {
    const int first = x.x;
    const int second = x.y;
    return first + second;
}
// Horizontal sum of three lanes using plain scalar addition.
SIMD_CALL int reduce_add(int3 x) {
    const int xy = x.x + x.y;
    return xy + x.z;
}

#endif // SIMD_SSE

// any-all
Expand Down
3 changes: 3 additions & 0 deletions libkram/vectormath/vectormath234.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@
// older but good talk on simd
// https://people.inf.ethz.ch/markusp/teaching/263-2300-ETH-spring14/slides/11-simd.pdf

// another article
// https://www.cs.uaf.edu/courses/cs441/notes/sse-avx/

// aarch64
// https://en.wikipedia.org/wiki/AArch64

Expand Down

0 comments on commit 3a1b00a

Please sign in to comment.