kram - simd - a few more ops

alecazam · Oct 12, 2024 · 3a1b00a · 3a1b00a
1 parent 926a335
commit 3a1b00a
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 0 deletions.
diff --git a/libkram/vectormath/float234.h b/libkram/vectormath/float234.h
@@ -279,6 +279,11 @@ SIMD_CALL float2 lerp(float2 x, float2 y, float2 t) { return x + t*(y - x); }
 SIMD_CALL float3 lerp(float3 x, float3 y, float3 t) { return x + t*(y - x); }
 SIMD_CALL float4 lerp(float4 x, float4 y, float4 t) { return x + t*(y - x); }
 
+SIMD_CALL float2 lerp(float2 x, float2 y, float t) { return x + t*(y - x); } // scalar-t overloads: one weight t is shared by every component
+SIMD_CALL float3 lerp(float3 x, float3 y, float t) { return x + t*(y - x); }
+SIMD_CALL float4 lerp(float4 x, float4 y, float t) { return x + t*(y - x); }
+
+
 // dot
 SIMD_CALL float dot(float2 x, float2 y) { return reduce_add(x * y); }
 SIMD_CALL float dot(float3 x, float3 y) { return reduce_add(x * y); }
@@ -829,6 +834,13 @@ SIMD_CALL float3 operator*(quatf q, float3 v) {
     return v + qv.w * t + cross(qv.xyz, t);
 }
 
+SIMD_CALL bool equal(quatf x, quatf y) { // exact comparison of two quatf values, no tolerance
+    return all(x.v == y.v); // true only when every component of x.v equals the matching one in y.v
+}
+SIMD_CALL bool operator==(quatf x, quatf y) { // operator form of the same test as equal()
+    return all(x.v == y.v);
+}
+
 float4x4 float4x4m(quatf q);
 
 // how many quatf ops are needed?

diff --git a/libkram/vectormath/int234.h b/libkram/vectormath/int234.h
@@ -26,6 +26,13 @@ namespace SIMD_NAMESPACE {
 
 macroVector4TypesStorageRenames(int, int)
 
+SIMD_CALL int4 zeroext(int2 x) { // widen int2 to int4: xy copied from x, zw stay 0
+    int4 v = 0; v.xy = x; return v;
+}
+SIMD_CALL int4 zeroext(int3 x) { // widen int3 to int4: xyz copied from x, w stays 0
+    int4 v = 0; v.xyz = x; return v;
+}
+
 //-----------------------------------
 // imlementation - only code simd arch specific
 
@@ -45,6 +52,20 @@ SIMD_CALL bool all(int4 x) {
     return vminvq_u32(x) & 0x80000000;
 }
 
+SIMD_CALL int reduce_add(int2 x) { // horizontal sum of both lanes (NEON path)
+    x = vpadd_s32(x, x); // pairwise add: each lane = x+y
+    return x.x; // the sum is in every lane, return lane 0
+}
+SIMD_CALL int reduce_add(int4 x) { // horizontal sum of all four lanes (NEON path)
+    // 4:1 reduction using two pairwise adds
+    x = vpaddq_s32(x, x); // lanes = x+y, z+w, x+y, z+w
+    x = vpaddq_s32(x, x); // every lane = x+y+z+w
+    return x.x; // the sum is in every lane, return lane 0
+}
+SIMD_CALL int reduce_add(int3 x) { // zeroext() pads w with 0, so the int4 sum equals x+y+z
+    return reduce_add(zeroext(x));
+}
+
 #endif // SIMD_NEON
 
 // These take in int types, this is what comparison gens from a < b, etc.
@@ -63,6 +84,19 @@ SIMD_CALL bool all(int2 x) {
 SIMD_CALL bool all(int4 x) {
     return _mm_movemask_ps((__m128)x) == 0xf; // 4 bits
 }
+
+// TODO: these sums use plain component adds; replace with SSE intrinsics.
+SIMD_CALL int reduce_add(int4 x) {
+    int2 r = x.lo + x.hi; // add the two halves; r = (x+z, y+w) assuming lo = xy and hi = zw
+    return r.x + r.y; // total of all four components
+}
+SIMD_CALL int reduce_add(int2 x) {
+    return x.x + x.y; // sum of both components
+}
+SIMD_CALL int reduce_add(int3 x) {
+    return x.x + x.y + x.z; // sum of the three components
+}
+
 #endif // SIMD_SSE
 
 // any-all

diff --git a/libkram/vectormath/vectormath234.cpp b/libkram/vectormath/vectormath234.cpp
@@ -89,6 +89,9 @@
 // older but good talk on simd
 // https://people.inf.ethz.ch/markusp/teaching/263-2300-ETH-spring14/slides/11-simd.pdf
 
+// another article, on SSE/AVX
+// https://www.cs.uaf.edu/courses/cs441/notes/sse-avx/
+
 // aarch64
 // https://en.wikipedia.org/wiki/AArch64