diff --git a/libkram/vectormath/float234.h b/libkram/vectormath/float234.h
index 1dc3422..4ecfc61 100644
--- a/libkram/vectormath/float234.h
+++ b/libkram/vectormath/float234.h
@@ -279,6 +279,20 @@ SIMD_CALL float2 lerp(float2 x, float2 y, float2 t) { return x + t*(y - x); }
 SIMD_CALL float3 lerp(float3 x, float3 y, float3 t) { return x + t*(y - x); }
 SIMD_CALL float4 lerp(float4 x, float4 y, float4 t) { return x + t*(y - x); }
 
+// lerp overloads that take one scalar weight t instead of a per-lane weight
+SIMD_CALL float2 lerp(float2 x, float2 y, float t) {
+    float2 delta = y - x;
+    return x + t*delta;
+}
+SIMD_CALL float3 lerp(float3 x, float3 y, float t) {
+    float3 delta = y - x;
+    return x + t*delta;
+}
+SIMD_CALL float4 lerp(float4 x, float4 y, float t) {
+    float4 delta = y - x;
+    return x + t*delta;
+}
+
 // dot
 SIMD_CALL float dot(float2 x, float2 y) { return reduce_add(x * y); }
 SIMD_CALL float dot(float3 x, float3 y) { return reduce_add(x * y); }
@@ -829,6 +843,15 @@ SIMD_CALL float3 operator*(quatf q, float3 v) {
     return v + qv.w * t + cross(qv.xyz, t);
 }
 
+// true only when every component of x.v compares equal to y.v (no tolerance)
+SIMD_CALL bool equal(quatf x, quatf y) {
+    return all(x.v == y.v);
+}
+// operator form of equal(), kept as a thin forwarder so the two cannot diverge
+SIMD_CALL bool operator==(quatf x, quatf y) {
+    return equal(x, y);
+}
+
 float4x4 float4x4m(quatf q);
 
 // how many quatf ops are needed?
diff --git a/libkram/vectormath/int234.h b/libkram/vectormath/int234.h
index ca528bc..4750ea6 100644
--- a/libkram/vectormath/int234.h
+++ b/libkram/vectormath/int234.h
@@ -26,6 +26,18 @@ namespace SIMD_NAMESPACE {
 
 macroVector4TypesStorageRenames(int, int)
 
+// widen to int4: the source lanes are copied, the remaining lanes stay 0
+SIMD_CALL int4 zeroext(int2 x) {
+    int4 v = 0;
+    v.xy = x;
+    return v;
+}
+SIMD_CALL int4 zeroext(int3 x) {
+    int4 v = 0;
+    v.xyz = x;
+    return v;
+}
+
 //-----------------------------------
 // imlementation - only code simd arch specific
 
@@ -45,6 +57,18 @@ SIMD_CALL bool all(int4 x) {
     return vminvq_u32(x) & 0x80000000;
 }
 
+// horizontal sum of all lanes, done with the A64 across-vector add
+SIMD_CALL int reduce_add(int2 x) {
+    return vaddv_s32(x);
+}
+SIMD_CALL int reduce_add(int4 x) {
+    return vaddvq_s32(x);
+}
+SIMD_CALL int reduce_add(int3 x) {
+    // zeroext leaves w at 0, so the 4-lane sum equals x+y+z
+    return reduce_add(zeroext(x));
+}
+
 #endif // SIMD_NEON
 
 // These take in int types, this is what comparison gens from a < b, etc.
@@ -63,6 +87,20 @@ SIMD_CALL bool all(int2 x) {
 SIMD_CALL bool all(int4 x) {
     return _mm_movemask_ps((__m128)x) == 0xf; // 4 bits
 }
+
+// horizontal sum of all lanes
+// TODO: replace these scalar adds with SSE ops
+SIMD_CALL int reduce_add(int4 x) {
+    int2 r = x.lo + x.hi;
+    return r.x + r.y;
+}
+SIMD_CALL int reduce_add(int2 x) {
+    return x.x + x.y;
+}
+SIMD_CALL int reduce_add(int3 x) {
+    return x.x + x.y + x.z;
+}
+
 #endif // SIMD_SSE
 
 // any-all
diff --git a/libkram/vectormath/vectormath234.cpp b/libkram/vectormath/vectormath234.cpp
index b12c7db..d931b3d 100644
--- a/libkram/vectormath/vectormath234.cpp
+++ b/libkram/vectormath/vectormath234.cpp
@@ -89,6 +89,9 @@
 // older but good talk on simd
 // https://people.inf.ethz.ch/markusp/teaching/263-2300-ETH-spring14/slides/11-simd.pdf
 
+// another article
+// https://www.cs.uaf.edu/courses/cs441/notes/sse-avx/
+
 // aarch64
 // https://en.wikipedia.org/wiki/AArch64
 