Add single output _mm_cvt[t]ss_* variants

The *_pi variants are currently blocked by rust-lang-nursery/stdsimd#74
nominolo · Oct 22, 2017 · f4633aa · f4633aa
1 parent 59de334
commit f4633aa
Showing 1 changed file with 205 additions and 0 deletions.
diff --git a/src/x86/sse.rs b/src/x86/sse.rs
@@ -598,6 +598,102 @@ pub unsafe fn _mm_ucomineq_ss(a: f32x4, b: f32x4) -> i32 {
     ucomineq_ss(a, b)
 }
 
+/// Convert the lowest 32 bit float in the input vector to a 32 bit integer.
+///
+/// The result is rounded according to the current rounding mode. If the result
+/// cannot be represented as a 32 bit integer, `0x8000_0000` (`std::i32::MIN`)
+/// is returned, or an invalid operation floating point exception is raised if
+/// unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTSS2SI` instruction (with 32 bit output).
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtss2si))]
+pub unsafe fn _mm_cvtss_si32(a: f32x4) -> i32 {
+    cvtss2si(a)
+}
+
+/// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html); `a` is passed through.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtss2si))]
+pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
+    _mm_cvtss_si32(a) // pure forwarding, no extra logic
+}
+
+/// Convert the lowest 32 bit float in the input vector to a 64 bit integer.
+///
+/// The result is rounded according to the current rounding mode. If the result
+/// cannot be represented as a 64 bit integer, `0x8000_0000_0000_0000`
+/// (`std::i64::MIN`) is returned, or an invalid operation floating point
+/// exception is raised if that exception is unmasked (see
+/// [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtss2si))]
+pub unsafe fn _mm_cvtss_si64(a: f32x4) -> i64 {
+    cvtss2si64(a) // NOTE(review): presumably x86_64-only; confirm cfg gating
+}
+
+// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
+// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
+// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
+
+/// Convert the lowest 32 bit float in the input vector to a 32 bit integer with
+/// truncation.
+///
+/// The result is always rounded using truncation (round towards zero). If the
+/// result cannot be represented as a 32 bit integer, `0x8000_0000`
+/// (`std::i32::MIN`) is returned, or an invalid operation floating point
+/// exception is raised if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output).
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvttss2si))]
+pub unsafe fn _mm_cvttss_si32(a: f32x4) -> i32 {
+    cvttss2si(a)
+}
+
+/// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html); `a` is passed through.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvttss2si))]
+pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
+    _mm_cvttss_si32(a) // pure forwarding, no extra logic
+}
+
+/// Convert the lowest 32 bit float in the input vector to a 64 bit integer with
+/// truncation.
+///
+/// The result is always rounded using truncation (round towards zero). If the
+/// result cannot be represented as a 64 bit integer, `0x8000_0000_0000_0000`
+/// (`std::i64::MIN`) is returned, or an invalid operation floating point
+/// exception is raised if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvttss2si))]
+pub unsafe fn _mm_cvttss_si64(a: f32x4) -> i64 {
+    cvttss2si64(a) // NOTE(review): presumably x86_64-only; confirm cfg gating
+}
+
+// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
+// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
+// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
+
+/// Extract the lowest 32 bit float (lane 0) from the input vector.
+#[inline(always)]
+#[target_feature = "+sse"]
+// No point in using `assert_instr` here. With the Unix x86_64 calling
+// convention this is a no-op, and on Windows it's just a `mov`.
+pub unsafe fn _mm_cvtss_f32(a: f32x4) -> f32 {
+    a.extract(0)
+}
+
 /// Construct a `f32x4` with the lowest element set to `a` and the rest set to
 /// zero.
 #[inline(always)]
@@ -1542,6 +1638,14 @@ extern {
     fn ucomige_ss(a: f32x4, b: f32x4) -> i32;
     #[link_name = "llvm.x86.sse.ucomineq.ss"]
     fn ucomineq_ss(a: f32x4, b: f32x4) -> i32;
+    #[link_name = "llvm.x86.sse.cvtss2si"]
+    fn cvtss2si(a: f32x4) -> i32;
+    #[link_name = "llvm.x86.sse.cvtss2si64"]
+    fn cvtss2si64(a: f32x4) -> i64;
+    #[link_name = "llvm.x86.sse.cvttss2si"]
+    fn cvttss2si(a: f32x4) -> i32;
+    #[link_name = "llvm.x86.sse.cvttss2si64"]
+    fn cvttss2si64(a: f32x4) -> i64;
     #[link_name = "llvm.x86.sse.sfence"]
     fn sfence();
     #[link_name = "llvm.x86.sse.stmxcsr"]
@@ -2532,6 +2636,107 @@ mod tests {
         }
     }
 
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtss_si32() {
+        use std::f32::NAN;
+        use std::i32::MIN;
+        // Lane 0 inputs, paired by position with the expected conversions.
+        let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
+        let result = &[42i32,   -3,   MIN,    0,       MIN, 2147483520];
+        for (i, (&xi, &e)) in inputs.iter().zip(result.iter()).enumerate() {
+            let x = f32x4::new(xi, 1.0, 3.0, 4.0);
+            let r = sse::_mm_cvtss_si32(x);
+            assert_eq!(e, r,
+                "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
+                i, x, r, e);
+        }
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtss_si64() {
+        use std::f32::NAN;
+        use std::i64::MIN;
+        let cases = &[
+            (42.0f32,  42i64),
+            (-31.4,   -31),
+            (-33.5,   -34), // tie: the even neighbour is expected
+            (-34.5,   -34), // tie: the even neighbour is expected
+            (4.0e10,  40_000_000_000),
+            (4.0e-10, 0),
+            (NAN, MIN),
+            (2147483500.1, 2147483520),
+            (9.223371e18, 9223370937343148032)
+        ];
+        // Each case is `(lane 0 input, expected result)`; other lanes are filler.
+        for (i, &(xi, want)) in cases.iter().enumerate() {
+            let x = f32x4::new(xi, 1.0, 3.0, 4.0);
+            let got = sse::_mm_cvtss_si64(x);
+            assert_eq!(want, got,
+                "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
+                i, x, got, want);
+        }
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvttss_si32() {
+        use std::f32::NAN;
+        use std::i32::MIN;
+        let cases = &[
+            (42.0f32,  42i32),
+            (-31.4,   -31),
+            (-33.5,   -33), // fraction dropped, towards zero
+            (-34.5,   -34), // fraction dropped, towards zero
+            (10.999,   10),
+            (-5.99,    -5),
+            (4.0e10,  MIN),
+            (4.0e-10, 0),
+            (NAN, MIN),
+            (2147483500.1, 2147483520),
+        ];
+        // Each case is `(lane 0 input, expected result)`; other lanes are filler.
+        for (i, &(xi, want)) in cases.iter().enumerate() {
+            let x = f32x4::new(xi, 1.0, 3.0, 4.0);
+            let got = sse::_mm_cvttss_si32(x);
+            assert_eq!(want, got,
+                "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
+                i, x, got, want);
+        }
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvttss_si64() {
+        use std::f32::NAN;
+        use std::i64::MIN;
+        let cases = &[
+            (42.0f32,  42i64),
+            (-31.4,   -31),
+            (-33.5,   -33), // fraction dropped, towards zero
+            (-34.5,   -34), // fraction dropped, towards zero
+            (10.999,   10),
+            (-5.99,    -5),
+            (4.0e10,  40_000_000_000),
+            (4.0e-10, 0),
+            (NAN, MIN),
+            (2147483500.1, 2147483520),
+            (9.223371e18, 9223370937343148032),
+            (9.223372e18, MIN),
+        ];
+        // Each case is `(lane 0 input, expected result)`; other lanes are filler.
+        for (i, &(xi, want)) in cases.iter().enumerate() {
+            let x = f32x4::new(xi, 1.0, 3.0, 4.0);
+            let got = sse::_mm_cvttss_si64(x);
+            assert_eq!(want, got,
+                "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
+                i, x, got, want);
+        }
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtss_f32() {
+        let a = f32x4::new(312.0134, 5.0, 6.0, 7.0);
+        assert_eq!(sse::_mm_cvtss_f32(a), 312.0134); // lane 0 comes back as-is
+    }
+
     #[simd_test = "sse"]
     unsafe fn _mm_set_ss() {
         let r = sse::_mm_set_ss(black_box(4.25));