
Commit 2c4d880

gwenn authored and alexcrichton committed
sse2: implements last remaining intrinsics (#244)
* sse2: __m64 related intrinsics: _mm_add_si64, _mm_mul_su32, _mm_sub_si64, _mm_cvtpi32_pd, _mm_set_epi64, _mm_set1_epi64, _mm_setr_epi64
* sse2: _mm_load_sd, _mm_loadh_pd, _mm_loadl_pd
* sse2: _mm_store_sd, _mm_storeh_pd, _mm_storel_pd
* sse2: _mm_shuffle_pd, _mm_move_sd
* sse2: _mm_cast* intrinsics: _mm_castpd_ps, _mm_castpd_si128, _mm_castps_pd, _mm_castps_si128, _mm_castsi128_pd, _mm_castsi128_ps
* sse2: add some tests
* Try to fix AppVeyor build
* sse2: add more tests
* sse2: fix assert_instr for _mm_shuffle_pd
* Try to fix Travis build
* sse2: try to fix AppVeyor build
* sse2: try to fix AppVeyor build
1 parent 8550a9c commit 2c4d880

File tree

2 files changed: +380 -1 lines changed

coresimd/src/x86/i586/sse2.rs

Lines changed: 245 additions & 0 deletions
@@ -1865,6 +1865,35 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
     *(mem_addr as *const f64x2)
 }

+/// Loads a 64-bit double-precision value to the low element of a
+/// 128-bit vector of [2 x double] and clears the upper element.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movsd))]
+pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 {
+    f64x2::new(*mem_addr, 0.)
+}
+
+/// Loads a double-precision value into the high-order bits of a 128-bit
+/// vector of [2 x double]. The low-order bits are copied from the low-order
+/// bits of the first operand.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movhpd))]
+pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
+    f64x2::new(a.extract(0), *mem_addr)
+}
+
+/// Loads a double-precision value into the low-order bits of a 128-bit
+/// vector of [2 x double]. The high-order bits are copied from the
+/// high-order bits of the first operand.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movlpd))]
+pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
+    f64x2::new(*mem_addr, a.extract(1))
+}
+
 /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
 /// aligned memory location.
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
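For readers skimming the diff, here is an illustrative test-style sketch (not part of the commit) that exercises the three load intrinsics added in this hunk together. It assumes the same test module and helpers (sse2::, f64x2, #[simd_test]) used by the tests added further down; the function name is hypothetical.

// Illustrative only, not in the commit; assumes the tests module of this file.
#[simd_test = "sse2"]
unsafe fn load_sd_loadh_loadl_sketch() {
    let x = 7.;
    let v = f64x2::new(1., 2.);
    // movsd: low lane from memory, high lane cleared.
    assert_eq!(sse2::_mm_load_sd(&x), f64x2::new(7., 0.));
    // movhpd: keep the low lane of `v`, load the high lane from memory.
    assert_eq!(sse2::_mm_loadh_pd(v, &x), f64x2::new(1., 7.));
    // movlpd: load the low lane from memory, keep the high lane of `v`.
    assert_eq!(sse2::_mm_loadl_pd(v, &x), f64x2::new(7., 2.));
}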
@@ -1876,6 +1905,15 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

+/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
+pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(0)
+}
+
 /// Store 128-bits (composed of 2 packed double-precision (64-bit)
 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
 /// on a 16-byte boundary or a general-protection exception may be generated.
@@ -1931,6 +1969,24 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = b;
 }

+/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movhpd))]
+pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(1)
+}
+
+/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
+pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(0)
+}
+
 /// Load a double-precision (64-bit) floating-point element from memory
 /// into both elements of returned vector.
 #[inline(always)]
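Similarly, a hedged test-style sketch (not part of the commit) that combines the store intrinsics from this hunk and the previous one, in the same test-module style; the function name is hypothetical.

// Illustrative only, not in the commit; assumes the tests module of this file.
#[simd_test = "sse2"]
unsafe fn store_sd_storeh_storel_sketch() {
    let a = f64x2::new(1., 2.);
    let (mut sd, mut hi, mut lo) = (0., 0., 0.);
    sse2::_mm_store_sd(&mut sd, a);  // lower lane to memory
    sse2::_mm_storeh_pd(&mut hi, a); // upper lane to memory (movhpd)
    sse2::_mm_storel_pd(&mut lo, a); // lower lane to memory
    assert_eq!((sd, hi, lo), (1., 2., 1.));
}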
@@ -1976,6 +2032,79 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
     dst
 }

+/// Constructs a 128-bit floating-point vector of [2 x double] from two
+/// 128-bit vector parameters of [2 x double], using the immediate-value
+/// parameter as a specifier.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
+pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
+    match imm8 & 0b11 {
+        0b00 => simd_shuffle2(a, b, [0, 2]),
+        0b01 => simd_shuffle2(a, b, [1, 2]),
+        0b10 => simd_shuffle2(a, b, [0, 3]),
+        _ => simd_shuffle2(a, b, [1, 3]),
+    }
+}
+
+/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
+/// 64 bits are set to the lower 64 bits of the second parameter. The upper
+/// 64 bits are set to the upper 64 bits of the first parameter.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movsd))]
+pub unsafe fn _mm_move_sd(a: f64x2, b: f64x2) -> f64x2 {
+    f64x2::new(b.extract(0), a.extract(1))
+}
+
+/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
+/// floating-point vector of [4 x float].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
+/// integer vector.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i {
+    simd_cast(a)
+}
+
+/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
+/// floating-point vector of [2 x double].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
+/// integer vector.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit integer vector into a 128-bit floating-point vector
+/// of [2 x double].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 {
+    simd_cast(a)
+}
+
+/// Casts a 128-bit integer vector into a 128-bit floating-point vector
+/// of [4 x float].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castsi128_ps(a: __m128i) -> f32x4 {
+    mem::transmute(a)
+}
+
 /// Return vector of type __m128d with undefined elements.
 #[inline(always)]
 #[target_feature = "+sse2"]
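To make the imm8 encoding of _mm_shuffle_pd and the bit-reinterpreting nature of the cast intrinsics explicit: bit 0 of imm8 selects which lane of a becomes the low result lane, and bit 1 selects which lane of b becomes the high result lane, while the casts reinterpret the 128 bits without converting lane values. Below is another hedged test-style sketch (not part of the commit; the function name is hypothetical), again assuming the tests module of this file.

// Illustrative only, not in the commit; assumes the tests module of this file.
#[simd_test = "sse2"]
unsafe fn shuffle_pd_and_cast_sketch() {
    let a = f64x2::new(1., 2.);
    let b = f64x2::new(3., 4.);
    // imm8 bit 0 picks the lane of `a`, bit 1 picks the lane of `b`.
    assert_eq!(sse2::_mm_shuffle_pd(a, b, 0b00), f64x2::new(1., 3.));
    assert_eq!(sse2::_mm_shuffle_pd(a, b, 0b01), f64x2::new(2., 3.));
    assert_eq!(sse2::_mm_shuffle_pd(a, b, 0b10), f64x2::new(1., 4.));
    assert_eq!(sse2::_mm_shuffle_pd(a, b, 0b11), f64x2::new(2., 4.));

    // _mm_castpd_ps only reinterprets bits: the f64 value 1.0 does not
    // become 1.0 in each f32 lane, so the result differs from splat(1.).
    let c = sse2::_mm_castpd_ps(f64x2::splat(1.));
    assert_ne!(c, f32x4::splat(1.));
}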
@@ -3760,6 +3889,32 @@ mod tests {
         assert_eq!(r, f64x2::new(1.0, 2.0));
     }

+    #[simd_test = "sse2"]
+    unsafe fn _mm_load_sd() {
+        let a = 1.;
+        let expected = f64x2::new(a, 0.);
+        let r = sse2::_mm_load_sd(&a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_loadh_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = 3.;
+        let expected = f64x2::new(a.extract(0), 3.);
+        let r = sse2::_mm_loadh_pd(a, &b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_loadl_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = 3.;
+        let expected = f64x2::new(3., a.extract(1));
+        let r = sse2::_mm_loadl_pd(a, &b);
+        assert_eq!(r, expected);
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_stream_pd() {
         #[repr(align(128))]
@@ -3775,6 +3930,14 @@ mod tests {
         }
     }

+    #[simd_test = "sse2"]
+    unsafe fn _mm_store_sd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_store_sd(&mut dest, a);
+        assert_eq!(dest, a.extract(0));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_store_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
@@ -3847,6 +4010,22 @@ mod tests {
         assert_eq!(vals[1], 1.0);
     }

+    #[simd_test = "sse2"]
+    unsafe fn _mm_storeh_pd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_storeh_pd(&mut dest, a);
+        assert_eq!(dest, a.extract(1));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_storel_pd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_storel_pd(&mut dest, a);
+        assert_eq!(dest, a.extract(0));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_loadr_pd() {
         let mut mem = Memory {
@@ -4105,4 +4284,70 @@ mod tests {
         let r = sse2::_mm_unpacklo_pd(a, b);
         assert_eq!(r, f64x2::new(1.0, 3.0));
     }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_shuffle_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = f64x2::new(3., 4.);
+        let expected = f64x2::new(1., 3.);
+        let r = sse2::_mm_shuffle_pd(a, b, 0);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_move_sd() {
+        let a = f64x2::new(1., 2.);
+        let b = f64x2::new(3., 4.);
+        let expected = f64x2::new(3., 2.);
+        let r = sse2::_mm_move_sd(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castpd_ps() {
+        let a = f64x2::splat(0.);
+        let expected = f32x4::splat(0.);
+        let r = sse2::_mm_castpd_ps(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castpd_si128() {
+        let a = f64x2::splat(0.);
+        let expected = i64x2::splat(0);
+        let r = sse2::_mm_castpd_si128(a);
+        assert_eq!(r, __m128i::from(expected));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castps_pd() {
+        let a = f32x4::splat(0.);
+        let expected = f64x2::splat(0.);
+        let r = sse2::_mm_castps_pd(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castps_si128() {
+        let a = f32x4::splat(0.);
+        let expected = i32x4::splat(0);
+        let r = sse2::_mm_castps_si128(a);
+        assert_eq!(r, __m128i::from(expected));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castsi128_pd() {
+        let a = __m128i::from(i64x2::splat(0));
+        let expected = f64x2::splat(0.);
+        let r = sse2::_mm_castsi128_pd(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castsi128_ps() {
+        let a = __m128i::from(i32x4::splat(0));
+        let expected = f32x4::splat(0.);
+        let r = sse2::_mm_castsi128_ps(a);
+        assert_eq!(r, expected);
+    }
 }
