@@ -1865,6 +1865,35 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
     *(mem_addr as *const f64x2)
 }
 
+/// Loads a 64-bit double-precision value to the low element of a
+/// 128-bit vector of [2 x double] and clears the upper element.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movsd))]
+pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 {
+    f64x2::new(*mem_addr, 0.)
+}
+
+/// Loads a double-precision value into the high-order bits of a 128-bit
+/// vector of [2 x double]. The low-order bits are copied from the low-order
+/// bits of the first operand.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movhpd))]
+pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
+    f64x2::new(a.extract(0), *mem_addr)
+}
+
+/// Loads a double-precision value into the low-order bits of a 128-bit
+/// vector of [2 x double]. The high-order bits are copied from the
+/// high-order bits of the first operand.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movlpd))]
+pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
+    f64x2::new(*mem_addr, a.extract(1))
+}
+
 /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
 /// aligned memory location.
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
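A minimal usage sketch (outside the diff) of how the three loads compose, assuming the `sse2` module path and `f64x2` API used by the test module below; `demo` is a hypothetical helper:

// Sketch: build a vector from scalars, then replace the low lane.
unsafe fn demo() {
    let (x, y) = (1.0f64, 2.0f64);
    let v = sse2::_mm_load_sd(&x);      // [1.0, 0.0]
    let v = sse2::_mm_loadh_pd(v, &y);  // [1.0, 2.0]
    let v = sse2::_mm_loadl_pd(v, &y);  // [2.0, 2.0]
    assert_eq!(v, f64x2::new(2.0, 2.0));
}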
@@ -1876,6 +1905,15 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }
 
+/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
+pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(0)
+}
+
 /// Store 128-bits (composed of 2 packed double-precision (64-bit)
 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
 /// on a 16-byte boundary or a general-protection exception may be generated.
@@ -1931,6 +1969,24 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = b;
 }
 
+/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movhpd))]
+pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(1)
+}
+
+/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
+/// memory location.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
+pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) {
+    *mem_addr = a.extract(0)
+}
+
 /// Load a double-precision (64-bit) floating-point element from memory
 /// into both elements of returned vector.
 #[inline(always)]
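The stores are the mirror image of the loads; a hedged sketch under the same assumptions as above:

// Sketch: scatter the two lanes of a vector to scalar locations.
unsafe fn demo() {
    let v = f64x2::new(1.0, 2.0);
    let (mut lo, mut hi) = (0.0f64, 0.0f64);
    sse2::_mm_storeh_pd(&mut hi, v);  // hi = 2.0 (upper lane)
    sse2::_mm_storel_pd(&mut lo, v);  // lo = 1.0 (lower lane)
    sse2::_mm_store_sd(&mut lo, v);   // same effect as _mm_storel_pd
    assert_eq!((lo, hi), (1.0, 2.0));
}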
@@ -1976,6 +2032,79 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
     dst
 }
 
+/// Constructs a 128-bit floating-point vector of [2 x double] from two
+/// 128-bit vector parameters of [2 x double], using the immediate-value
+/// parameter as a specifier.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
+pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
+    match imm8 & 0b11 {
+        0b00 => simd_shuffle2(a, b, [0, 2]),
+        0b01 => simd_shuffle2(a, b, [1, 2]),
+        0b10 => simd_shuffle2(a, b, [0, 3]),
+        _ => simd_shuffle2(a, b, [1, 3]),
+    }
+}
+
+/// Constructs a 128-bit floating-point vector of [2 x double]. The lower
+/// 64 bits are set to the lower 64 bits of the second parameter. The upper
+/// 64 bits are set to the upper 64 bits of the first parameter.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movsd))]
+pub unsafe fn _mm_move_sd(a: f64x2, b: f64x2) -> f64x2 {
+    f64x2::new(b.extract(0), a.extract(1))
+}
+
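For orientation: bit 0 of `imm8` selects the lane taken from `a` (the result's low lane), bit 1 the lane taken from `b` (the result's high lane). A sketch under the same assumptions as the earlier ones:

// Sketch: imm8 = 0b01 selects a's upper lane and b's lower lane.
unsafe fn demo() {
    let a = f64x2::new(1.0, 2.0);
    let b = f64x2::new(3.0, 4.0);
    assert_eq!(sse2::_mm_shuffle_pd(a, b, 0b01), f64x2::new(2.0, 3.0));
    assert_eq!(sse2::_mm_move_sd(a, b), f64x2::new(3.0, 2.0));
}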
+/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
+/// floating-point vector of [4 x float].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
+/// integer vector.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
+/// floating-point vector of [2 x double].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
+/// integer vector.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit integer vector into a 128-bit floating-point vector
+/// of [2 x double].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 {
+    mem::transmute(a)
+}
+
+/// Casts a 128-bit integer vector into a 128-bit floating-point vector
+/// of [4 x float].
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub unsafe fn _mm_castsi128_ps(a: __m128i) -> f32x4 {
+    mem::transmute(a)
+}
+
 /// Return vector of type __m128d with undefined elements.
 #[inline(always)]
 #[target_feature = "+sse2"]
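These cast intrinsics reinterpret the 128 bits in place rather than converting lane values, which is why all six bodies are `mem::transmute` (a lane-wise `simd_cast` would perform a numeric f64-to-i64 conversion instead). A round-trip sketch, same assumptions as above:

// Sketch: a cast round-trip is lossless because no numeric conversion occurs.
unsafe fn demo() {
    let a = f64x2::splat(-1.0); // each lane keeps its IEEE-754 bit pattern
    let i = sse2::_mm_castpd_si128(a);
    assert_eq!(sse2::_mm_castsi128_pd(i), a);
}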
@@ -3760,6 +3889,32 @@ mod tests {
         assert_eq!(r, f64x2::new(1.0, 2.0));
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_load_sd() {
+        let a = 1.;
+        let expected = f64x2::new(a, 0.);
+        let r = sse2::_mm_load_sd(&a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_loadh_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = 3.;
+        let expected = f64x2::new(a.extract(0), 3.);
+        let r = sse2::_mm_loadh_pd(a, &b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_loadl_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = 3.;
+        let expected = f64x2::new(3., a.extract(1));
+        let r = sse2::_mm_loadl_pd(a, &b);
+        assert_eq!(r, expected);
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_stream_pd() {
         #[repr(align(128))]
@@ -3775,6 +3930,14 @@ mod tests {
         }
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_store_sd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_store_sd(&mut dest, a);
+        assert_eq!(dest, a.extract(0));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_store_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
@@ -3847,6 +4010,22 @@ mod tests {
         assert_eq!(vals[1], 1.0);
     }
 
+    #[simd_test = "sse2"]
+    unsafe fn _mm_storeh_pd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_storeh_pd(&mut dest, a);
+        assert_eq!(dest, a.extract(1));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_storel_pd() {
+        let mut dest = 0.;
+        let a = f64x2::new(1., 2.);
+        sse2::_mm_storel_pd(&mut dest, a);
+        assert_eq!(dest, a.extract(0));
+    }
+
     #[simd_test = "sse2"]
     unsafe fn _mm_loadr_pd() {
         let mut mem = Memory {
@@ -4105,4 +4284,70 @@ mod tests {
         let r = sse2::_mm_unpacklo_pd(a, b);
         assert_eq!(r, f64x2::new(1.0, 3.0));
     }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_shuffle_pd() {
+        let a = f64x2::new(1., 2.);
+        let b = f64x2::new(3., 4.);
+        let expected = f64x2::new(1., 3.);
+        let r = sse2::_mm_shuffle_pd(a, b, 0);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_move_sd() {
+        let a = f64x2::new(1., 2.);
+        let b = f64x2::new(3., 4.);
+        let expected = f64x2::new(3., 2.);
+        let r = sse2::_mm_move_sd(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castpd_ps() {
+        let a = f64x2::splat(0.);
+        let expected = f32x4::splat(0.);
+        let r = sse2::_mm_castpd_ps(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castpd_si128() {
+        let a = f64x2::splat(0.);
+        let expected = i64x2::splat(0);
+        let r = sse2::_mm_castpd_si128(a);
+        assert_eq!(r, __m128i::from(expected));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castps_pd() {
+        let a = f32x4::splat(0.);
+        let expected = f64x2::splat(0.);
+        let r = sse2::_mm_castps_pd(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castps_si128() {
+        let a = f32x4::splat(0.);
+        let expected = i32x4::splat(0);
+        let r = sse2::_mm_castps_si128(a);
+        assert_eq!(r, __m128i::from(expected));
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castsi128_pd() {
+        let a = __m128i::from(i64x2::splat(0));
+        let expected = f64x2::splat(0.);
+        let r = sse2::_mm_castsi128_pd(a);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_castsi128_ps() {
+        let a = __m128i::from(i32x4::splat(0));
+        let expected = f32x4::splat(0.);
+        let r = sse2::_mm_castsi128_ps(a);
+        assert_eq!(r, expected);
+    }
 }