@@ -361,12 +361,50 @@ extern "C" {
361
361
fn vmhraddshs (
362
362
a : vector_signed_short , b : vector_signed_short , c : vector_signed_short ,
363
363
) -> vector_signed_short ;
364
+ #[ link_name = "llvm.ppc.altivec.vmsumuhs" ]
365
+ fn vmsumuhs (
366
+ a : vector_unsigned_short , b : vector_unsigned_short , c : vector_unsigned_int ) -> vector_unsigned_int ;
367
+ #[ link_name = "llvm.ppc.altivec.vmsumshs" ]
368
+ fn vmsumshs (
369
+ a : vector_signed_short , b : vector_signed_short , c : vector_signed_int ) -> vector_signed_int ;
364
370
}
365
371
366
372
mod sealed {
367
373
368
374
use super :: * ;
369
375
376
+ #[ inline]
377
+ #[ target_feature( enable = "altivec" ) ]
378
+ #[ cfg_attr( test, assert_instr( vmsumuhs) ) ]
379
+ unsafe fn vec_vmsumuhs (
380
+ a : vector_unsigned_short , b : vector_unsigned_short , c : vector_unsigned_int ) -> vector_unsigned_int {
381
+ vmsumuhs ( a, b, c)
382
+ }
383
+
384
+ #[ inline]
385
+ #[ target_feature( enable = "altivec" ) ]
386
+ #[ cfg_attr( test, assert_instr( vmsumshs) ) ]
387
+ unsafe fn vec_vmsumshs (
388
+ a : vector_signed_short , b : vector_signed_short , c : vector_signed_int ) -> vector_signed_int {
389
+ vmsumshs ( a, b, c)
390
+ }
391
+
392
/// Dispatch trait behind the public `vec_msums` function.
///
/// `Self` is the type of the two multiplicand vectors; `Other` is the type of
/// the third operand and of the result.
pub trait VectorMsums<Other> {
    /// Performs the multiply-sum of `self` and `b` with `c` as the third
    /// operand, returning a value of the same type as `c`.
    unsafe fn vec_msums(self, b: Self, c: Other) -> Other;
}
395
+
396
+ impl VectorMsums < vector_unsigned_int > for vector_unsigned_short {
397
+ unsafe fn vec_msums ( self , b : Self , c : vector_unsigned_int ) -> vector_unsigned_int {
398
+ vmsumuhs ( self , b, c)
399
+ }
400
+ }
401
+
402
+ impl VectorMsums < vector_signed_int > for vector_signed_short {
403
+ unsafe fn vec_msums ( self , b : Self , c : vector_signed_int ) -> vector_signed_int {
404
+ vmsumshs ( self , b, c)
405
+ }
406
+ }
407
+
370
408
#[ inline]
371
409
#[ target_feature( enable = "altivec" ) ]
372
410
#[ cfg_attr( test, assert_instr( vperm) ) ]
@@ -746,6 +784,14 @@ pub unsafe fn vec_mradds(
746
784
vmhraddshs ( a, b, c)
747
785
}
748
786
787
+ /// Vector Multiply Sum Saturated
788
+ #[ inline]
789
+ #[ target_feature( enable = "altivec" ) ]
790
+ pub unsafe fn vec_msums < T , U > ( a : T , b : T , c : U ) -> U
791
+ where T : sealed:: VectorMsums < U > {
792
+ a. vec_msums ( b, c)
793
+ }
794
+
749
795
#[ cfg( target_endian = "big" ) ]
750
796
mod endian {
751
797
use super :: * ;
@@ -904,6 +950,56 @@ mod tests {
904
950
assert_eq ! ( d, vec_mradds( a, b, c) . into_bits( ) ) ;
905
951
}
906
952
953
+ #[ simd_test( enable = "altivec" ) ]
954
+ unsafe fn test_vec_msums_unsigned ( ) {
955
+ let a: vector_unsigned_short = u16x8:: new (
956
+ 0 * 256 ,
957
+ 1 * 256 ,
958
+ 2 * 256 ,
959
+ 3 * 256 ,
960
+ 4 * 256 ,
961
+ 5 * 256 ,
962
+ 6 * 256 ,
963
+ 7 * 256 ,
964
+ ) . into_bits ( ) ;
965
+ let b: vector_unsigned_short =
966
+ u16x8:: new ( 256 , 256 , 256 , 256 , 256 , 256 , 256 , 256 ) . into_bits ( ) ;
967
+ let c: vector_unsigned_int = u32x4:: new ( 0 , 1 , 2 , 3 ) . into_bits ( ) ;
968
+ let d = u32x4:: new (
969
+ ( 0 + 1 ) * 256 * 256 + 0 ,
970
+ ( 2 + 3 ) * 256 * 256 + 1 ,
971
+ ( 4 + 5 ) * 256 * 256 + 2 ,
972
+ ( 6 + 7 ) * 256 * 256 + 3 ,
973
+ ) ;
974
+
975
+ assert_eq ! ( d, vec_msums( a, b, c) . into_bits( ) ) ;
976
+ }
977
+
978
+ #[ simd_test( enable = "altivec" ) ]
979
+ unsafe fn test_vec_msums_signed ( ) {
980
+ let a: vector_signed_short = i16x8:: new (
981
+ 0 * 256 ,
982
+ -1 * 256 ,
983
+ 2 * 256 ,
984
+ -3 * 256 ,
985
+ 4 * 256 ,
986
+ -5 * 256 ,
987
+ 6 * 256 ,
988
+ -7 * 256 ,
989
+ ) . into_bits ( ) ;
990
+ let b: vector_signed_short =
991
+ i16x8:: new ( 256 , 256 , 256 , 256 , 256 , 256 , 256 , 256 ) . into_bits ( ) ;
992
+ let c: vector_signed_int = i32x4:: new ( 0 , 1 , 2 , 3 ) . into_bits ( ) ;
993
+ let d = i32x4:: new (
994
+ ( 0 - 1 ) * 256 * 256 + 0 ,
995
+ ( 2 - 3 ) * 256 * 256 + 1 ,
996
+ ( 4 - 5 ) * 256 * 256 + 2 ,
997
+ ( 6 - 7 ) * 256 * 256 + 3 ,
998
+ ) ;
999
+
1000
+ assert_eq ! ( d, vec_msums( a, b, c) . into_bits( ) ) ;
1001
+ }
1002
+
907
1003
#[ simd_test( enable = "altivec" ) ]
908
1004
unsafe fn vec_add_i32x4_i32x4 ( ) {
909
1005
let x = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
0 commit comments