@@ -1552,86 +1552,31 @@ impl BlockContext<'_> {
1552
1552
Mf :: Pack2x16unorm => MathOp :: Ext ( spirv:: GLOp :: PackUnorm2x16 ) ,
1553
1553
Mf :: Pack2x16snorm => MathOp :: Ext ( spirv:: GLOp :: PackSnorm2x16 ) ,
1554
1554
fun @ ( Mf :: Pack4xI8 | Mf :: Pack4xU8 | Mf :: Pack4xI8Clamp | Mf :: Pack4xU8Clamp ) => {
1555
- let ( int_type, is_signed) = match fun {
1556
- Mf :: Pack4xI8 | Mf :: Pack4xI8Clamp => ( crate :: ScalarKind :: Sint , true ) ,
1557
- Mf :: Pack4xU8 | Mf :: Pack4xU8Clamp => ( crate :: ScalarKind :: Uint , false ) ,
1558
- _ => unreachable ! ( ) ,
1559
- } ;
1560
-
1555
+ let is_signed = matches ! ( fun, Mf :: Pack4xI8 | Mf :: Pack4xI8Clamp ) ;
1561
1556
let should_clamp = matches ! ( fun, Mf :: Pack4xI8Clamp | Mf :: Pack4xU8Clamp ) ;
1562
1557
1563
- let wide_vector_type_id = self . get_numeric_type_id ( NumericType :: Vector {
1564
- size : crate :: VectorSize :: Quad ,
1565
- scalar : crate :: Scalar {
1566
- kind : int_type,
1567
- width : 4 ,
1568
- } ,
1569
- } ) ;
1570
- let packed_vector_type_id = self . get_numeric_type_id ( NumericType :: Vector {
1571
- size : crate :: VectorSize :: Quad ,
1572
- scalar : crate :: Scalar {
1573
- kind : crate :: ScalarKind :: Uint ,
1574
- width : 1 ,
1575
- } ,
1576
- } ) ;
1577
-
1578
- let mut wide_vector = arg0_id;
1579
- if should_clamp {
1580
- let ( min, max, clamp_op) = if is_signed {
1581
- (
1582
- crate :: Literal :: I32 ( -128 ) ,
1583
- crate :: Literal :: I32 ( 127 ) ,
1584
- spirv:: GLOp :: SClamp ,
1558
+ let last_instruction =
1559
+ if self . writer . require_all ( & [ spirv:: Capability :: Int8 ] ) . is_ok ( ) {
1560
+ self . write_pack4x8_optimized (
1561
+ block,
1562
+ result_type_id,
1563
+ arg0_id,
1564
+ id,
1565
+ is_signed,
1566
+ should_clamp,
1585
1567
)
1586
1568
} else {
1587
- (
1588
- crate :: Literal :: U32 ( 0 ) ,
1589
- crate :: Literal :: U32 ( 255 ) ,
1590
- spirv:: GLOp :: UClamp ,
1569
+ self . write_pack4x8_polyfill (
1570
+ block,
1571
+ result_type_id,
1572
+ arg0_id,
1573
+ id,
1574
+ is_signed,
1575
+ should_clamp,
1591
1576
)
1592
1577
} ;
1593
- let [ min, max] = [ min, max] . map ( |lit| {
1594
- let scalar = self . writer . get_constant_scalar ( lit) ;
1595
- // TODO: can we cache these constant vectors somehow?
1596
- let id = self . gen_id ( ) ;
1597
- block. body . push ( Instruction :: composite_construct (
1598
- wide_vector_type_id,
1599
- id,
1600
- & [ scalar; 4 ] ,
1601
- ) ) ;
1602
- id
1603
- } ) ;
1604
1578
1605
- let clamp_id = self . gen_id ( ) ;
1606
- block. body . push ( Instruction :: ext_inst (
1607
- self . writer . gl450_ext_inst_id ,
1608
- clamp_op,
1609
- wide_vector_type_id,
1610
- clamp_id,
1611
- & [ wide_vector, min, max] ,
1612
- ) ) ;
1613
-
1614
- wide_vector = clamp_id;
1615
- }
1616
-
1617
- let packed_vector = self . gen_id ( ) ;
1618
- block. body . push ( Instruction :: unary (
1619
- spirv:: Op :: UConvert , // We truncate, so `UConvert` and `SConvert` behave identically.
1620
- packed_vector_type_id,
1621
- packed_vector,
1622
- wide_vector,
1623
- ) ) ;
1624
-
1625
- // The SPIR-V spec [1] defines the bit order for bit casting between a vector
1626
- // and a scalar precisely as required by the WGSL spec [2].
1627
- // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
1628
- // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
1629
- MathOp :: Custom ( Instruction :: unary (
1630
- spirv:: Op :: Bitcast ,
1631
- result_type_id,
1632
- id,
1633
- packed_vector,
1634
- ) )
1579
+ MathOp :: Custom ( last_instruction)
1635
1580
}
1636
1581
Mf :: Unpack4x8unorm => MathOp :: Ext ( spirv:: GLOp :: UnpackUnorm4x8 ) ,
1637
1582
Mf :: Unpack4x8snorm => MathOp :: Ext ( spirv:: GLOp :: UnpackSnorm4x8 ) ,
@@ -2679,6 +2624,194 @@ impl BlockContext<'_> {
2679
2624
}
2680
2625
}
2681
2626
2627
    /// Emit the `pack4xI8`/`pack4xU8` (optionally clamping) builtins using 8-bit
    /// integer vectors, for targets where the `Int8` capability is available.
    ///
    /// Appends the preparatory instructions (optional clamp, narrowing convert)
    /// to `block` and RETURNS the final `OpBitcast` instruction instead of
    /// pushing it; the caller wraps it (as `MathOp::Custom`) and is responsible
    /// for emitting it.
    ///
    /// * `result_type_id` — type id of the final packed `u32` result.
    /// * `arg0_id` — id of the input `vec4` of 32-bit integers.
    /// * `id` — pre-allocated result id for the returned instruction.
    /// * `is_signed` — `true` for `pack4xI8`/`pack4xI8Clamp` (Sint lanes).
    /// * `should_clamp` — `true` for the `…Clamp` variants.
    fn write_pack4x8_optimized(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        let int_type = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        // vec4 of 32-bit ints: the type of the incoming argument (width is in bytes).
        let wide_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: int_type,
                width: 4,
            },
        });
        // vec4 of 8-bit uints: the narrowed form we bitcast to the packed u32.
        // NOTE(review): requires the Int8 capability — the caller checks this
        // before choosing this path (see the Pack4x* match arm).
        let packed_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: crate::ScalarKind::Uint,
                width: 1,
            },
        });

        let mut wide_vector = arg0_id;
        if should_clamp {
            // Clamp each lane into the representable i8/u8 range before narrowing.
            let (min, max, clamp_op) = if is_signed {
                (
                    crate::Literal::I32(-128),
                    crate::Literal::I32(127),
                    spirv::GLOp::SClamp,
                )
            } else {
                (
                    crate::Literal::U32(0),
                    crate::Literal::U32(255),
                    spirv::GLOp::UClamp,
                )
            };
            // Splat each scalar bound into a vec4 so the clamp is component-wise.
            let [min, max] = [min, max].map(|lit| {
                let scalar = self.writer.get_constant_scalar(lit);
                // TODO: can we cache these constant vectors somehow?
                let id = self.gen_id();
                block.body.push(Instruction::composite_construct(
                    wide_vector_type_id,
                    id,
                    &[scalar; 4],
                ));
                id
            });

            let clamp_id = self.gen_id();
            block.body.push(Instruction::ext_inst(
                self.writer.gl450_ext_inst_id,
                clamp_op,
                wide_vector_type_id,
                clamp_id,
                &[wide_vector, min, max],
            ));

            wide_vector = clamp_id;
        }

        // Narrow each 32-bit lane to 8 bits.
        let packed_vector = self.gen_id();
        block.body.push(Instruction::unary(
            spirv::Op::UConvert, // We truncate, so `UConvert` and `SConvert` behave identically.
            packed_vector_type_id,
            packed_vector,
            wide_vector,
        ));

        // The SPIR-V spec [1] defines the bit order for bit casting between a vector
        // and a scalar precisely as required by the WGSL spec [2].
        // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
        // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
        Instruction::unary(spirv::Op::Bitcast, result_type_id, id, packed_vector)
    }
2709
+
2710
    /// Emit the `pack4xI8`/`pack4xU8` (optionally clamping) builtins without the
    /// `Int8` capability, by extracting each lane and inserting its low byte into
    /// the result with `OpBitFieldInsert`.
    ///
    /// Appends all but the last instruction to `block` and RETURNS the final
    /// `OpBitFieldInsert` (which writes the pre-allocated result `id`); the
    /// caller wraps it (as `MathOp::Custom`) and is responsible for emitting it.
    ///
    /// * `result_type_id` — type id of the final packed `u32` result.
    /// * `arg0_id` — id of the input `vec4` of 32-bit integers.
    /// * `id` — pre-allocated result id for the returned instruction.
    /// * `is_signed` — `true` for `pack4xI8`/`pack4xI8Clamp` (Sint lanes).
    /// * `should_clamp` — `true` for the `…Clamp` variants.
    fn write_pack4x8_polyfill(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        let int_type = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let uint_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::U32));
        // Scalar type of one lane of the input vector (width is in bytes).
        let int_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
            kind: int_type,
            width: 4,
        }));

        // Placeholder; always overwritten on the last loop iteration below.
        let mut last_instruction = Instruction::new(spirv::Op::Nop);

        let zero = self.writer.get_constant_scalar(crate::Literal::U32(0));
        // Running partial result: bytes packed so far (starts as the constant 0).
        let mut preresult = zero;
        // Pre-reserve: per lane we push one extract + one insert, plus a bitcast
        // when signed (the clamp pushes one more, not counted here).
        block
            .body
            .reserve(usize::from(VEC_LENGTH) * (2 + usize::from(is_signed)));

        let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
        const VEC_LENGTH: u8 = 4;
        for i in 0..u32::from(VEC_LENGTH) {
            // Byte `i` of the result lives at bit offset `i * 8` (WGSL packing order).
            let offset = self.writer.get_constant_scalar(crate::Literal::U32(i * 8));
            let mut extracted = self.gen_id();
            // OpCompositeExtract's index operand is a literal, so `binary` works
            // here even though `i` is not an id.
            block.body.push(Instruction::binary(
                spirv::Op::CompositeExtract,
                int_type_id,
                extracted,
                arg0_id,
                i,
            ));
            if is_signed {
                // Reinterpret the signed lane as u32 so the bit-field insert below
                // operates on an unsigned value.
                let casted = self.gen_id();
                block.body.push(Instruction::unary(
                    spirv::Op::Bitcast,
                    uint_type_id,
                    casted,
                    extracted,
                ));
                extracted = casted;
            }
            if should_clamp {
                // Clamp into the representable i8/u8 range; SClamp/UClamp choose
                // the signedness of the comparison, independent of operand type.
                let (min, max, clamp_op) = if is_signed {
                    (
                        crate::Literal::I32(-128),
                        crate::Literal::I32(127),
                        spirv::GLOp::SClamp,
                    )
                } else {
                    (
                        crate::Literal::U32(0),
                        crate::Literal::U32(255),
                        spirv::GLOp::UClamp,
                    )
                };
                let [min, max] = [min, max].map(|lit| self.writer.get_constant_scalar(lit));

                let clamp_id = self.gen_id();
                block.body.push(Instruction::ext_inst(
                    self.writer.gl450_ext_inst_id,
                    clamp_op,
                    result_type_id,
                    clamp_id,
                    &[extracted, min, max],
                ));

                extracted = clamp_id;
            }
            let is_last = i == u32::from(VEC_LENGTH - 1);
            if is_last {
                // The final insert targets the caller-provided result `id` and is
                // returned (not pushed) so the caller can emit it.
                last_instruction = Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    id,
                    preresult,
                    extracted,
                    offset,
                    eight,
                )
            } else {
                // Insert this lane's low 8 bits into the running partial result.
                let new_preresult = self.gen_id();
                block.body.push(Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    new_preresult,
                    preresult,
                    extracted,
                    offset,
                    eight,
                ));
                preresult = new_preresult;
            }
        }
        last_instruction
    }
2814
+
2682
2815
/// Generate one or more SPIR-V blocks for `naga_block`.
2683
2816
///
2684
2817
/// Use `label_id` as the label for the SPIR-V entry point block.
0 commit comments