@@ -1552,86 +1552,31 @@ impl BlockContext<'_> {
1552
1552
Mf :: Pack2x16unorm => MathOp :: Ext ( spirv:: GLOp :: PackUnorm2x16 ) ,
1553
1553
Mf :: Pack2x16snorm => MathOp :: Ext ( spirv:: GLOp :: PackSnorm2x16 ) ,
1554
1554
fun @ ( Mf :: Pack4xI8 | Mf :: Pack4xU8 | Mf :: Pack4xI8Clamp | Mf :: Pack4xU8Clamp ) => {
1555
- let ( int_type, is_signed) = match fun {
1556
- Mf :: Pack4xI8 | Mf :: Pack4xI8Clamp => ( crate :: ScalarKind :: Sint , true ) ,
1557
- Mf :: Pack4xU8 | Mf :: Pack4xU8Clamp => ( crate :: ScalarKind :: Uint , false ) ,
1558
- _ => unreachable ! ( ) ,
1559
- } ;
1560
-
1555
+ let is_signed = matches ! ( fun, Mf :: Pack4xI8 | Mf :: Pack4xI8Clamp ) ;
1561
1556
let should_clamp = matches ! ( fun, Mf :: Pack4xI8Clamp | Mf :: Pack4xU8Clamp ) ;
1562
1557
1563
- let wide_vector_type_id = self . get_numeric_type_id ( NumericType :: Vector {
1564
- size : crate :: VectorSize :: Quad ,
1565
- scalar : crate :: Scalar {
1566
- kind : int_type,
1567
- width : 4 ,
1568
- } ,
1569
- } ) ;
1570
- let packed_vector_type_id = self . get_numeric_type_id ( NumericType :: Vector {
1571
- size : crate :: VectorSize :: Quad ,
1572
- scalar : crate :: Scalar {
1573
- kind : crate :: ScalarKind :: Uint ,
1574
- width : 1 ,
1575
- } ,
1576
- } ) ;
1577
-
1578
- let mut wide_vector = arg0_id;
1579
- if should_clamp {
1580
- let ( min, max, clamp_op) = if is_signed {
1581
- (
1582
- crate :: Literal :: I32 ( -128 ) ,
1583
- crate :: Literal :: I32 ( 127 ) ,
1584
- spirv:: GLOp :: SClamp ,
1558
+ let last_instruction =
1559
+ if self . writer . require_all ( & [ spirv:: Capability :: Int8 ] ) . is_ok ( ) {
1560
+ self . write_pack4x8_optimized (
1561
+ block,
1562
+ result_type_id,
1563
+ arg0_id,
1564
+ id,
1565
+ is_signed,
1566
+ should_clamp,
1585
1567
)
1586
1568
} else {
1587
- (
1588
- crate :: Literal :: U32 ( 0 ) ,
1589
- crate :: Literal :: U32 ( 255 ) ,
1590
- spirv:: GLOp :: UClamp ,
1569
+ self . write_pack4x8_polyfill (
1570
+ block,
1571
+ result_type_id,
1572
+ arg0_id,
1573
+ id,
1574
+ is_signed,
1575
+ should_clamp,
1591
1576
)
1592
1577
} ;
1593
- let [ min, max] = [ min, max] . map ( |lit| {
1594
- let scalar = self . writer . get_constant_scalar ( lit) ;
1595
- // TODO: can we cache these constant vectors somehow?
1596
- let id = self . gen_id ( ) ;
1597
- block. body . push ( Instruction :: composite_construct (
1598
- wide_vector_type_id,
1599
- id,
1600
- & [ scalar; 4 ] ,
1601
- ) ) ;
1602
- id
1603
- } ) ;
1604
1578
1605
- let clamp_id = self . gen_id ( ) ;
1606
- block. body . push ( Instruction :: ext_inst (
1607
- self . writer . gl450_ext_inst_id ,
1608
- clamp_op,
1609
- wide_vector_type_id,
1610
- clamp_id,
1611
- & [ wide_vector, min, max] ,
1612
- ) ) ;
1613
-
1614
- wide_vector = clamp_id;
1615
- }
1616
-
1617
- let packed_vector = self . gen_id ( ) ;
1618
- block. body . push ( Instruction :: unary (
1619
- spirv:: Op :: UConvert , // We truncate, so `UConvert` and `SConvert` behave identically.
1620
- packed_vector_type_id,
1621
- packed_vector,
1622
- wide_vector,
1623
- ) ) ;
1624
-
1625
- // The SPIR-V spec [1] defines the bit order for bit casting between a vector
1626
- // and a scalar precisely as required by the WGSL spec [2].
1627
- // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
1628
- // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
1629
- MathOp :: Custom ( Instruction :: unary (
1630
- spirv:: Op :: Bitcast ,
1631
- result_type_id,
1632
- id,
1633
- packed_vector,
1634
- ) )
1579
+ MathOp :: Custom ( last_instruction)
1635
1580
}
1636
1581
Mf :: Unpack4x8unorm => MathOp :: Ext ( spirv:: GLOp :: UnpackUnorm4x8 ) ,
1637
1582
Mf :: Unpack4x8snorm => MathOp :: Ext ( spirv:: GLOp :: UnpackSnorm4x8 ) ,
@@ -2679,6 +2624,194 @@ impl BlockContext<'_> {
2679
2624
}
2680
2625
}
2681
2626
2627
    /// Emit the `pack4xI8`/`pack4xU8` (optionally clamping) builtins using 8-bit
    /// integer vectors, for targets where the `Int8` capability is available.
    ///
    /// Appends the preparatory instructions (optional clamp, narrowing convert)
    /// to `block` and RETURNS the final `OpBitcast` instruction instead of
    /// pushing it; the caller wraps it (as `MathOp::Custom`) and is responsible
    /// for emitting it.
    ///
    /// * `result_type_id` — type id of the final packed `u32` result.
    /// * `arg0_id` — id of the input `vec4` of 32-bit integers.
    /// * `id` — pre-allocated result id for the returned instruction.
    /// * `is_signed` — `true` for `pack4xI8`/`pack4xI8Clamp` (Sint lanes).
    /// * `should_clamp` — `true` for the `…Clamp` variants.
    fn write_pack4x8_optimized(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        let int_type = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        // vec4 of 32-bit ints: the type of the incoming argument (width is in bytes).
        let wide_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: int_type,
                width: 4,
            },
        });
        // vec4 of 8-bit uints: the narrowed form we bitcast to the packed u32.
        // NOTE(review): requires the Int8 capability — the caller checks this
        // before choosing this path (see the Pack4x* match arm).
        let packed_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: crate::ScalarKind::Uint,
                width: 1,
            },
        });

        let mut wide_vector = arg0_id;
        if should_clamp {
            // Clamp each lane into the representable i8/u8 range before narrowing.
            let (min, max, clamp_op) = if is_signed {
                (
                    crate::Literal::I32(-128),
                    crate::Literal::I32(127),
                    spirv::GLOp::SClamp,
                )
            } else {
                (
                    crate::Literal::U32(0),
                    crate::Literal::U32(255),
                    spirv::GLOp::UClamp,
                )
            };
            // Splat each scalar bound into a vec4 so the clamp is component-wise.
            let [min, max] = [min, max].map(|lit| {
                let scalar = self.writer.get_constant_scalar(lit);
                // TODO: can we cache these constant vectors somehow?
                let id = self.gen_id();
                block.body.push(Instruction::composite_construct(
                    wide_vector_type_id,
                    id,
                    &[scalar; 4],
                ));
                id
            });

            let clamp_id = self.gen_id();
            block.body.push(Instruction::ext_inst(
                self.writer.gl450_ext_inst_id,
                clamp_op,
                wide_vector_type_id,
                clamp_id,
                &[wide_vector, min, max],
            ));

            wide_vector = clamp_id;
        }

        // Narrow each 32-bit lane to 8 bits.
        let packed_vector = self.gen_id();
        block.body.push(Instruction::unary(
            spirv::Op::UConvert, // We truncate, so `UConvert` and `SConvert` behave identically.
            packed_vector_type_id,
            packed_vector,
            wide_vector,
        ));

        // The SPIR-V spec [1] defines the bit order for bit casting between a vector
        // and a scalar precisely as required by the WGSL spec [2].
        // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
        // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
        Instruction::unary(spirv::Op::Bitcast, result_type_id, id, packed_vector)
    }
2709
+
2710
    /// Emit the `pack4xI8`/`pack4xU8` (optionally clamping) builtins without the
    /// `Int8` capability, by extracting each lane and inserting its low byte into
    /// the result with `OpBitFieldInsert`.
    ///
    /// Appends all but the last instruction to `block` and RETURNS the final
    /// `OpBitFieldInsert` (which writes the pre-allocated result `id`); the
    /// caller wraps it (as `MathOp::Custom`) and is responsible for emitting it.
    ///
    /// * `result_type_id` — type id of the final packed `u32` result.
    /// * `arg0_id` — id of the input `vec4` of 32-bit integers.
    /// * `id` — pre-allocated result id for the returned instruction.
    /// * `is_signed` — `true` for `pack4xI8`/`pack4xI8Clamp` (Sint lanes).
    /// * `should_clamp` — `true` for the `…Clamp` variants.
    fn write_pack4x8_polyfill(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        let int_type = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let uint_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::U32));
        // Scalar type of one lane of the input vector (width is in bytes).
        let int_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
            kind: int_type,
            width: 4,
        }));

        // Placeholder; always overwritten on the last loop iteration below.
        let mut last_instruction = Instruction::new(spirv::Op::Nop);

        let zero = self.writer.get_constant_scalar(crate::Literal::U32(0));
        // Running partial result: bytes packed so far (starts as the constant 0).
        let mut preresult = zero;
        // Pre-reserve: per lane we push one extract + one insert, plus a bitcast
        // when signed (the clamp pushes one more, not counted here).
        block
            .body
            .reserve(usize::from(VEC_LENGTH) * (2 + usize::from(is_signed)));

        let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
        const VEC_LENGTH: u8 = 4;
        for i in 0..u32::from(VEC_LENGTH) {
            // Byte `i` of the result lives at bit offset `i * 8` (WGSL packing order).
            let offset = self.writer.get_constant_scalar(crate::Literal::U32(i * 8));
            let mut extracted = self.gen_id();
            // OpCompositeExtract's index operand is a literal, so `binary` works
            // here even though `i` is not an id.
            block.body.push(Instruction::binary(
                spirv::Op::CompositeExtract,
                int_type_id,
                extracted,
                arg0_id,
                i,
            ));
            if is_signed {
                // Reinterpret the signed lane as u32 so the bit-field insert below
                // operates on an unsigned value.
                let casted = self.gen_id();
                block.body.push(Instruction::unary(
                    spirv::Op::Bitcast,
                    uint_type_id,
                    casted,
                    extracted,
                ));
                extracted = casted;
            }
            if should_clamp {
                // Clamp into the representable i8/u8 range; SClamp/UClamp choose
                // the signedness of the comparison, independent of operand type.
                let (min, max, clamp_op) = if is_signed {
                    (
                        crate::Literal::I32(-128),
                        crate::Literal::I32(127),
                        spirv::GLOp::SClamp,
                    )
                } else {
                    (
                        crate::Literal::U32(0),
                        crate::Literal::U32(255),
                        spirv::GLOp::UClamp,
                    )
                };
                let [min, max] = [min, max].map(|lit| self.writer.get_constant_scalar(lit));

                let clamp_id = self.gen_id();
                block.body.push(Instruction::ext_inst(
                    self.writer.gl450_ext_inst_id,
                    clamp_op,
                    result_type_id,
                    clamp_id,
                    &[extracted, min, max],
                ));

                extracted = clamp_id;
            }
            let is_last = i == u32::from(VEC_LENGTH - 1);
            if is_last {
                // The final insert targets the caller-provided result `id` and is
                // returned (not pushed) so the caller can emit it.
                last_instruction = Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    id,
                    preresult,
                    extracted,
                    offset,
                    eight,
                )
            } else {
                // Insert this lane's low 8 bits into the running partial result.
                let new_preresult = self.gen_id();
                block.body.push(Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    new_preresult,
                    preresult,
                    extracted,
                    offset,
                    eight,
                ));
                preresult = new_preresult;
            }
        }
        last_instruction
    }
2814
+
2682
2815
/// Generate one or more SPIR-V blocks for `naga_block`.
2683
2816
///
2684
2817
/// Use `label_id` as the label for the SPIR-V entry point block.
0 commit comments