Skip to content

Commit c8e640f

Browse files
committed
Require Capability::Int8 for vectorized [un]pack
1 parent f6e7693 commit c8e640f

File tree

1 file changed

+206
-73
lines changed

1 file changed

+206
-73
lines changed

naga/src/back/spv/block.rs

+206-73
Original file line number | Diff line number | Diff line change
@@ -1552,86 +1552,31 @@ impl BlockContext<'_> {
15521552
Mf::Pack2x16unorm => MathOp::Ext(spirv::GLOp::PackUnorm2x16),
15531553
Mf::Pack2x16snorm => MathOp::Ext(spirv::GLOp::PackSnorm2x16),
15541554
fun @ (Mf::Pack4xI8 | Mf::Pack4xU8 | Mf::Pack4xI8Clamp | Mf::Pack4xU8Clamp) => {
1555-
let (int_type, is_signed) = match fun {
1556-
Mf::Pack4xI8 | Mf::Pack4xI8Clamp => (crate::ScalarKind::Sint, true),
1557-
Mf::Pack4xU8 | Mf::Pack4xU8Clamp => (crate::ScalarKind::Uint, false),
1558-
_ => unreachable!(),
1559-
};
1560-
1555+
let is_signed = matches!(fun, Mf::Pack4xI8 | Mf::Pack4xI8Clamp);
15611556
let should_clamp = matches!(fun, Mf::Pack4xI8Clamp | Mf::Pack4xU8Clamp);
15621557

1563-
let wide_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
1564-
size: crate::VectorSize::Quad,
1565-
scalar: crate::Scalar {
1566-
kind: int_type,
1567-
width: 4,
1568-
},
1569-
});
1570-
let packed_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
1571-
size: crate::VectorSize::Quad,
1572-
scalar: crate::Scalar {
1573-
kind: crate::ScalarKind::Uint,
1574-
width: 1,
1575-
},
1576-
});
1577-
1578-
let mut wide_vector = arg0_id;
1579-
if should_clamp {
1580-
let (min, max, clamp_op) = if is_signed {
1581-
(
1582-
crate::Literal::I32(-128),
1583-
crate::Literal::I32(127),
1584-
spirv::GLOp::SClamp,
1558+
let last_instruction =
1559+
if self.writer.require_all(&[spirv::Capability::Int8]).is_ok() {
1560+
self.write_pack4x8_optimized(
1561+
block,
1562+
result_type_id,
1563+
arg0_id,
1564+
id,
1565+
is_signed,
1566+
should_clamp,
15851567
)
15861568
} else {
1587-
(
1588-
crate::Literal::U32(0),
1589-
crate::Literal::U32(255),
1590-
spirv::GLOp::UClamp,
1569+
self.write_pack4x8_polyfill(
1570+
block,
1571+
result_type_id,
1572+
arg0_id,
1573+
id,
1574+
is_signed,
1575+
should_clamp,
15911576
)
15921577
};
1593-
let [min, max] = [min, max].map(|lit| {
1594-
let scalar = self.writer.get_constant_scalar(lit);
1595-
// TODO: can we cache these constant vectors somehow?
1596-
let id = self.gen_id();
1597-
block.body.push(Instruction::composite_construct(
1598-
wide_vector_type_id,
1599-
id,
1600-
&[scalar; 4],
1601-
));
1602-
id
1603-
});
16041578

1605-
let clamp_id = self.gen_id();
1606-
block.body.push(Instruction::ext_inst(
1607-
self.writer.gl450_ext_inst_id,
1608-
clamp_op,
1609-
wide_vector_type_id,
1610-
clamp_id,
1611-
&[wide_vector, min, max],
1612-
));
1613-
1614-
wide_vector = clamp_id;
1615-
}
1616-
1617-
let packed_vector = self.gen_id();
1618-
block.body.push(Instruction::unary(
1619-
spirv::Op::UConvert, // We truncate, so `UConvert` and `SConvert` behave identically.
1620-
packed_vector_type_id,
1621-
packed_vector,
1622-
wide_vector,
1623-
));
1624-
1625-
// The SPIR-V spec [1] defines the bit order for bit casting between a vector
1626-
// and a scalar precisely as required by the WGSL spec [2].
1627-
// [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
1628-
// [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
1629-
MathOp::Custom(Instruction::unary(
1630-
spirv::Op::Bitcast,
1631-
result_type_id,
1632-
id,
1633-
packed_vector,
1634-
))
1579+
MathOp::Custom(last_instruction)
16351580
}
16361581
Mf::Unpack4x8unorm => MathOp::Ext(spirv::GLOp::UnpackUnorm4x8),
16371582
Mf::Unpack4x8snorm => MathOp::Ext(spirv::GLOp::UnpackSnorm4x8),
@@ -2679,6 +2624,194 @@ impl BlockContext<'_> {
26792624
}
26802625
}
26812626

2627+
fn write_pack4x8_optimized(
2628+
&mut self,
2629+
block: &mut Block,
2630+
result_type_id: u32,
2631+
arg0_id: u32,
2632+
id: u32,
2633+
is_signed: bool,
2634+
should_clamp: bool,
2635+
) -> Instruction {
2636+
let int_type = if is_signed {
2637+
crate::ScalarKind::Sint
2638+
} else {
2639+
crate::ScalarKind::Uint
2640+
};
2641+
let wide_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
2642+
size: crate::VectorSize::Quad,
2643+
scalar: crate::Scalar {
2644+
kind: int_type,
2645+
width: 4,
2646+
},
2647+
});
2648+
let packed_vector_type_id = self.get_numeric_type_id(NumericType::Vector {
2649+
size: crate::VectorSize::Quad,
2650+
scalar: crate::Scalar {
2651+
kind: crate::ScalarKind::Uint,
2652+
width: 1,
2653+
},
2654+
});
2655+
2656+
let mut wide_vector = arg0_id;
2657+
if should_clamp {
2658+
let (min, max, clamp_op) = if is_signed {
2659+
(
2660+
crate::Literal::I32(-128),
2661+
crate::Literal::I32(127),
2662+
spirv::GLOp::SClamp,
2663+
)
2664+
} else {
2665+
(
2666+
crate::Literal::U32(0),
2667+
crate::Literal::U32(255),
2668+
spirv::GLOp::UClamp,
2669+
)
2670+
};
2671+
let [min, max] = [min, max].map(|lit| {
2672+
let scalar = self.writer.get_constant_scalar(lit);
2673+
// TODO: can we cache these constant vectors somehow?
2674+
let id = self.gen_id();
2675+
block.body.push(Instruction::composite_construct(
2676+
wide_vector_type_id,
2677+
id,
2678+
&[scalar; 4],
2679+
));
2680+
id
2681+
});
2682+
2683+
let clamp_id = self.gen_id();
2684+
block.body.push(Instruction::ext_inst(
2685+
self.writer.gl450_ext_inst_id,
2686+
clamp_op,
2687+
wide_vector_type_id,
2688+
clamp_id,
2689+
&[wide_vector, min, max],
2690+
));
2691+
2692+
wide_vector = clamp_id;
2693+
}
2694+
2695+
let packed_vector = self.gen_id();
2696+
block.body.push(Instruction::unary(
2697+
spirv::Op::UConvert, // We truncate, so `UConvert` and `SConvert` behave identically.
2698+
packed_vector_type_id,
2699+
packed_vector,
2700+
wide_vector,
2701+
));
2702+
2703+
// The SPIR-V spec [1] defines the bit order for bit casting between a vector
2704+
// and a scalar precisely as required by the WGSL spec [2].
2705+
// [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
2706+
// [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
2707+
Instruction::unary(spirv::Op::Bitcast, result_type_id, id, packed_vector)
2708+
}
2709+
2710+
fn write_pack4x8_polyfill(
2711+
&mut self,
2712+
block: &mut Block,
2713+
result_type_id: u32,
2714+
arg0_id: u32,
2715+
id: u32,
2716+
is_signed: bool,
2717+
should_clamp: bool,
2718+
) -> Instruction {
2719+
let int_type = if is_signed {
2720+
crate::ScalarKind::Sint
2721+
} else {
2722+
crate::ScalarKind::Uint
2723+
};
2724+
let uint_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::U32));
2725+
let int_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
2726+
kind: int_type,
2727+
width: 4,
2728+
}));
2729+
2730+
let mut last_instruction = Instruction::new(spirv::Op::Nop);
2731+
2732+
let zero = self.writer.get_constant_scalar(crate::Literal::U32(0));
2733+
let mut preresult = zero;
2734+
block
2735+
.body
2736+
.reserve(usize::from(VEC_LENGTH) * (2 + usize::from(is_signed)));
2737+
2738+
let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
2739+
const VEC_LENGTH: u8 = 4;
2740+
for i in 0..u32::from(VEC_LENGTH) {
2741+
let offset = self.writer.get_constant_scalar(crate::Literal::U32(i * 8));
2742+
let mut extracted = self.gen_id();
2743+
block.body.push(Instruction::binary(
2744+
spirv::Op::CompositeExtract,
2745+
int_type_id,
2746+
extracted,
2747+
arg0_id,
2748+
i,
2749+
));
2750+
if is_signed {
2751+
let casted = self.gen_id();
2752+
block.body.push(Instruction::unary(
2753+
spirv::Op::Bitcast,
2754+
uint_type_id,
2755+
casted,
2756+
extracted,
2757+
));
2758+
extracted = casted;
2759+
}
2760+
if should_clamp {
2761+
let (min, max, clamp_op) = if is_signed {
2762+
(
2763+
crate::Literal::I32(-128),
2764+
crate::Literal::I32(127),
2765+
spirv::GLOp::SClamp,
2766+
)
2767+
} else {
2768+
(
2769+
crate::Literal::U32(0),
2770+
crate::Literal::U32(255),
2771+
spirv::GLOp::UClamp,
2772+
)
2773+
};
2774+
let [min, max] = [min, max].map(|lit| self.writer.get_constant_scalar(lit));
2775+
2776+
let clamp_id = self.gen_id();
2777+
block.body.push(Instruction::ext_inst(
2778+
self.writer.gl450_ext_inst_id,
2779+
clamp_op,
2780+
result_type_id,
2781+
clamp_id,
2782+
&[extracted, min, max],
2783+
));
2784+
2785+
extracted = clamp_id;
2786+
}
2787+
let is_last = i == u32::from(VEC_LENGTH - 1);
2788+
if is_last {
2789+
last_instruction = Instruction::quaternary(
2790+
spirv::Op::BitFieldInsert,
2791+
result_type_id,
2792+
id,
2793+
preresult,
2794+
extracted,
2795+
offset,
2796+
eight,
2797+
)
2798+
} else {
2799+
let new_preresult = self.gen_id();
2800+
block.body.push(Instruction::quaternary(
2801+
spirv::Op::BitFieldInsert,
2802+
result_type_id,
2803+
new_preresult,
2804+
preresult,
2805+
extracted,
2806+
offset,
2807+
eight,
2808+
));
2809+
preresult = new_preresult;
2810+
}
2811+
}
2812+
last_instruction
2813+
}
2814+
26822815
/// Generate one or more SPIR-V blocks for `naga_block`.
26832816
///
26842817
/// Use `label_id` as the label for the SPIR-V entry point block.

0 commit comments

Comments
 (0)