Skip to content

Commit 9b7a965

Browse files
bradwerth authored and teoxoy committed
Add an experimental vertex pulling flag to Metal pipelines.
This provides a flag in msl::PipelineOptions that attempts to write all Metal vertex entry points to use a vertex pulling technique. It does this by: 1) Forcing the _buffer_sizes structure to be generated for all vertex entry points. The structure has additional buffer_size members that contain the byte sizes of the vertex buffers. 2) Adding new args to vertex entry points for the vertex id and/or the instance id and for the bound buffers. If there is an existing @Builtin(vertex_index) or @Builtin(instance_index) param, then no duplicate arg is created. 3) Adding code at the beginning of the function for vertex entry points to compare the vertex id or instance id against the lengths of all the bound buffers, and force an early-exit if the bounds are violated. 4) Extracting the raw bytes from the vertex buffer(s) and unpacking those bytes into the bound attributes with the expected types. 5) Replacing the varyings input and instead using the unpacked attributes to fill any structs-as-args that are rebuilt in the entry point. A new naga test is added which exercises this flag and demonstrates the effect of the transform. The msl generated by this test passes validation. Eventually this transformation will be the default, always-on behavior for Metal pipelines, though the flag may remain so that naga translation tests can be run with and without the transformation.
1 parent 480d4db commit 9b7a965

25 files changed

+1540
-71
lines changed

deno_webgpu/pipeline.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ pub fn op_webgpu_create_compute_pipeline(
114114
entry_point: compute.entry_point.map(Cow::from),
115115
constants: Cow::Owned(compute.constants.unwrap_or_default()),
116116
zero_initialize_workgroup_memory: true,
117+
vertex_pulling_transform: false,
117118
},
118119
cache: None,
119120
};
@@ -363,6 +364,7 @@ pub fn op_webgpu_create_render_pipeline(
363364
constants: Cow::Owned(fragment.constants.unwrap_or_default()),
364365
// Required to be true for WebGPU
365366
zero_initialize_workgroup_memory: true,
367+
vertex_pulling_transform: false,
366368
},
367369
targets: Cow::Owned(fragment.targets),
368370
})
@@ -388,6 +390,7 @@ pub fn op_webgpu_create_render_pipeline(
388390
constants: Cow::Owned(args.vertex.constants.unwrap_or_default()),
389391
// Required to be true for WebGPU
390392
zero_initialize_workgroup_memory: true,
393+
vertex_pulling_transform: false,
391394
},
392395
buffers: Cow::Owned(vertex_buffers),
393396
},

naga/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md).
7979

8080
- Add and fix minimum Metal version checks for optional functionality. ([#2486](https://github.com/gfx-rs/naga/pull/2486)) **@teoxoy**
8181
- Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop**
82+
- Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth**
8283

8384
#### GLSL-OUT
8485

naga/src/back/msl/mod.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,113 @@ impl Default for Options {
222222
}
223223
}
224224

225+
/// Corresponds to [WebGPU `GPUVertexFormat`](
/// https://gpuweb.github.io/gpuweb/#enumdef-gpuvertexformat).
///
/// Each variant names how one vertex attribute is encoded in a vertex
/// buffer, together with the shader-side type it is presented as.
///
/// The discriminants are explicit and `#[repr(u32)]`, so `format as u32`
/// is stable. Note the gap between `Sint32x4 = 29` and
/// `Unorm10_10_10_2 = 34`: values 30..=33 are not used by this enum.
/// NOTE(review): presumably the gap keeps the numbering aligned with an
/// external vertex-format enum; confirm before assigning those values.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub enum VertexFormat {
    /// Two unsigned bytes (u8). `vec2<u32>` in shaders.
    Uint8x2 = 0,
    /// Four unsigned bytes (u8). `vec4<u32>` in shaders.
    Uint8x4 = 1,
    /// Two signed bytes (i8). `vec2<i32>` in shaders.
    Sint8x2 = 2,
    /// Four signed bytes (i8). `vec4<i32>` in shaders.
    Sint8x4 = 3,
    /// Two unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec2<f32>` in shaders.
    Unorm8x2 = 4,
    /// Four unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec4<f32>` in shaders.
    Unorm8x4 = 5,
    /// Two signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec2<f32>` in shaders.
    Snorm8x2 = 6,
    /// Four signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec4<f32>` in shaders.
    Snorm8x4 = 7,
    /// Two unsigned shorts (u16). `vec2<u32>` in shaders.
    Uint16x2 = 8,
    /// Four unsigned shorts (u16). `vec4<u32>` in shaders.
    Uint16x4 = 9,
    /// Two signed shorts (i16). `vec2<i32>` in shaders.
    Sint16x2 = 10,
    /// Four signed shorts (i16). `vec4<i32>` in shaders.
    Sint16x4 = 11,
    /// Two unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec2<f32>` in shaders.
    Unorm16x2 = 12,
    /// Four unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec4<f32>` in shaders.
    Unorm16x4 = 13,
    /// Two signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec2<f32>` in shaders.
    Snorm16x2 = 14,
    /// Four signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec4<f32>` in shaders.
    Snorm16x4 = 15,
    /// Two half-precision floats (no Rust equiv). `vec2<f32>` in shaders.
    Float16x2 = 16,
    /// Four half-precision floats (no Rust equiv). `vec4<f32>` in shaders.
    Float16x4 = 17,
    /// One single-precision float (f32). `f32` in shaders.
    Float32 = 18,
    /// Two single-precision floats (f32). `vec2<f32>` in shaders.
    Float32x2 = 19,
    /// Three single-precision floats (f32). `vec3<f32>` in shaders.
    Float32x3 = 20,
    /// Four single-precision floats (f32). `vec4<f32>` in shaders.
    Float32x4 = 21,
    /// One unsigned int (u32). `u32` in shaders.
    Uint32 = 22,
    /// Two unsigned ints (u32). `vec2<u32>` in shaders.
    Uint32x2 = 23,
    /// Three unsigned ints (u32). `vec3<u32>` in shaders.
    Uint32x3 = 24,
    /// Four unsigned ints (u32). `vec4<u32>` in shaders.
    Uint32x4 = 25,
    /// One signed int (i32). `i32` in shaders.
    Sint32 = 26,
    /// Two signed ints (i32). `vec2<i32>` in shaders.
    Sint32x2 = 27,
    /// Three signed ints (i32). `vec3<i32>` in shaders.
    Sint32x3 = 28,
    /// Four signed ints (i32). `vec4<i32>` in shaders.
    Sint32x4 = 29,
    /// Three unsigned 10-bit integers and one 2-bit integer, packed into a 32-bit integer (u32).
    /// The 10-bit channels [0, 1023] and the 2-bit channel [0, 3] are converted to float [0, 1].
    /// `vec4<f32>` in shaders.
    // The rename must be gated on the same features that pull in the serde
    // derives above; this enum's derives use `serialize` / `deserialize`,
    // so a `feature = "serde"` gate would not track them.
    #[cfg_attr(
        any(feature = "serialize", feature = "deserialize"),
        serde(rename = "unorm10-10-10-2")
    )]
    Unorm10_10_10_2 = 34,
}
296+
297+
/// A mapping of vertex buffers and their attributes to shader
298+
/// locations.
299+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
300+
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
301+
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
302+
pub struct AttributeMapping {
303+
/// Shader location associated with this attribute
304+
pub shader_location: u32,
305+
/// Offset in bytes from start of vertex buffer structure
306+
pub offset: u32,
307+
/// Format code to help us unpack the attribute into the type
308+
/// used by the shader. Codes correspond to a 0-based index of
309+
/// <https://gpuweb.github.io/gpuweb/#enumdef-gpuvertexformat>.
310+
/// The conversion process is described by
311+
/// <https://gpuweb.github.io/gpuweb/#vertex-processing>.
312+
pub format: VertexFormat,
313+
}
314+
315+
/// A description of a vertex buffer with all the information we
316+
/// need to address the attributes within it.
317+
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
318+
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
319+
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
320+
pub struct VertexBufferMapping {
321+
/// Shader location associated with this buffer
322+
pub id: u32,
323+
/// Size of the structure in bytes
324+
pub stride: u32,
325+
/// True if the buffer is indexed by vertex, false if indexed
326+
/// by instance.
327+
pub indexed_by_vertex: bool,
328+
/// Vec of the attributes within the structure
329+
pub attributes: Vec<AttributeMapping>,
330+
}
331+
225332
/// A subset of options that are meant to be changed per pipeline.
226333
#[derive(Debug, Default, Clone)]
227334
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
@@ -234,6 +341,17 @@ pub struct PipelineOptions {
234341
///
235342
/// Enable this for vertex shaders with point primitive topologies.
236343
pub allow_and_force_point_size: bool,
344+
345+
/// If set, when generating the Metal vertex shader, transform it
346+
/// to receive the vertex buffers, lengths, and vertex id as args,
347+
/// and bounds-check the vertex id and use the index into the
348+
/// vertex buffers to access attributes, rather than using Metal's
349+
/// `[[stage_in]]` assembled attribute data.
350+
pub vertex_pulling_transform: bool,
351+
352+
/// vertex_buffer_mappings are used during shader translation to
353+
/// support vertex pulling.
354+
pub vertex_buffer_mappings: Vec<VertexBufferMapping>,
237355
}
238356

239357
impl Options {

0 commit comments

Comments
 (0)