Skip to content

Commit f4dab8a

Browse files
JMS55Elabajabamockersf
authored
Multithreaded render command encoding (#9172)
# Objective
- Encoding many GPU commands onto a `wgpu::CommandEncoder` (for example, in a render pass with many draws, such as the main opaque pass) is very expensive and takes a long time.
- To improve performance, we want to perform the command encoding for these heavy passes in parallel.

## Solution
- `RenderContext` can now queue up "command buffer generation tasks", which are closures that will generate a command buffer when called.
- When finalizing the render context to produce the final list of command buffers, these tasks are run in parallel on the `ComputeTaskPool` to produce their corresponding command buffers.
- The general idea is that the node graph will run in serial, but in a node, instead of doing rendering work, you can add tasks to do render work in parallel with other nodes' tasks that get run at the end of the graph execution.

## Nodes Parallelized
- `MainOpaquePass3dNode`
- `PrepassNode`
- `DeferredGBufferPrepassNode`
- `ShadowPassNode` (One task per view)

## Future Work
- For a large number of draw calls, it might be worth further subdividing passes into 2+ tasks.
- Extend this to UI, 2d, transparent, and transmissive nodes?
- Needs testing - small command buffers are inefficient - it may be worth reverting to the serial command encoder usage for render phases with few items.
- All "serial" (traditional) rendering work must finish before parallel rendering tasks (the new stuff) can start to run.
- There is still only one submission to the graphics queue at the end of the graph execution. There is still no ability to submit work earlier.

## Performance Improvement
Thanks to @Elabajaba for testing on Bistro.
![image](https://github.com/bevyengine/bevy/assets/47158642/be50dafa-85eb-4da5-a5cd-c0a044f1e76f)
TLDR: Without shadow mapping, this PR has no impact. _With_ shadow mapping, this PR gives **~40 more fps** than main.
---

## Changelog
- `MainOpaquePass3dNode`, `PrepassNode`, `DeferredGBufferPrepassNode`, and each shadow map within `ShadowPassNode` are now encoded in parallel, giving _greatly_ increased CPU performance, mainly when shadow mapping is enabled.
- Does not work on WASM or AMD+Windows+Vulkan.
- Added `RenderContext::add_command_buffer_generation_task()`.
- `RenderContext::new()` now takes adapter info.
- Some render graph and `Node`-related types and methods now have additional lifetime constraints.

## Migration Guide
- `RenderContext::new()` now takes adapter info.
- Some render graph and `Node`-related types and methods now have additional lifetime constraints.

---------

Co-authored-by: Elabajaba <[email protected]>
Co-authored-by: François <[email protected]>
1 parent 5313730 commit f4dab8a

File tree

7 files changed

+260
-137
lines changed

7 files changed

+260
-137
lines changed

crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ use bevy_ecs::{prelude::World, query::QueryItem};
66
use bevy_render::{
77
camera::ExtractedCamera,
88
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
9-
render_phase::RenderPhase,
10-
render_resource::{PipelineCache, RenderPassDescriptor, StoreOp},
9+
render_phase::{RenderPhase, TrackedRenderPass},
10+
render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp},
1111
renderer::RenderContext,
1212
view::{ViewDepthTexture, ViewTarget, ViewUniformOffset},
1313
};
@@ -31,10 +31,10 @@ impl ViewNode for MainOpaquePass3dNode {
3131
&'static ViewUniformOffset,
3232
);
3333

34-
fn run(
34+
fn run<'w>(
3535
&self,
3636
graph: &mut RenderGraphContext,
37-
render_context: &mut RenderContext,
37+
render_context: &mut RenderContext<'w>,
3838
(
3939
camera,
4040
opaque_phase,
@@ -44,52 +44,69 @@ impl ViewNode for MainOpaquePass3dNode {
4444
skybox_pipeline,
4545
skybox_bind_group,
4646
view_uniform_offset,
47-
): QueryItem<Self::ViewQuery>,
48-
world: &World,
47+
): QueryItem<'w, Self::ViewQuery>,
48+
world: &'w World,
4949
) -> Result<(), NodeRunError> {
50-
// Run the opaque pass, sorted by pipeline key and mesh id to greatly improve batching.
51-
// NOTE: Scoped to drop the mutable borrow of render_context
52-
#[cfg(feature = "trace")]
53-
let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();
50+
let color_attachments = [Some(target.get_color_attachment())];
51+
let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store));
5452

55-
// Setup render pass
56-
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
57-
label: Some("main_opaque_pass_3d"),
58-
color_attachments: &[Some(target.get_color_attachment())],
59-
depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)),
60-
timestamp_writes: None,
61-
occlusion_query_set: None,
62-
});
53+
let view_entity = graph.view_entity();
54+
render_context.add_command_buffer_generation_task(move |render_device| {
55+
#[cfg(feature = "trace")]
56+
let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();
6357

64-
if let Some(viewport) = camera.viewport.as_ref() {
65-
render_pass.set_camera_viewport(viewport);
66-
}
58+
// Command encoder setup
59+
let mut command_encoder =
60+
render_device.create_command_encoder(&CommandEncoderDescriptor {
61+
label: Some("main_opaque_pass_3d_command_encoder"),
62+
});
6763

68-
let view_entity = graph.view_entity();
64+
// Render pass setup
65+
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
66+
label: Some("main_opaque_pass_3d"),
67+
color_attachments: &color_attachments,
68+
depth_stencil_attachment,
69+
timestamp_writes: None,
70+
occlusion_query_set: None,
71+
});
72+
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
73+
if let Some(viewport) = camera.viewport.as_ref() {
74+
render_pass.set_camera_viewport(viewport);
75+
}
6976

70-
// Opaque draws
71-
opaque_phase.render(&mut render_pass, world, view_entity);
77+
// Opaque draws
78+
if !opaque_phase.items.is_empty() {
79+
#[cfg(feature = "trace")]
80+
let _opaque_main_pass_3d_span = info_span!("opaque_main_pass_3d").entered();
81+
opaque_phase.render(&mut render_pass, world, view_entity);
82+
}
7283

73-
// Alpha draws
74-
if !alpha_mask_phase.items.is_empty() {
75-
alpha_mask_phase.render(&mut render_pass, world, view_entity);
76-
}
84+
// Alpha draws
85+
if !alpha_mask_phase.items.is_empty() {
86+
#[cfg(feature = "trace")]
87+
let _alpha_mask_main_pass_3d_span = info_span!("alpha_mask_main_pass_3d").entered();
88+
alpha_mask_phase.render(&mut render_pass, world, view_entity);
89+
}
7790

78-
// Draw the skybox using a fullscreen triangle
79-
if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) =
80-
(skybox_pipeline, skybox_bind_group)
81-
{
82-
let pipeline_cache = world.resource::<PipelineCache>();
83-
if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) {
84-
render_pass.set_render_pipeline(pipeline);
85-
render_pass.set_bind_group(
86-
0,
87-
&skybox_bind_group.0,
88-
&[view_uniform_offset.offset, skybox_bind_group.1],
89-
);
90-
render_pass.draw(0..3, 0..1);
91+
// Skybox draw using a fullscreen triangle
92+
if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) =
93+
(skybox_pipeline, skybox_bind_group)
94+
{
95+
let pipeline_cache = world.resource::<PipelineCache>();
96+
if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) {
97+
render_pass.set_render_pipeline(pipeline);
98+
render_pass.set_bind_group(
99+
0,
100+
&skybox_bind_group.0,
101+
&[view_uniform_offset.offset, skybox_bind_group.1],
102+
);
103+
render_pass.draw(0..3, 0..1);
104+
}
91105
}
92-
}
106+
107+
drop(render_pass);
108+
command_encoder.finish()
109+
});
93110

94111
Ok(())
95112
}

crates/bevy_core_pipeline/src/deferred/node.rs

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ use bevy_ecs::prelude::*;
22
use bevy_ecs::query::QueryItem;
33
use bevy_render::render_graph::ViewNode;
44

5-
use bevy_render::render_resource::StoreOp;
5+
use bevy_render::render_phase::TrackedRenderPass;
6+
use bevy_render::render_resource::{CommandEncoderDescriptor, StoreOp};
67
use bevy_render::{
78
camera::ExtractedCamera,
89
render_graph::{NodeRunError, RenderGraphContext},
@@ -33,21 +34,19 @@ impl ViewNode for DeferredGBufferPrepassNode {
3334
&'static ViewPrepassTextures,
3435
);
3536

36-
fn run(
37+
fn run<'w>(
3738
&self,
3839
graph: &mut RenderGraphContext,
39-
render_context: &mut RenderContext,
40+
render_context: &mut RenderContext<'w>,
4041
(
4142
camera,
4243
opaque_deferred_phase,
4344
alpha_mask_deferred_phase,
4445
view_depth_texture,
4546
view_prepass_textures,
46-
): QueryItem<Self::ViewQuery>,
47-
world: &World,
47+
): QueryItem<'w, Self::ViewQuery>,
48+
world: &'w World,
4849
) -> Result<(), NodeRunError> {
49-
let view_entity = graph.view_entity();
50-
5150
let mut color_attachments = vec![];
5251
color_attachments.push(
5352
view_prepass_textures
@@ -107,49 +106,64 @@ impl ViewNode for DeferredGBufferPrepassNode {
107106
.map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()),
108107
);
109108

109+
// If all color attachments are none: clear the color attachment list so that no fragment shader is required
110110
if color_attachments.iter().all(Option::is_none) {
111-
// All attachments are none: clear the attachment list so that no fragment shader is required.
112111
color_attachments.clear();
113112
}
114113

115-
{
116-
// Set up the pass descriptor with the depth attachment and optional color attachments.
117-
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
114+
let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store));
115+
116+
let view_entity = graph.view_entity();
117+
render_context.add_command_buffer_generation_task(move |render_device| {
118+
#[cfg(feature = "trace")]
119+
let _deferred_span = info_span!("deferred").entered();
120+
121+
// Command encoder setup
122+
let mut command_encoder =
123+
render_device.create_command_encoder(&CommandEncoderDescriptor {
124+
label: Some("deferred_command_encoder"),
125+
});
126+
127+
// Render pass setup
128+
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
118129
label: Some("deferred"),
119130
color_attachments: &color_attachments,
120-
depth_stencil_attachment: Some(view_depth_texture.get_attachment(StoreOp::Store)),
131+
depth_stencil_attachment,
121132
timestamp_writes: None,
122133
occlusion_query_set: None,
123134
});
124-
135+
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
125136
if let Some(viewport) = camera.viewport.as_ref() {
126137
render_pass.set_camera_viewport(viewport);
127138
}
128139

129-
// Always run deferred pass to ensure the deferred gbuffer and deferred_lighting_pass_id are cleared.
130-
{
131-
// Run the prepass, sorted front-to-back.
140+
// Opaque draws
141+
if !opaque_deferred_phase.items.is_empty() {
132142
#[cfg(feature = "trace")]
133143
let _opaque_prepass_span = info_span!("opaque_deferred").entered();
134144
opaque_deferred_phase.render(&mut render_pass, world, view_entity);
135145
}
136146

147+
// Alpha masked draws
137148
if !alpha_mask_deferred_phase.items.is_empty() {
138-
// Run the deferred, sorted front-to-back.
139149
#[cfg(feature = "trace")]
140150
let _alpha_mask_deferred_span = info_span!("alpha_mask_deferred").entered();
141151
alpha_mask_deferred_phase.render(&mut render_pass, world, view_entity);
142152
}
143-
}
144153

145-
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
146-
// Copy depth buffer to texture.
147-
render_context.command_encoder().copy_texture_to_texture(
148-
view_depth_texture.texture.as_image_copy(),
149-
prepass_depth_texture.texture.texture.as_image_copy(),
150-
view_prepass_textures.size,
151-
);
152-
}
154+
drop(render_pass);
155+
156+
// Copy the main (view) depth texture to the prepass depth texture
157+
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
158+
command_encoder.copy_texture_to_texture(
159+
view_depth_texture.texture.as_image_copy(),
160+
prepass_depth_texture.texture.texture.as_image_copy(),
161+
view_prepass_textures.size,
162+
);
163+
}
164+
165+
command_encoder.finish()
166+
});
153167

154168
Ok(())
155169
}
Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
use bevy_ecs::prelude::*;
22
use bevy_ecs::query::QueryItem;
3-
use bevy_render::render_graph::ViewNode;
4-
use bevy_render::render_resource::StoreOp;
53
use bevy_render::{
64
camera::ExtractedCamera,
7-
render_graph::{NodeRunError, RenderGraphContext},
8-
render_phase::RenderPhase,
9-
render_resource::RenderPassDescriptor,
5+
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
6+
render_phase::{RenderPhase, TrackedRenderPass},
7+
render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp},
108
renderer::RenderContext,
119
view::ViewDepthTexture,
1210
};
@@ -31,22 +29,20 @@ impl ViewNode for PrepassNode {
3129
Option<&'static DeferredPrepass>,
3230
);
3331

34-
fn run(
32+
fn run<'w>(
3533
&self,
3634
graph: &mut RenderGraphContext,
37-
render_context: &mut RenderContext,
35+
render_context: &mut RenderContext<'w>,
3836
(
3937
camera,
4038
opaque_prepass_phase,
4139
alpha_mask_prepass_phase,
4240
view_depth_texture,
4341
view_prepass_textures,
4442
deferred_prepass,
45-
): QueryItem<Self::ViewQuery>,
46-
world: &World,
43+
): QueryItem<'w, Self::ViewQuery>,
44+
world: &'w World,
4745
) -> Result<(), NodeRunError> {
48-
let view_entity = graph.view_entity();
49-
5046
let mut color_attachments = vec![
5147
view_prepass_textures
5248
.normal
@@ -56,55 +52,72 @@ impl ViewNode for PrepassNode {
5652
.motion_vectors
5753
.as_ref()
5854
.map(|motion_vectors_texture| motion_vectors_texture.get_attachment()),
59-
// Use None in place of Deferred attachments
55+
// Use None in place of deferred attachments
6056
None,
6157
None,
6258
];
6359

60+
// If all color attachments are none: clear the color attachment list so that no fragment shader is required
6461
if color_attachments.iter().all(Option::is_none) {
65-
// all attachments are none: clear the attachment list so that no fragment shader is required
6662
color_attachments.clear();
6763
}
6864

69-
{
70-
// Set up the pass descriptor with the depth attachment and optional color attachments
71-
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
65+
let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store));
66+
67+
let view_entity = graph.view_entity();
68+
render_context.add_command_buffer_generation_task(move |render_device| {
69+
#[cfg(feature = "trace")]
70+
let _prepass_span = info_span!("prepass").entered();
71+
72+
// Command encoder setup
73+
let mut command_encoder =
74+
render_device.create_command_encoder(&CommandEncoderDescriptor {
75+
label: Some("prepass_command_encoder"),
76+
});
77+
78+
// Render pass setup
79+
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
7280
label: Some("prepass"),
7381
color_attachments: &color_attachments,
74-
depth_stencil_attachment: Some(view_depth_texture.get_attachment(StoreOp::Store)),
82+
depth_stencil_attachment,
7583
timestamp_writes: None,
7684
occlusion_query_set: None,
7785
});
86+
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
7887
if let Some(viewport) = camera.viewport.as_ref() {
7988
render_pass.set_camera_viewport(viewport);
8089
}
8190

82-
// Always run opaque pass to ensure screen is cleared
83-
{
84-
// Run the prepass, sorted front-to-back
91+
// Opaque draws
92+
if !opaque_prepass_phase.items.is_empty() {
8593
#[cfg(feature = "trace")]
8694
let _opaque_prepass_span = info_span!("opaque_prepass").entered();
8795
opaque_prepass_phase.render(&mut render_pass, world, view_entity);
8896
}
8997

98+
// Alpha masked draws
9099
if !alpha_mask_prepass_phase.items.is_empty() {
91-
// Run the prepass, sorted front-to-back
92100
#[cfg(feature = "trace")]
93101
let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered();
94102
alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity);
95103
}
96-
}
97-
if deferred_prepass.is_none() {
98-
// Copy if deferred isn't going to
99-
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
100-
// Copy depth buffer to texture
101-
render_context.command_encoder().copy_texture_to_texture(
102-
view_depth_texture.texture.as_image_copy(),
103-
prepass_depth_texture.texture.texture.as_image_copy(),
104-
view_prepass_textures.size,
105-
);
104+
105+
drop(render_pass);
106+
107+
// Copy the main (view) depth texture to the prepass depth texture if deferred isn't going to
108+
if deferred_prepass.is_none() {
109+
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
110+
command_encoder.copy_texture_to_texture(
111+
view_depth_texture.texture.as_image_copy(),
112+
prepass_depth_texture.texture.texture.as_image_copy(),
113+
view_prepass_textures.size,
114+
);
115+
}
106116
}
107-
}
117+
118+
command_encoder.finish()
119+
});
120+
108121
Ok(())
109122
}
110123
}

0 commit comments

Comments
 (0)