From 089bbf35ca37a73222a0f51dd4c2a342acac564c Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Thu, 22 May 2025 12:51:51 -0400 Subject: [PATCH 1/5] refactor(dx12): detect `UnrestrictedBufferTextureCopyPitchSupported` --- wgpu-hal/src/dx12/adapter.rs | 17 +++++++++++++++++ wgpu-hal/src/dx12/mod.rs | 1 + 2 files changed, 18 insertions(+) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 472742cefdb..9272d6e7e66 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -198,6 +198,21 @@ impl super::Adapter { .is_ok() }; + let unrestricted_buffer_texture_copy_pitch_supported = { + let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default(); + unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13, + <*mut _>::cast(&mut features13), + size_of_val(&features13) as u32, + ) + } + .is_ok() + && features13 + .UnrestrictedBufferTextureCopyPitchSupported + .as_bool() + }; + let mut max_sampler_descriptor_heap_size = Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; { @@ -302,6 +317,8 @@ impl super::Adapter { suballocation_supported: !info.name.contains("Iris(R) Xe"), shader_model, max_sampler_descriptor_heap_size, + _unrestricted_buffer_texture_copy_pitch_supported: + unrestricted_buffer_texture_copy_pitch_supported, }; // Theoretically vram limited, but in practice 2^20 is the limit diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 2394533b764..7ae1df060f2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -574,6 +574,7 @@ struct PrivateCapabilities { suballocation_supported: bool, shader_model: naga::back::hlsl::ShaderModel, max_sampler_descriptor_heap_size: u32, + _unrestricted_buffer_texture_copy_pitch_supported: bool, } #[derive(Default)] From 78850066414eab464ae000bf2d68c30d9760c544 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Thu, 22 May 2025 12:51:51 -0400 Subject: [PATCH 2/5] refactor(dx12): lower `list` extraction for copies between textures and buffers --- wgpu-hal/src/dx12/command.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index f57e6b9238a..55180e4ab49 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -612,8 +612,9 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - let list = self.list.as_ref().unwrap(); for r in regions { + let list = self.list.as_ref().unwrap(); + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&src.resource) }, Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, @@ -652,8 +653,9 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - let list = self.list.as_ref().unwrap(); for r in regions { + let list = self.list.as_ref().unwrap(); + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&src.resource) }, Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, From 8ea1569f48c31874171b11038a13c4922fbe635c Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Thu, 22 May 2025 12:51:51 -0400 Subject: [PATCH 3/5] refactor(hal): impl. `From` conversions b/w `wgh::CopyExtent` and `wgt::Extent3d` --- wgpu-hal/src/lib.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 4184c8c6f1b..7193bcfdc34 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2315,6 +2315,36 @@ pub struct CopyExtent { pub depth: u32, } +impl From for CopyExtent { + fn from(value: wgt::Extent3d) -> Self { + let wgt::Extent3d { + width, + height, + depth_or_array_layers, + } = value; + Self { + width, + height, + depth: depth_or_array_layers, + } + } +} + +impl From for wgt::Extent3d { + fn from(value: CopyExtent) -> Self { + let CopyExtent { + width, + height, + depth, + } = value; + Self { + width, + height, + depth_or_array_layers: depth, + } + } +} + #[derive(Clone, Debug)] pub struct TextureCopy { pub src_base: TextureCopyBase, From b804b940fbefdcd786dc1a9490f0f2d51782fd95 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Fri, 23 May 2025 17:06:29 -0400 Subject: [PATCH 4/5] refactor(dx12): extract `copy_aligned` in `copy_texture_to_buffer` --- wgpu-hal/src/dx12/command.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 55180e4ab49..1d30c367e4b 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -653,8 +653,11 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - for r in regions { - let list = self.list.as_ref().unwrap(); + let copy_aligned = |this: &mut Self, + src: &super::Texture, + dst: &super::Buffer, + r: crate::BufferTextureCopy| { + let list = this.list.as_ref().unwrap(); let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&src.resource) }, @@ -675,6 +678,10 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box)) }; + }; + + for r in regions { + copy_aligned(this, src, dst, r); } } From 505438d89561fa01c9c9c01185c8a7e9c7c75356 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Thu, 22 May 2025 12:51:51 -0400 Subject: [PATCH 5/5] WIP: fix(dx12): align tex. <-> buf. copies via intermediate buffer if `!UnrestrictedBufferTextureCopyPitchSupported` TODO: resolve `TODO` comments --- CHANGELOG.md | 1 + tests/tests/wgpu-gpu/regression/issue_6827.rs | 12 +- wgpu-hal/src/dx12/adapter.rs | 4 +- wgpu-hal/src/dx12/command.rs | 131 +++++++++++++++++- wgpu-hal/src/dx12/device.rs | 1 + wgpu-hal/src/dx12/mod.rs | 25 +++- 6 files changed, 156 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca26ae282f2..bea94df9250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ Bottom level categories: #### DX12 - Get `vertex_index` & `instance_index` builtins working for indirect draws. By @teoxoy in [#7535](https://github.com/gfx-rs/wgpu/pull/7535) +- Align copies b/w textures and buffers when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721). #### Metal diff --git a/tests/tests/wgpu-gpu/regression/issue_6827.rs b/tests/tests/wgpu-gpu/regression/issue_6827.rs index 2cb1bbd0392..588990d3501 100644 --- a/tests/tests/wgpu-gpu/regression/issue_6827.rs +++ b/tests/tests/wgpu-gpu/regression/issue_6827.rs @@ -19,17 +19,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new() .expect_fail(FailureCase::backend_adapter( wgpu::Backends::METAL, "Apple Paravirtual device", // CI on M1 - )) - .expect_fail( - // Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` - // is true, which we have no way to encode. This reproduces in CI though, so not too worried about it. - FailureCase::backend(wgpu::Backends::DX12) - .flaky() - .validation_error( - "D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512", - ) - .panic("GraphicsCommandList::close failed: The parameter is incorrect"), - ), + )), ) .run_async(|ctx| async move { run_test(ctx, true).await }); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 9272d6e7e66..2d49d43ee23 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -317,8 +317,7 @@ impl super::Adapter { suballocation_supported: !info.name.contains("Iris(R) Xe"), shader_model, max_sampler_descriptor_heap_size, - _unrestricted_buffer_texture_copy_pitch_supported: - unrestricted_buffer_texture_copy_pitch_supported, + unrestricted_buffer_texture_copy_pitch_supported, }; // Theoretically vram limited, but in practice 2^20 is the limit @@ -684,6 +683,7 @@ impl crate::Adapter for super::Adapter { queue: super::Queue { raw: queue, temp_lists: Mutex::new(Vec::new()), + intermediate_copy_bufs: Mutex::new(Vec::new()), }, }) } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 1d30c367e4b..f44d5889b4f 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -14,7 +14,7 @@ use crate::{ dxgi::{name::ObjectExt, result::HResult as _}, }, dx12::borrow_interface_temporarily, - AccelerationStructureEntries, + AccelerationStructureEntries, CommandEncoder as _, }; fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX { @@ -312,6 +312,78 @@ impl super::CommandEncoder { } } } + + unsafe fn buf_tex_intermediate( + &mut self, + region: crate::BufferTextureCopy, + tex_fmt: wgt::TextureFormat, + copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T, + ) -> Option<(T, super::Buffer)> { + let size = { + let copy_info = region.buffer_layout.get_buffer_texture_copy_info( + tex_fmt, + region.texture_base.aspect.map(), + ®ion.size.into(), + ); + copy_info.bytes_in_copy + }; + + let size = wgt::BufferSize::new(size)?; + + let buffer = { + let (resource, allocation) = + super::suballocation::DeviceAllocationContext::from(&*self) + .create_buffer(&crate::BufferDescriptor { + label: None, + size: size.get(), + usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST, + memory_flags: crate::MemoryFlags::empty(), + }) + .expect(concat!( + "internal error: ", + "failed to allocate intermediate buffer ", + "for offset alignment" + )); + super::Buffer { + resource, + size: size.get(), + allocation, + } + }; + + let mut region = region; + region.buffer_layout.offset = 0; + + unsafe { + self.transition_buffers( + [crate::BufferBarrier { + buffer: &buffer, + usage: crate::StateTransition { + from: wgt::BufferUses::empty(), + to: wgt::BufferUses::COPY_DST, + }, + }] + .into_iter(), + ) + }; + + let t = copy_op(self, &buffer, size, region); + + unsafe { + self.transition_buffers( + [crate::BufferBarrier { + buffer: &buffer, + usage: crate::StateTransition { + from: wgt::BufferUses::COPY_DST, + to: wgt::BufferUses::COPY_SRC, + }, + }] + .into_iter(), + ) + }; + + Some((t, buffer)) + } } impl crate::CommandEncoder for super::CommandEncoder { @@ -363,7 +435,10 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn end_encoding(&mut self) -> Result { let raw = self.list.take().unwrap(); unsafe { raw.Close() }.into_device_result("GraphicsCommandList::close")?; - Ok(super::CommandBuffer { raw }) + Ok(super::CommandBuffer { + raw, + intermediate_copy_bufs: mem::take(&mut self.intermediate_copy_bufs).into(), + }) } unsafe fn reset_all>(&mut self, command_buffers: I) { for cmd_buf in command_buffers { @@ -612,7 +687,32 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { + let offset_alignment = self.shared.private_caps.texture_data_placement_alignment(); + for r in regions { + let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0; + let (r, src) = if is_offset_aligned { + (r, src) + } else { + let Some((r, src)) = (unsafe { + let src_offset = r.buffer_layout.offset; + self.buf_tex_intermediate(r, dst.format, |this, buf, size, r| { + let layout = crate::BufferCopy { + src_offset, + dst_offset: 0, + size, + }; + this.copy_buffer_to_buffer(src, buf, [layout].into_iter()); + r + }) + }) else { + continue; + }; + self.intermediate_copy_bufs.push(src); + let src = self.intermediate_copy_bufs.last().unwrap(); + (r, src) + }; + let list = self.list.as_ref().unwrap(); let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { @@ -680,8 +780,33 @@ impl crate::CommandEncoder for super::CommandEncoder { }; }; + let offset_alignment = self.shared.private_caps.texture_data_placement_alignment(); + for r in regions { - copy_aligned(this, src, dst, r); + let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0; + if is_offset_aligned { + copy_aligned(self, src, dst, r) + } else { + let orig_offset = r.buffer_layout.offset; + let Some((r, src)) = (unsafe { + self.buf_tex_intermediate(r, src.format, |this, buf, size, r| { + copy_aligned(this, src, buf, r); + crate::BufferCopy { + src_offset: orig_offset, + dst_offset: 0, + size, + } + }) + }) else { + continue; + }; + + unsafe { + self.copy_buffer_to_buffer(&src, dst, [r].into_iter()); + } + + self.intermediate_copy_bufs.push(src); + }; } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index c73780bc77b..9989443dd32 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -756,6 +756,7 @@ impl crate::Device for super::Device { mem_allocator: self.mem_allocator.clone(), rtv_pool: Arc::clone(&self.rtv_pool), temp_rtv_handles: Vec::new(), + intermediate_copy_bufs: Vec::new(), null_rtv_handle: self.null_rtv_handle, list: None, free_lists: Vec::new(), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 7ae1df060f2..f0873da3272 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -95,7 +95,11 @@ use windows::{ core::{Free, Interface}, Win32::{ Foundation, - Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi}, + Graphics::{ + Direct3D, + Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT}, + DirectComposition, Dxgi, + }, System::Threading, }, }; @@ -574,7 +578,17 @@ struct PrivateCapabilities { suballocation_supported: bool, shader_model: naga::back::hlsl::ShaderModel, max_sampler_descriptor_heap_size: u32, - _unrestricted_buffer_texture_copy_pitch_supported: bool, + unrestricted_buffer_texture_copy_pitch_supported: bool, +} + +impl PrivateCapabilities { + fn texture_data_placement_alignment(&self) -> u64 { + if self.unrestricted_buffer_texture_copy_pitch_supported { + D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into() + } else { + 4 + } + } } #[derive(Default)] @@ -682,6 +696,7 @@ unsafe impl Sync for Device {} pub struct Queue { raw: Direct3D12::ID3D12CommandQueue, temp_lists: Mutex>>, + intermediate_copy_bufs: Mutex>>>, } impl Queue { @@ -804,6 +819,8 @@ pub struct CommandEncoder { rtv_pool: Arc>, temp_rtv_handles: Vec, + intermediate_copy_bufs: Vec, + null_rtv_handle: descriptor::Handle, list: Option, free_lists: Vec, @@ -832,6 +849,7 @@ impl fmt::Debug for CommandEncoder { #[derive(Debug)] pub struct CommandBuffer { raw: Direct3D12::ID3D12GraphicsCommandList, + intermediate_copy_bufs: Arc>, } impl crate::DynCommandBuffer for CommandBuffer {} @@ -1436,8 +1454,11 @@ impl crate::Queue for Queue { ) -> Result<(), crate::DeviceError> { let mut temp_lists = self.temp_lists.lock(); temp_lists.clear(); + let mut intermediate_copy_bufs = self.intermediate_copy_bufs.lock(); for cmd_buf in command_buffers { temp_lists.push(Some(cmd_buf.raw.clone().into())); + intermediate_copy_bufs.push(Arc::clone(&cmd_buf.intermediate_copy_bufs)); + // TODO: When to clean accumulated copy buffers up? } {