From fe09d9192f8d82e0f18df47ff2d8f033f5261f49 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 9 Apr 2025 18:57:26 -0400 Subject: [PATCH 1/9] Extract texture <-> buffer copy logic to wgpu-types --- wgpu-core/src/command/transfer.rs | 93 ++++----- wgpu-types/src/lib.rs | 2 + wgpu-types/src/transfers.rs | 324 ++++++++++++++++++++++++++++++ wgpu/src/lib.rs | 23 ++- 4 files changed, 380 insertions(+), 62 deletions(-) create mode 100644 wgpu-types/src/transfers.rs diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 894afee544d..b91facb0a09 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -235,62 +235,65 @@ pub(crate) fn validate_linear_texture_data( copy_size: &Extent3d, need_copy_aligned_rows: bool, ) -> Result<(BufferAddress, BufferAddress), TransferError> { - // Convert all inputs to BufferAddress (u64) to avoid some of the overflow issues - // Note: u64 is not always enough to prevent overflow, especially when multiplying - // something with a potentially large depth value, so it is preferable to validate - // the copy size before calling this function (for example via `validate_texture_copy_range`). - let copy_width = copy_size.width as BufferAddress; - let copy_height = copy_size.height as BufferAddress; - let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress; - - let offset = layout.offset; - - let block_size = format.block_copy_size(Some(aspect)).unwrap() as BufferAddress; - let (block_width, block_height) = format.block_dimensions(); - let block_width = block_width as BufferAddress; - let block_height = block_height as BufferAddress; - - if copy_width % block_width != 0 { + let wgt::BufferTextureCopyInfo { + copy_width, + copy_height, + depth_or_array_layers, + + offset, + + block_size_bytes, + block_width_texels, + block_height_texels, + + width_blocks: _, + height_blocks, + + row_bytes_dense, + row_stride_bytes, + + image_stride_rows: _, + image_stride_bytes, + + image_rows_dense: _, + image_bytes_dense: _, + + bytes_in_copy, + } = layout.get_buffer_texture_copy_info(format, aspect, copy_size); + + if copy_width % block_width_texels != 0 { return Err(TransferError::UnalignedCopyWidth); } - if copy_height % block_height != 0 { + if copy_height % block_height_texels != 0 { return Err(TransferError::UnalignedCopyHeight); } - let width_in_blocks = copy_width / block_width; - let height_in_blocks = copy_height / block_height; - - let bytes_in_last_row = width_in_blocks * block_size; - - let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row { - let bytes_per_row = bytes_per_row as BufferAddress; - if bytes_per_row < bytes_in_last_row { + if let Some(raw_bytes_per_row) = layout.bytes_per_row { + let raw_bytes_per_row = raw_bytes_per_row as BufferAddress; + if raw_bytes_per_row < row_bytes_dense { return Err(TransferError::InvalidBytesPerRow); } - bytes_per_row } else { - if depth_or_array_layers > 1 || height_in_blocks > 1 { + if depth_or_array_layers > 1 || height_blocks > 1 { return Err(TransferError::UnspecifiedBytesPerRow); } - 0 - }; - let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { - let rows_per_image = rows_per_image as BufferAddress; - if rows_per_image < height_in_blocks { + } + + if let Some(raw_rows_per_image) = layout.rows_per_image { + let raw_rows_per_image = raw_rows_per_image as BufferAddress; + if raw_rows_per_image < height_blocks { return Err(TransferError::InvalidRowsPerImage); } - rows_per_image } else { if depth_or_array_layers > 1 { return Err(TransferError::UnspecifiedRowsPerImage); } - 0 }; if need_copy_aligned_rows { let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress; - let mut offset_alignment = block_size; + let mut offset_alignment = block_size_bytes; if format.is_depth_stencil_format() { offset_alignment = 4 } @@ -298,33 +301,21 @@ pub(crate) fn validate_linear_texture_data( return Err(TransferError::UnalignedBufferOffset(offset)); } - if bytes_per_row % bytes_per_row_alignment != 0 { + if row_stride_bytes % bytes_per_row_alignment != 0 { return Err(TransferError::UnalignedBytesPerRow); } } - let bytes_per_image = bytes_per_row * rows_per_image; - - let required_bytes_in_copy = if depth_or_array_layers == 0 { - 0 - } else { - let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1); - if height_in_blocks > 0 { - required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; - } - required_bytes_in_copy - }; - - if offset + required_bytes_in_copy > buffer_size { + if offset + bytes_in_copy > buffer_size { return Err(TransferError::BufferOverrun { start_offset: offset, - end_offset: offset + required_bytes_in_copy, + end_offset: offset + bytes_in_copy, buffer_size, side: buffer_side, }); } - Ok((required_bytes_in_copy, bytes_per_image)) + Ok((bytes_in_copy, image_stride_bytes)) } /// WebGPU's [validating texture copy range][vtcr] algorithm. diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 77b07743460..db23549bcfd 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -39,10 +39,12 @@ mod env; mod features; pub mod instance; pub mod math; +mod transfers; pub use counters::*; pub use features::*; pub use instance::*; +pub use transfers::*; /// Integral type used for [`Buffer`] offsets and sizes. /// diff --git a/wgpu-types/src/transfers.rs b/wgpu-types/src/transfers.rs new file mode 100644 index 00000000000..6a0d3b18ba6 --- /dev/null +++ b/wgpu-types/src/transfers.rs @@ -0,0 +1,324 @@ +use crate::{BufferAddress, Extent3d, TexelCopyBufferLayout, TextureAspect, TextureFormat}; + +impl TexelCopyBufferLayout { + /// Extract a variety of information about the given copy operation. + #[inline(always)] + pub fn get_buffer_texture_copy_info( + &self, + format: TextureFormat, + aspect: TextureAspect, + copy_size: &Extent3d, + ) -> BufferTextureCopyInfo { + // Convert all inputs to BufferAddress (u64) to avoid some of the overflow issues + // Note: u64 is not always enough to prevent overflow, especially when multiplying + // something with a potentially large depth value, so it is preferable to validate + // the copy size before calling this function (for example via `validate_texture_copy_range`). + let copy_width = copy_size.width as BufferAddress; + let copy_height = copy_size.height as BufferAddress; + let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress; + + let block_size_bytes = format.block_copy_size(Some(aspect)).unwrap() as BufferAddress; + let (block_width, block_height) = format.block_dimensions(); + let block_width_texels = block_width as BufferAddress; + let block_height_texels = block_height as BufferAddress; + + let width_blocks = copy_width.div_ceil(block_width_texels); + let height_blocks = copy_height.div_ceil(block_height_texels); + + let row_bytes_dense = width_blocks * block_size_bytes; + + let row_stride_bytes = self.bytes_per_row.map_or(row_bytes_dense, u64::from); + let image_stride_rows = self.rows_per_image.map_or(height_blocks, u64::from); + + let image_stride_bytes = row_stride_bytes * image_stride_rows; + + let image_rows_dense = height_blocks; + let image_bytes_dense = + image_rows_dense.saturating_sub(1) * row_stride_bytes + row_bytes_dense; + + let mut bytes_in_copy = image_stride_bytes * depth_or_array_layers.saturating_sub(1); + if height_blocks > 0 { + bytes_in_copy += row_stride_bytes * (height_blocks - 1) + row_bytes_dense; + } + + BufferTextureCopyInfo { + copy_width, + copy_height, + depth_or_array_layers, + + offset: self.offset, + + block_size_bytes, + block_width_texels, + block_height_texels, + + width_blocks, + height_blocks, + + row_bytes_dense, + row_stride_bytes, + + image_stride_rows, + image_stride_bytes, + + image_rows_dense, + image_bytes_dense, + + bytes_in_copy, + } + } +} + +/// Information about a copy between a buffer and a texture. +/// +/// Mostly used for internal calculations, but useful nonetheless. +/// Generated by [`TexelCopyBufferLayout::extract_linear_texture_data`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct BufferTextureCopyInfo { + /// The width of the copy region in pixels. + pub copy_width: u64, + /// The height of the copy region in pixels. + pub copy_height: u64, + /// The depth of the copy region in pixels. + pub depth_or_array_layers: u64, + + /// The offset in the buffer where the copy starts. + pub offset: u64, + + /// The size of a single texture texel block in bytes. + pub block_size_bytes: u64, + /// The number of texel in a texel block in the x direction. + pub block_width_texels: u64, + /// The number of texel in a texel block in the y direction. + pub block_height_texels: u64, + + /// The width of the copy region in blocks. + pub width_blocks: u64, + /// The height of the copy region in blocks. + pub height_blocks: u64, + + /// The number of bytes in the last row of the copy region. + pub row_bytes_dense: u64, + /// The stride in bytes between the start of one row in an image and the next row in the same image. + /// + /// This includes any padding between one row and the next row. + pub row_stride_bytes: u64, + + /// The stride in rows between the start of one image and the next image. + pub image_stride_rows: u64, + /// The stride in bytes between the start of one image and the next image. + pub image_stride_bytes: u64, + + /// The number of rows in a densely packed list of images. + /// + /// This is the number of rows in the image that are actually used for texel data, + /// and does not include any padding rows, unlike `image_stride_rows`. + pub image_rows_dense: u64, + /// The number of bytes in a densely packed list of images. + /// + /// This is the number of bytes in the image that are actually used for texel data, + /// or are used for padding between _rows_. Padding at the end of the last row and + /// between _images_ is not included. + pub image_bytes_dense: u64, + + /// The total number of bytes in the copy region. + /// + /// This includes all padding except the padding after the last row in the copy. + pub bytes_in_copy: u64, +} + +#[cfg(test)] +mod tests { + use super::*; + + struct LTDTest { + layout: TexelCopyBufferLayout, + format: TextureFormat, + aspect: TextureAspect, + copy_size: Extent3d, + expected_result: BufferTextureCopyInfo, + } + + impl LTDTest { + #[track_caller] + fn run(&self) { + let linear_texture_data = + self.layout + .get_buffer_texture_copy_info(self.format, self.aspect, &self.copy_size); + assert_eq!(linear_texture_data, self.expected_result); + } + } + + #[test] + fn linear_texture_data_1d_copy() { + let mut test = LTDTest { + layout: TexelCopyBufferLayout { + offset: 0, + bytes_per_row: None, + rows_per_image: None, + }, + format: TextureFormat::Rgba8Unorm, + aspect: TextureAspect::All, + copy_size: Extent3d { + width: 4, + height: 1, + depth_or_array_layers: 1, + }, + expected_result: BufferTextureCopyInfo { + copy_width: 4, + copy_height: 1, + depth_or_array_layers: 1, + offset: 0, + block_size_bytes: 4, + block_width_texels: 1, + block_height_texels: 1, + width_blocks: 4, + height_blocks: 1, + row_bytes_dense: 16, + row_stride_bytes: 16, + image_stride_rows: 1, + image_stride_bytes: 16, + image_rows_dense: 1, + image_bytes_dense: 16, + bytes_in_copy: 16, + }, + }; + + test.run(); + + // Changing bytes_per_row should only change the bytes_per_row, not the bytes_in_copy + // as that is only affected by the last row size. + test.layout.bytes_per_row = Some(32); + test.expected_result.row_stride_bytes = 32; + test.expected_result.image_stride_bytes = 32; + + test.run(); + + // Changing rows_per_image should only change the rows_per_image and bytes_per_image, nothing else + test.layout.rows_per_image = Some(4); + test.expected_result.image_stride_bytes = 128; // 32 * 4 + test.expected_result.image_stride_rows = 4; + + test.run(); + + // Changing the offset should change nothing. + + test.layout.offset = 4; + test.expected_result.offset = 4; + + test.run(); + } + + #[test] + fn linear_texture_data_2d_3d_copy() { + let mut test = LTDTest { + layout: TexelCopyBufferLayout { + offset: 0, + bytes_per_row: None, + rows_per_image: None, + }, + format: TextureFormat::Rgba8Unorm, + aspect: TextureAspect::All, + copy_size: Extent3d { + width: 7, + height: 12, + depth_or_array_layers: 1, + }, + expected_result: BufferTextureCopyInfo { + copy_width: 7, + copy_height: 12, + depth_or_array_layers: 1, + offset: 0, + block_size_bytes: 4, + block_width_texels: 1, + block_height_texels: 1, + width_blocks: 7, + height_blocks: 12, + row_bytes_dense: 4 * 7, + row_stride_bytes: 4 * 7, + image_stride_rows: 12, + image_stride_bytes: 4 * 7 * 12, + image_rows_dense: 12, + image_bytes_dense: 4 * 7 * 12, + bytes_in_copy: 4 * 7 * 12, + }, + }; + + test.run(); + + // Changing bytes_per_row changes a number of other properties. + test.layout.bytes_per_row = Some(48); + test.expected_result.row_stride_bytes = 48; + test.expected_result.image_stride_bytes = 48 * 12; + test.expected_result.image_bytes_dense = 48 * 11 + (4 * 7); + test.expected_result.bytes_in_copy = 48 * 11 + (4 * 7); + + // Making this a 3D copy only changes the depth_or_array_layers and the bytes_in_copy. + test.copy_size.depth_or_array_layers = 4; + test.expected_result.depth_or_array_layers = 4; + test.expected_result.bytes_in_copy = 48 * 12 * 3 + 48 * 11 + (4 * 7); // 4 layers + + test.run(); + } + + #[test] + fn linear_texture_data_2d_3d_compressed_copy() { + let mut test = LTDTest { + layout: TexelCopyBufferLayout { + offset: 0, + bytes_per_row: None, + rows_per_image: None, + }, + format: TextureFormat::Bc1RgbaUnorm, + aspect: TextureAspect::All, + copy_size: Extent3d { + width: 7, + height: 13, + depth_or_array_layers: 1, + }, + expected_result: BufferTextureCopyInfo { + copy_width: 7, + copy_height: 13, + depth_or_array_layers: 1, + offset: 0, + block_size_bytes: 8, + block_width_texels: 4, + block_height_texels: 4, + width_blocks: 2, + height_blocks: 4, + row_bytes_dense: 8 * 2, // block size * width_blocks + row_stride_bytes: 8 * 2, + image_stride_rows: 4, + image_stride_bytes: 8 * 2 * 4, // block size * width_blocks * height_blocks + image_rows_dense: 4, + image_bytes_dense: 8 * 2 * 4, + bytes_in_copy: 8 * 2 * 4, + }, + }; + + test.run(); + + // Changing bytes_per_row. + test.layout.bytes_per_row = Some(48); + test.expected_result.row_stride_bytes = 48; + test.expected_result.image_stride_bytes = 48 * 4; + test.expected_result.image_bytes_dense = 48 * 3 + (8 * 2); + test.expected_result.bytes_in_copy = 48 * 3 + (8 * 2); + + test.run(); + + // Changing rows_per_image. + test.layout.rows_per_image = Some(8); + test.expected_result.image_stride_bytes = 48 * 8; + test.expected_result.image_stride_rows = 8; + + test.run(); + + // Making this a 3D copy only changes the depth_or_array_layers and the bytes_in_copy. + test.copy_size.depth_or_array_layers = 4; + test.expected_result.depth_or_array_layers = 4; + test.expected_result.bytes_in_copy = 48 * 8 * 3 + 48 * 3 + (8 * 2); // 4 layers + + test.run(); + } +} diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d1ec8a17ff0..17168f64bf7 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -70,17 +70,18 @@ pub use api::*; pub use wgt::{ AdapterInfo, AddressMode, AllocatorReport, AstcBlock, AstcChannel, Backend, BackendOptions, Backends, BindGroupLayoutEntry, BindingType, BlendComponent, BlendFactor, BlendOperation, - BlendState, BufferAddress, BufferBindingType, BufferSize, BufferTransition, BufferUsages, - BufferUses, Color, ColorTargetState, ColorWrites, CommandBufferDescriptor, CompareFunction, - CompositeAlphaMode, CopyExternalImageDestInfo, CoreCounters, DepthBiasState, DepthStencilState, - DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags, DownlevelLimits, - Dx12BackendOptions, Dx12Compiler, DxcShaderModel, DynamicOffset, Extent3d, Face, Features, - FeaturesWGPU, FeaturesWebGPU, FilterMode, FrontFace, GlBackendOptions, GlFenceBehavior, - Gles3MinorVersion, HalCounters, ImageSubresourceRange, IndexFormat, InstanceDescriptor, - InstanceFlags, InternalCounters, Limits, MemoryHints, MultisampleState, NoopBackendOptions, - Origin2d, Origin3d, PipelineStatisticsTypes, PollError, PollStatus, PolygonMode, - PowerPreference, PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, - PrimitiveTopology, PushConstantRange, QueryType, RenderBundleDepthStencil, RequestAdapterError, + BlendState, BufferAddress, BufferBindingType, BufferSize, BufferTextureCopyInfo, + BufferTransition, BufferUsages, BufferUses, Color, ColorTargetState, ColorWrites, + CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, CopyExternalImageDestInfo, + CoreCounters, DepthBiasState, DepthStencilState, DeviceLostReason, DeviceType, + DownlevelCapabilities, DownlevelFlags, DownlevelLimits, Dx12BackendOptions, Dx12Compiler, + DxcShaderModel, DynamicOffset, Extent3d, Face, Features, FeaturesWGPU, FeaturesWebGPU, + FilterMode, FrontFace, GlBackendOptions, GlFenceBehavior, Gles3MinorVersion, HalCounters, + ImageSubresourceRange, IndexFormat, InstanceDescriptor, InstanceFlags, InternalCounters, + Limits, MemoryHints, MultisampleState, NoopBackendOptions, Origin2d, Origin3d, + PipelineStatisticsTypes, PollError, PollStatus, PolygonMode, PowerPreference, + PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology, + PushConstantRange, QueryType, RenderBundleDepthStencil, RequestAdapterError, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel, ShaderRuntimeChecks, ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TexelCopyBufferLayout, TextureAspect, TextureDimension, From 892d63feae5f9c264f0a2f170685a3c1fa650876 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 12:47:16 -0400 Subject: [PATCH 2/9] Add asserts --- tests/tests/wgpu-gpu/regression/issue_6827.rs | 1 - wgpu-hal/src/dx12/command.rs | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/tests/wgpu-gpu/regression/issue_6827.rs b/tests/tests/wgpu-gpu/regression/issue_6827.rs index 2cb1bbd0392..4d4a300e428 100644 --- a/tests/tests/wgpu-gpu/regression/issue_6827.rs +++ b/tests/tests/wgpu-gpu/regression/issue_6827.rs @@ -24,7 +24,6 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new() // Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` // is true, which we have no way to encode. This reproduces in CI though, so not too worried about it. FailureCase::backend(wgpu::Backends::DX12) - .flaky() .validation_error( "D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512", ) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 839d445982f..6dfa807f8d6 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -621,6 +621,19 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; + { + let offset = unsafe { src_location.Anonymous.PlacedFootprint.Offset }; + let remainder = offset % Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64; + + assert_eq!( + remainder, + 0, + "D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512. Was {}. Remainder is {}", + offset, + remainder + ); + }; + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); unsafe { list.CopyTextureRegion( @@ -661,6 +674,19 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; + { + let offset = unsafe { dst_location.Anonymous.PlacedFootprint.Offset }; + let remainder = offset % Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64; + + assert_eq!( + remainder, + 0, + "D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512. Was {}. Remainder is {}", + offset, + remainder + ); + }; + let src_box = make_box(&r.texture_base.origin, &r.size); unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box)) From fd0cd038a2cde64b05aa1d4120b5c37bdcaf6a56 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 13:18:35 -0400 Subject: [PATCH 3/9] Framework of texture copy utils --- wgpu-hal/src/dx12/mod.rs | 1 + wgpu-hal/src/dx12/texture_copies.rs | 54 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 wgpu-hal/src/dx12/texture_copies.rs diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 09c4f884202..32509b3a192 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -83,6 +83,7 @@ mod instance; mod sampler; mod shader_compilation; mod suballocation; +mod texture_copies; mod types; mod view; diff --git a/wgpu-hal/src/dx12/texture_copies.rs b/wgpu-hal/src/dx12/texture_copies.rs new file mode 100644 index 00000000000..d83a8de468f --- /dev/null +++ b/wgpu-hal/src/dx12/texture_copies.rs @@ -0,0 +1,54 @@ +use windows::Win32::Graphics::Direct3D12::{ + ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, +}; + +use crate::dx12::PrivateCapabilities; + +pub struct TextureCopyHandler {} + +impl TextureCopyHandler { + pub fn new(device: &ID3D12Device) -> Self { + Self {} + } + + pub fn encode_copy( + &self, + list: &ID3D12GraphicsCommandList, + src: &super::Buffer, + dst: &super::Texture, + copy: crate::BufferTextureCopy, + ) -> ID3D12Resource { + todo!() + } +} + +enum CopyType { + /// Copy can be expressed natively using a D3D12 copy command. + Native, + /// Copy needs to be done layer by layer, because the rows per image is not + /// the "natural" rows per image. + LayerByLayer, + /// Offset is not properly aligned, so the entire upload needs to be + /// copied in bulk to a temporary buffer first. + AlignmentOnly, +} + +impl CopyType { + fn from_copy(c: &crate::BufferTextureCopy) -> Self { + let natural_rows_per_image = c.size.depth; + let rows_per_image = c + .buffer_layout + .rows_per_image + .unwrap_or(natural_rows_per_image); + + if rows_per_image != natural_rows_per_image { + return CopyType::LayerByLayer; + } + + if c.buffer_layout.offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64 != 0 { + return CopyType::AlignmentOnly; + } + + CopyType::Native + } +} From b9c085ffe7a86f99008f565975623b62dd86450e Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 15:17:43 -0400 Subject: [PATCH 4/9] Support unrestricted buffer copy pitch --- wgpu-hal/src/dx12/adapter.rs | 17 +++++++++++++++++ wgpu-hal/src/dx12/mod.rs | 1 + 2 files changed, 18 insertions(+) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 4d214e77059..16c00fd0696 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -195,6 +195,22 @@ impl super::Adapter { .is_ok() }; + let unrestricted_buffer_texture_copy_pitch = { + let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default(); + let res = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13, + <*mut _>::cast(&mut features13), + size_of_val(&features13) as u32, + ) + } + .is_ok(); + + res && features13 + .UnrestrictedBufferTextureCopyPitchSupported + .as_bool() + }; + let mut max_sampler_descriptor_heap_size = Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; { @@ -299,6 +315,7 @@ impl super::Adapter { suballocation_supported: !info.name.contains("Iris(R) Xe"), shader_model, max_sampler_descriptor_heap_size, + unrestricted_buffer_texture_copy_pitch, }; // Theoretically vram limited, but in practice 2^20 is the limit diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 32509b3a192..d0c37891432 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -574,6 +574,7 @@ struct PrivateCapabilities { suballocation_supported: bool, shader_model: naga::back::hlsl::ShaderModel, max_sampler_descriptor_heap_size: u32, + unrestricted_buffer_texture_copy_pitch: bool, } #[derive(Default)] From 4221f3ba798d4fd713725783dfae7f8517b9976e Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 15:21:06 -0400 Subject: [PATCH 5/9] More temporary infrastructure --- wgpu-hal/src/dx12/texture_copies.rs | 91 +++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 10 deletions(-) diff --git a/wgpu-hal/src/dx12/texture_copies.rs b/wgpu-hal/src/dx12/texture_copies.rs index d83a8de468f..249819d0a01 100644 --- a/wgpu-hal/src/dx12/texture_copies.rs +++ b/wgpu-hal/src/dx12/texture_copies.rs @@ -1,23 +1,38 @@ -use windows::Win32::Graphics::Direct3D12::{ - ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, +use core::mem; + +use alloc::{string::String, sync::Arc}; + +use windows::Win32::Graphics::{Direct3D12::*, Dxgi::Common::*}; + +use crate::{ + auxil::dxgi::result::HResult, + dx12::{suballocation::AllocationWrapper, PrivateCapabilities}, + Device, }; -use crate::dx12::PrivateCapabilities; +const DEFAULT_SIZE: u64 = 1 << 18; // 256 KiB -pub struct TextureCopyHandler {} +pub struct TextureCopyHandler { + temporary_buffer: TemporaryBuffer, +} impl TextureCopyHandler { - pub fn new(device: &ID3D12Device) -> Self { - Self {} + pub fn new(device: &super::Device) -> Result { + Ok(Self { + temporary_buffer: TemporaryBuffer::new(device)?, + }) } - pub fn encode_copy( + pub unsafe fn encode_copy( &self, + caps: &PrivateCapabilities, list: &ID3D12GraphicsCommandList, src: &super::Buffer, dst: &super::Texture, - copy: crate::BufferTextureCopy, - ) -> ID3D12Resource { + copy: &crate::BufferTextureCopy, + ) -> Result, crate::DeviceError> { + let copy_type = CopyType::from_copy(caps, copy); + todo!() } } @@ -34,7 +49,7 @@ enum CopyType { } impl CopyType { - fn from_copy(c: &crate::BufferTextureCopy) -> Self { + fn from_copy(caps: &PrivateCapabilities, c: &crate::BufferTextureCopy) -> Self { let natural_rows_per_image = c.size.depth; let rows_per_image = c .buffer_layout @@ -45,6 +60,12 @@ impl CopyType { return CopyType::LayerByLayer; } + // If unrestricted pitch is supported, we can use the native copy + // even if the offset is not aligned. + if caps.unrestricted_buffer_texture_copy_pitch { + return CopyType::Native; + } + if c.buffer_layout.offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64 != 0 { return CopyType::AlignmentOnly; } @@ -52,3 +73,53 @@ impl CopyType { CopyType::Native } } + +pub struct TemporaryBuffer { + buffer: Arc, +} + +impl TemporaryBuffer { + pub fn new(device: &super::Device) -> Result { + let size = DEFAULT_SIZE; + + let label = label(size); + let desc = buffer_desc(&label, size); + + let buffer = Arc::new(unsafe { device.create_buffer(&desc)? }); + + Ok(Self { buffer }) + } + + pub fn get_resource(&mut self, device: &super::Device, size: u64) -> Arc { + if size > self.buffer.size { + let label = label(size); + let desc = buffer_desc(&label, size); + + // Recreate the buffer with the new size + let new_buffer = Arc::new(unsafe { device.create_buffer(&desc).unwrap() }); + + // Update the buffer + let old_buffer = mem::replace(&mut self.buffer, new_buffer); + + // Release the old buffer if we are the last reference to it + if let Some(buffer) = Arc::into_inner(old_buffer) { + unsafe { device.destroy_buffer(buffer) }; + } + } + + Arc::clone(&self.buffer) + } +} + +fn label(size: u64) -> String { + format!("wgpu-hal texture copy temporary buffer ({} bytes)", size) +} + +fn buffer_desc<'label>(label: &'label str, size: u64) -> crate::BufferDescriptor<'label> { + crate::BufferDescriptor { + label: Some(label), + size, + usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST, + memory_flags: crate::MemoryFlags::empty(), + } +} From d1263824d5ed3e10e017a08351706074a6a2c92f Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 17:24:00 -0400 Subject: [PATCH 6/9] Use context in texture copy machinery --- wgpu-hal/src/dx12/texture_copies.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wgpu-hal/src/dx12/texture_copies.rs b/wgpu-hal/src/dx12/texture_copies.rs index 249819d0a01..b0a98a244fa 100644 --- a/wgpu-hal/src/dx12/texture_copies.rs +++ b/wgpu-hal/src/dx12/texture_copies.rs @@ -6,7 +6,10 @@ use windows::Win32::Graphics::{Direct3D12::*, Dxgi::Common::*}; use crate::{ auxil::dxgi::result::HResult, - dx12::{suballocation::AllocationWrapper, PrivateCapabilities}, + dx12::{ + suballocation::{AllocationWrapper, DeviceAllocationContext}, + PrivateCapabilities, + }, Device, }; @@ -90,7 +93,7 @@ impl TemporaryBuffer { Ok(Self { buffer }) } - pub fn get_resource(&mut self, device: &super::Device, size: u64) -> Arc { + pub fn get_resource(&mut self, ctx: DeviceAllocationContext, size: u64) -> Arc { if size > self.buffer.size { let label = label(size); let desc = buffer_desc(&label, size); @@ -103,7 +106,7 @@ impl TemporaryBuffer { // Release the old buffer if we are the last reference to it if let Some(buffer) = Arc::into_inner(old_buffer) { - unsafe { device.destroy_buffer(buffer) }; + unsafe { ctx.raw.destroy_buffer(buffer) }; } } From 5279bf3026a8206a1cd44545feb6a810eed9900d Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 3 Apr 2025 21:16:22 -0400 Subject: [PATCH 7/9] Imports --- wgpu-hal/src/dx12/texture_copies.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/wgpu-hal/src/dx12/texture_copies.rs b/wgpu-hal/src/dx12/texture_copies.rs index b0a98a244fa..93f7277061c 100644 --- a/wgpu-hal/src/dx12/texture_copies.rs +++ b/wgpu-hal/src/dx12/texture_copies.rs @@ -5,11 +5,7 @@ use alloc::{string::String, sync::Arc}; use windows::Win32::Graphics::{Direct3D12::*, Dxgi::Common::*}; use crate::{ - auxil::dxgi::result::HResult, - dx12::{ - suballocation::{AllocationWrapper, DeviceAllocationContext}, - PrivateCapabilities, - }, + dx12::{suballocation::DeviceAllocationContext, PrivateCapabilities}, Device, }; From df7c41b7139d0e86102689c5a4690ede7971aeaf Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 9 Apr 2025 18:56:18 -0400 Subject: [PATCH 8/9] [d3d12] Allow comparing buffers --- wgpu-hal/src/dx12/mod.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d0c37891432..ecb8d1d4362 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -843,6 +843,20 @@ pub struct Buffer { allocation: suballocation::Allocation, } +impl PartialEq for Buffer { + fn eq(&self, other: &Self) -> bool { + self.resource == other.resource + } +} + +impl Eq for Buffer {} + +impl std::hash::Hash for Buffer { + fn hash(&self, state: &mut H) { + self.resource.as_raw().hash(state); + } +} + unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} From 844381070186b959239847e74f901bcbb72f538c Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 9 Apr 2025 18:57:40 -0400 Subject: [PATCH 9/9] Further work on temporary buffers --- wgpu-hal/src/dx12/texture_copies.rs | 73 ++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/wgpu-hal/src/dx12/texture_copies.rs b/wgpu-hal/src/dx12/texture_copies.rs index 93f7277061c..a6057e72c58 100644 --- a/wgpu-hal/src/dx12/texture_copies.rs +++ b/wgpu-hal/src/dx12/texture_copies.rs @@ -2,6 +2,7 @@ use core::mem; use alloc::{string::String, sync::Arc}; +use parking_lot::Mutex; use windows::Win32::Graphics::{Direct3D12::*, Dxgi::Common::*}; use crate::{ @@ -12,25 +13,26 @@ use crate::{ const DEFAULT_SIZE: u64 = 1 << 18; // 256 KiB pub struct TextureCopyHandler { - temporary_buffer: TemporaryBuffer, + temporary_buffer: Mutex, } impl TextureCopyHandler { pub fn new(device: &super::Device) -> Result { Ok(Self { - temporary_buffer: TemporaryBuffer::new(device)?, + temporary_buffer: Mutex::new(TemporaryBuffer::new(device)?), }) } pub unsafe fn encode_copy( &self, caps: &PrivateCapabilities, + ctx: &DeviceAllocationContext, list: &ID3D12GraphicsCommandList, src: &super::Buffer, dst: &super::Texture, copy: &crate::BufferTextureCopy, ) -> Result, crate::DeviceError> { - let copy_type = CopyType::from_copy(caps, copy); + // let copy_type = CopyType::from_copy(caps, copy); todo!() } @@ -41,14 +43,22 @@ enum CopyType { Native, /// Copy needs to be done layer by layer, because the rows per image is not /// the "natural" rows per image. - LayerByLayer, + LayerByLayer { + // layer_size: u64, + rows_per_image: u32, + natural_rows_per_image: u32, + }, /// Offset is not properly aligned, so the entire upload needs to be /// copied in bulk to a temporary buffer first. AlignmentOnly, } impl CopyType { - fn from_copy(caps: &PrivateCapabilities, c: &crate::BufferTextureCopy) -> Self { + fn from_copy( + caps: &PrivateCapabilities, + format: wgt::TextureFormat, + c: &crate::BufferTextureCopy, + ) -> Self { let natural_rows_per_image = c.size.depth; let rows_per_image = c .buffer_layout @@ -56,7 +66,10 @@ impl CopyType { .unwrap_or(natural_rows_per_image); if rows_per_image != natural_rows_per_image { - return CopyType::LayerByLayer; + return CopyType::LayerByLayer { + rows_per_image, + natural_rows_per_image, + }; } // If unrestricted pitch is supported, we can use the native copy @@ -81,32 +94,58 @@ impl TemporaryBuffer { pub fn new(device: &super::Device) -> Result { let size = DEFAULT_SIZE; - let label = label(size); - let desc = buffer_desc(&label, size); - - let buffer = Arc::new(unsafe { device.create_buffer(&desc)? }); + let ctx = DeviceAllocationContext::from(device); + let buffer = create_buffer(&ctx, size)?; Ok(Self { buffer }) } - pub fn get_resource(&mut self, ctx: DeviceAllocationContext, size: u64) -> Arc { + pub fn get_resource( + &mut self, + ctx: &DeviceAllocationContext, + size: u64, + ) -> Result, crate::DeviceError> { if size > self.buffer.size { - let label = label(size); - let desc = buffer_desc(&label, size); - // Recreate the buffer with the new size - let new_buffer = Arc::new(unsafe { device.create_buffer(&desc).unwrap() }); + let new_buffer = create_buffer(ctx, size)?; // Update the buffer let old_buffer = mem::replace(&mut self.buffer, new_buffer); // Release the old buffer if we are the last reference to it if let Some(buffer) = Arc::into_inner(old_buffer) { - unsafe { ctx.raw.destroy_buffer(buffer) }; + ctx.free_resource(buffer.resource, buffer.allocation); } } - Arc::clone(&self.buffer) + Ok(Arc::clone(&self.buffer)) + } +} + +fn create_buffer( + ctx: &DeviceAllocationContext, + size: u64, +) -> Result, crate::DeviceError> { + let label = format!("wgpu-hal texture copy temporary buffer ({} bytes)", size); + let desc = crate::BufferDescriptor { + label: Some(&label), + size: size, + usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST, + memory_flags: crate::MemoryFlags::empty(), + }; + + let (resource, allocation) = ctx.create_buffer(&desc)?; + Ok(Arc::new(super::Buffer { + resource, + size, + allocation, + })) +} + +fn destroy_buffer(ctx: &DeviceAllocationContext, buffer: Arc) { + if let Some(buffer) = Arc::into_inner(buffer) { + // If we are the last reference to the buffer, free it + ctx.free_resource(buffer.resource, buffer.allocation); } }