[core, hal, types] Clarify wgpu_hal's bounds check promises.

In `wgpu_hal`: - Document that `wgpu_hal` guarantees that shaders will not access buffer contents beyond the bindgroups' bound regions, rounded up to some adapter-specific alignment. Introduce the term "accessible region" for the portion of the buffer that shaders can actually get at. - Document that all bets are off if you disable bounds checks with `ShaderModuleDescriptor::runtime_checks`. - Provide this alignment in `wgpu_hal::Alignments`. Update all backends appropriately. - In the Vulkan backend, use Naga to inject bounds checks on buffer accesses unless `robustBufferAccess2` is available; `robustBufferAccess` is not sufficient. Retrieve `VK_EXT_robustness2`'s properties, as needed to discover the alignment above. In `wgpu_core`: - Use buffer bindings' accessible regions to determine which parts of the buffer need to be initialized. In `wgpu_types`: - Document some of the possible effects of using `ShaderBoundsChecks::unchecked`. Fixes gfx-rs#1813.
jimblandy · Sep 2, 2024 · b7d4d93 · b7d4d93
1 parent 0aa2793
commit b7d4d93
Show file tree

Hide file tree

Showing 10 changed files with 192 additions and 10 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -96,6 +96,7 @@ By @teoxoy [#6134](https://github.com/gfx-rs/wgpu/pull/6134).
 - Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
 - Fix error message that is thrown in create_render_pass to no longer say `compute_pass`. By @matthew-wong1 [#6041](https://github.com/gfx-rs/wgpu/pull/6041)
 - Add `VideoFrame` to `ExternalImageSource` enum. By @jprochazk in [#6170](https://github.com/gfx-rs/wgpu/pull/6170)
+- Document `wgpu_hal` bounds-checking promises, and adapt `wgpu_core`'s lazy initialization logic to the slightly weaker-than-expected guarantees. By @jimblandy in [#6201](https://github.com/gfx-rs/wgpu/pull/6201)
 
 #### GLES / OpenGL
 

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
@@ -884,6 +884,16 @@ pub(crate) fn buffer_binding_type_alignment(
     }
 }
 
+pub(crate) fn buffer_binding_type_bounds_check_alignment(
+    alignments: &hal::Alignments,
+    binding_type: wgt::BufferBindingType,
+) -> wgt::BufferAddress {
+    match binding_type {
+        wgt::BufferBindingType::Uniform => alignments.uniform_bounds_check_alignment.get(),
+        wgt::BufferBindingType::Storage { .. } => wgt::COPY_BUFFER_ALIGNMENT,
+    }
+}
+
 #[derive(Debug)]
 pub struct BindGroup {
     pub(crate) raw: Snatchable<Box<dyn hal::DynBindGroup>>,

diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
@@ -39,7 +39,9 @@ use once_cell::sync::OnceCell;
 
 use smallvec::SmallVec;
 use thiserror::Error;
-use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension};
+use wgt::{
+    math::align_to, DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension,
+};
 
 use std::{
     borrow::Cow,
@@ -2004,10 +2006,21 @@ impl Device {
             late_buffer_binding_sizes.insert(binding, late_size);
         }
 
+        // This was checked against the device's alignment requirements above,
+        // which should always be a multiple of `COPY_BUFFER_ALIGNMENT`.
         assert_eq!(bb.offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
+
+        // `wgpu_hal` only restricts shader access to bound buffer regions with
+        // a certain resolution. For the sake of lazy initialization, round up
+        // the size of the bound range to reflect how much of the buffer is
+        // actually going to be visible to the shader.
+        let bounds_check_alignment =
+            binding_model::buffer_binding_type_bounds_check_alignment(&self.alignments, binding_ty);
+        let visible_size = align_to(bind_size, bounds_check_alignment);
+
         used_buffer_ranges.extend(buffer.initialization_status.read().create_action(
             buffer,
-            bb.offset..bb.offset + bind_size,
+            bb.offset..bb.offset + visible_size,
             MemoryInitKind::NeedsInitializedMemory,
         ));
 

diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
@@ -524,6 +524,9 @@ impl super::Adapter {
                         Direct3D12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64,
                     )
                     .unwrap(),
+                    // Direct3D correctly bounds-checks all array accesses:
+                    // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#18.6.8.2%20Device%20Memory%20Reads
+                    uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(),
                 },
                 downlevel,
             },

diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
@@ -841,6 +841,16 @@ impl super::Adapter {
                 alignments: crate::Alignments {
                     buffer_copy_offset: wgt::BufferSize::new(4).unwrap(),
                     buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(),
+                    // #6151: `wgpu_hal::gles` doesn't ask Naga to inject bounds
+                    // checks in GLSL, and it doesn't request extensions like
+                    // `KHR_robust_buffer_access_behavior` that would provide
+                    // them, so we can't really implement the checks promised by
+                    // [`crate::BufferBinding`].
+                    //
+                    // Since this is a pre-existing condition, for the time
+                    // being, provide 1 as the value here, to cause as little
+                    // trouble as possible.
+                    uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(),
                 },
             },
         })

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
@@ -1635,9 +1635,27 @@ pub struct InstanceDescriptor<'a> {
 pub struct Alignments {
     /// The alignment of the start of the buffer used as a GPU copy source.
     pub buffer_copy_offset: wgt::BufferSize,
+
     /// The alignment of the row pitch of the texture data stored in a buffer that is
     /// used in a GPU copy operation.
     pub buffer_copy_pitch: wgt::BufferSize,
+
+    /// The finest alignment of bound range checking for uniform buffers.
+    ///
+    /// When `wgpu_hal` restricts shader references to the [accessible
+    /// region][ar] of a [`Uniform`] buffer, the size of the accessible region
+    /// is the bind group binding's stated [size], rounded up to the next
+    /// multiple of this value.
+    ///
+    /// We don't need an analogous field for storage buffer bindings, because
+    /// all our backends promise to enforce the size at least to a four-byte
+    /// alignment, and `wgpu_hal` requires bound range lengths to be a multiple
+    /// of four anyway.
+    ///
+    /// [ar]: struct.BufferBinding.html#accessible-region
+    /// [`Uniform`]: wgt::BufferBindingType::Uniform
+    /// [size]: BufferBinding::size
+    pub uniform_bounds_check_alignment: wgt::BufferSize,
 }
 
 #[derive(Clone, Debug)]
@@ -1807,6 +1825,40 @@ pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> {
     pub push_constant_ranges: &'a [wgt::PushConstantRange],
 }
 
+/// A region of a buffer made visible to shaders via a [`BindGroup`].
+///
+/// [`BindGroup`]: Api::BindGroup
+///
+/// ## Accessible region
+///
+/// `wgpu_hal` guarantees that shaders compiled with
+/// [`ShaderModuleDescriptor::runtime_checks`] set to `true` cannot read or
+/// write data via this binding outside the *accessible region* of [`buffer`]:
+///
+/// - The accessible region starts at [`offset`].
+///
+/// - For [`Storage`] bindings, the size of the accessible region is [`size`],
+///   which must be a multiple of 4.
+///
+/// - For [`Uniform`] bindings, the size of the accessible region is [`size`]
+///   rounded up to the next multiple of
+///   [`Alignments::uniform_bounds_check_alignment`].
+///
+/// Note that this guarantee is stricter than WGSL's requirements for
+/// [out-of-bounds accesses][woob], as WGSL allows them to return values from
+/// elsewhere in the buffer. But this guarantee is necessary anyway, to permit
+/// `wgpu-core` to avoid clearing uninitialized regions of buffers that will
+/// never be read by the application before they are overwritten. This
+/// optimization consults bind group buffer binding regions to determine which
+/// parts of which buffers shaders might observe. This optimization is only
+/// sound if shader access is bounds-checked.
+///
+/// [`buffer`]: BufferBinding::buffer
+/// [`offset`]: BufferBinding::offset
+/// [`size`]: BufferBinding::size
+/// [`Storage`]: wgt::BufferBindingType::Storage
+/// [`Uniform`]: wgt::BufferBindingType::Uniform
+/// [woob]: https://gpuweb.github.io/gpuweb/wgsl/#out-of-bounds-access-sec
 #[derive(Debug)]
 pub struct BufferBinding<'a, B: DynBuffer + ?Sized> {
     /// The buffer being bound.
@@ -1925,6 +1977,26 @@ pub enum ShaderInput<'a> {
 
 pub struct ShaderModuleDescriptor<'a> {
     pub label: Label<'a>,
+
+    /// Enforce bounds checks in shaders, even if the underlying driver doesn't
+    /// support doing so natively.
+    ///
+    /// When this is `true`, `wgpu_hal` promises that shaders can only read or
+    /// write the [accessible region][ar] of a bindgroup's buffer bindings. If
+    /// the underlying graphics platform cannot implement these bounds checks
+    /// itself, `wgpu_hal` will inject bounds checks before presenting the
+    /// shader to the platform.
+    ///
+    /// When this is `false`, `wgpu_hal` only enforces such bounds checks if the
+    /// underlying platform provides a way to do so itself. `wgpu_hal` does not
+    /// itself add any bounds checks to generated shader code.
+    ///
+    /// Note that `wgpu_hal` users may try to initialize only those portions of
+    /// buffers that they anticipate might be read from. Passing `false` here
+    /// may allow shaders to see wider regions of the buffers than expected,
+    /// making such deferred initialization visible to the application.
+    ///
+    /// [ar]: struct.BufferBinding.html#accessible-region
     pub runtime_checks: bool,
 }
 

diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
@@ -997,6 +997,10 @@ impl super::PrivateCapabilities {
             alignments: crate::Alignments {
                 buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(),
                 buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(),
+                // This backend has Naga incorporate bounds checks into the
+                // Metal Shading Language it generates, so from `wgpu_hal`'s
+                // users' point of view, references are tightly checked.
+                uniform_bounds_check_alignment: wgt::BufferSize::new(1),
             },
             downlevel,
         }

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
@@ -342,9 +342,6 @@ impl PhysicalDeviceFeatures {
                 None
             },
             robustness2: if enabled_extensions.contains(&ext::robustness2::NAME) {
-                // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking
-                // since we can enable `robust_buffer_access` all the time. But it improves
-                // program portability, so we opt into it if they are supported.
                 Some(
                     vk::PhysicalDeviceRobustness2FeaturesEXT::default()
                         .robust_buffer_access2(private_caps.robust_buffer_access2)
@@ -842,6 +839,10 @@ pub struct PhysicalDeviceProperties {
     /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3.
     subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties<'static>>,
 
+    /// Additional `vk::PhysicalDevice` properties from the
+    /// `VK_EXT_robustness2` extension.
+    robustness2: Option<vk::PhysicalDeviceRobustness2PropertiesEXT<'static>>,
+
     /// The device API version.
     ///
     /// Which is the version of Vulkan supported for device-level functionality.
@@ -1097,13 +1098,38 @@ impl PhysicalDeviceProperties {
         }
     }
 
-    fn to_hal_alignments(&self) -> crate::Alignments {
+    /// Return a `wgpu_hal::Alignments` structure describing this adapter.
+    ///
+    /// The `using_robustness2` argument says how this adapter will implement
+    /// `wgpu_hal`'s guarantee that shaders can only read the [accessible
+    /// region][ar] of bindgroup's buffer bindings:
+    ///
+    /// - If this adapter will depend on `VK_EXT_robustness2`'s
+    ///   `robustBufferAccess2` feature to apply bounds checks to shader buffer
+    ///   access, `using_robustness2` must be `true`.
+    ///
+    /// - Otherwise, this adapter must use Naga to inject bounds checks on
+    ///   buffer accesses, and `using_robustness2` must be `false`.
+    ///
+    /// [ar]: ../../struct.BufferBinding.html#accessible-region
+    fn to_hal_alignments(&self, using_robustness2: bool) -> crate::Alignments {
         let limits = &self.properties.limits;
         crate::Alignments {
             buffer_copy_offset: wgt::BufferSize::new(limits.optimal_buffer_copy_offset_alignment)
                 .unwrap(),
             buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment)
                 .unwrap(),
+            uniform_bounds_check_alignment: {
+                let alignment = if using_robustness2 {
+                    self.robustness2
+                        .unwrap() // if we're using it, we should have its properties
+                        .robust_uniform_buffer_access_size_alignment
+                } else {
+                    // If the `robustness2` properties are unavailable, then `robustness2` is not available either Naga-injected bounds checks are precise.
+                    1
+                };
+                wgt::BufferSize::new(alignment).unwrap()
+            },
         }
     }
 }
@@ -1133,6 +1159,7 @@ impl super::InstanceShared {
                 let supports_subgroup_size_control = capabilities.device_api_version
                     >= vk::API_VERSION_1_3
                     || capabilities.supports_extension(ext::subgroup_size_control::NAME);
+                let supports_robustness2 = capabilities.supports_extension(ext::robustness2::NAME);
 
                 let supports_acceleration_structure =
                     capabilities.supports_extension(khr::acceleration_structure::NAME);
@@ -1180,6 +1207,13 @@ impl super::InstanceShared {
                     properties2 = properties2.push_next(next);
                 }
 
+                if supports_robustness2 {
+                    let next = capabilities
+                        .robustness2
+                        .insert(vk::PhysicalDeviceRobustness2PropertiesEXT::default());
+                    properties2 = properties2.push_next(next);
+                }
+
                 unsafe {
                     get_device_properties.get_physical_device_properties2(phd, &mut properties2)
                 };
@@ -1191,6 +1225,7 @@ impl super::InstanceShared {
                     capabilities
                         .supported_extensions
                         .retain(|&x| x.extension_name_as_c_str() != Ok(ext::robustness2::NAME));
+                    capabilities.robustness2 = None;
                 }
             };
             capabilities
@@ -1507,7 +1542,7 @@ impl super::Instance {
         };
         let capabilities = crate::Capabilities {
             limits: phd_capabilities.to_wgpu_limits(),
-            alignments: phd_capabilities.to_hal_alignments(),
+            alignments: phd_capabilities.to_hal_alignments(private_caps.robust_buffer_access2),
             downlevel: wgt::DownlevelCapabilities {
                 flags: downlevel_flags,
                 limits: wgt::DownlevelLimits {},
@@ -1779,7 +1814,7 @@ impl super::Adapter {
                 capabilities: Some(capabilities.iter().cloned().collect()),
                 bounds_check_policies: naga::proc::BoundsCheckPolicies {
                     index: naga::proc::BoundsCheckPolicy::Restrict,
-                    buffer: if self.private_caps.robust_buffer_access {
+                    buffer: if self.private_caps.robust_buffer_access2 {
                         naga::proc::BoundsCheckPolicy::Unchecked
                     } else {
                         naga::proc::BoundsCheckPolicy::Restrict

diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
@@ -493,9 +493,36 @@ struct PrivateCapabilities {
     /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
     can_present: bool,
     non_coherent_map_mask: wgt::BufferAddress,
+
+    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
+    ///
+    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
+    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
+    /// a given bindgroup binding outside that binding's [accessible
+    /// region][ar]. Enabling `robustBufferAccess` does ensure that
+    /// out-of-bounds reads and writes are not undefined behavior (that's good),
+    /// but still permits out-of-bounds reads to return data from anywhere
+    /// within the buffer, not just the accessible region.
+    ///
+    /// [ar]: ../struct.BufferBinding.html#accessible-region
+    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
     robust_buffer_access: bool,
+
     robust_image_access: bool,
+
+    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
+    /// [`robustBufferAccess2`] feature.
+    ///
+    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
+    /// shader accesses to buffer contents. If this feature is not available,
+    /// this backend must have Naga inject bounds checks in the generated
+    /// SPIR-V.
+    ///
+    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
+    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
+    /// [ar]: ../struct.BufferBinding.html#accessible-region
     robust_buffer_access2: bool,
+
     robust_image_access2: bool,
     zero_initialize_workgroup_memory: bool,
     image_format_list: bool,

diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
@@ -7309,8 +7309,15 @@ impl ShaderBoundChecks {
     /// Creates a new configuration where the shader isn't bound checked.
     ///
     /// # Safety
-    /// The caller MUST ensure that all shaders built with this configuration don't perform any
-    /// out of bounds reads or writes.
+    ///
+    /// The caller MUST ensure that all shaders built with this configuration
+    /// don't perform any out of bounds reads or writes.
+    ///
+    /// Note that `wgpu_core`, in particular, initializes only those portions of
+    /// buffers that it expects might be read, and it does not expect contents
+    /// outside the ranges bound in bindgroups to be accessible, so using this
+    /// configuration with ill-behaved shaders could expose uninitialized GPU
+    /// memory contents to the application.
     #[must_use]
     pub unsafe fn unchecked() -> Self {
         ShaderBoundChecks {