gfx-rs · Wumpf · Jul 14, 2024 · Jun 1, 2024 · Jun 1, 2024 · Jun 2, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -183,6 +183,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 - Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
+- Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
 
 #### Metal
 - Removed the `link` Cargo feature.

@@ -43,4 +43,4 @@ pollster.workspace = true
 profiling.workspace = true
 rayon.workspace = true
 tracy-client = { workspace = true, optional = true }
-wgpu.workspace = true
+wgpu = { workspace = true, features = ["wgsl"] }
@@ -24,6 +24,21 @@ By default it measures 10k draw calls, with 90k total resources.
 
 Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
 the render pass into multiple passes over multiple command buffers.
+If available, it also tests a bindless approach, binding all textures at once instead of switching
+the bind group for every draw call.
+
+#### `Computepass`
+
+This benchmark measures the performance of recording and submitting a compute pass with a large
+number of dispatches and resources.
+By default it measures 10k dispatch calls, with 60k total resources, emulating an unusually complex and sequential compute workload.
+
+Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
+the compute pass into multiple passes over multiple command buffers.
+If available, it also tests a bindless approach, binding all resources at once instead of switching
+the bind group for every dispatch call.
+TODO(https://github.com/gfx-rs/wgpu/issues/5766): The bindless version uses only 1k dispatches with 6k resources since it would be too slow for a reasonable benchmarking time otherwise.
+
 
 #### `Resource Creation`
 

@@ -0,0 +1,26 @@
+@group(0) @binding(0)
+var tex: binding_array<texture_2d<f32>>; // Array of sampled 2D f32 textures, read via `textureLoad` in `cs_main`.
+
+@group(0) @binding(1)
+// TODO(https://github.com/gfx-rs/wgpu/issues/5765): The extra whitespace between the angle brackets is needed to workaround a parsing bug.
+var images: binding_array<texture_storage_2d<r32float, read_write> >; // Array of read-write r32float storage textures.
+struct BufferElement { // Element type of each entry in the `buffers` binding array.
+    element: vec4f,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> buffers: binding_array<BufferElement>; // Array of read-write storage buffers written by `cs_main`.
+
+@compute
+@workgroup_size(16)
+fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
+    // Each invocation reads one (idx0, idx1) pair from `tex` and from `images`, then writes the sums to the matching pair in `buffers`.
+    let offset = global_invocation_id.x; // Would be nice to offset this dynamically (it's just 0 always in the current setup)
+    let idx0 = offset * 2 + 0;
+    let idx1 = offset * 2 + 1;
+    // Load both textures of the pair (idx0 and idx1), mirroring the `images` loads; locals are named `*_sum` to avoid shadowing the `tex` binding.
+    let tex_sum = textureLoad(tex[idx0], vec2u(0), 0) + textureLoad(tex[idx1], vec2u(0), 0);
+    let image_sum = textureLoad(images[idx0], vec2u(0)) + textureLoad(images[idx1], vec2u(0));
+    buffers[idx0].element = tex_sum.rrrr;
+    buffers[idx1].element = image_sum.rrrr;
+}