From ba7f1a255f883e707f6e4b3fcf747b8b5afd2dab Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 29 Sep 2024 21:53:58 +0200 Subject: [PATCH 01/19] Switch flag enums to u64 --- wgpu-hal/src/vulkan/device.rs | 2 +- wgpu-types/src/lib.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 9f0fc67738..75594bac4e 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1930,7 +1930,7 @@ impl crate::Device for super::Device { for cat in desc.color_targets { let (key, attarchment) = if let Some(cat) = cat.as_ref() { let mut vk_attachment = vk::PipelineColorBlendAttachmentState::default() - .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits())); + .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits() as u32)); if let Some(ref blend) = cat.blend { let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index c7167f826f..ef5d58a9e0 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1900,7 +1900,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpushaderstageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct ShaderStages: u32 { + pub struct ShaderStages: u64 { /// Binding is not visible from any shader stage. const NONE = 0; /// Binding is visible from the vertex shader of a render pipeline. @@ -4653,7 +4653,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpucolorwriteflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct ColorWrites: u32 { + pub struct ColorWrites: u64 { /// Enable red channel writes const RED = 1 << 0; /// Enable green channel writes @@ -5265,7 +5265,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpubufferusageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct BufferUsages: u32 { + pub struct BufferUsages: u64 { /// Allow a buffer to be mapped for reading using [`Buffer::map_async`] + [`Buffer::get_mapped_range`]. /// This does not include creating a buffer with [`BufferDescriptor::mapped_at_creation`] set. /// @@ -5484,7 +5484,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gputextureusageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct TextureUsages: u32 { + pub struct TextureUsages: u64 { /// Allows a texture to be the source in a [`CommandEncoder::copy_texture_to_buffer`] or /// [`CommandEncoder::copy_texture_to_texture`] operation. const COPY_SRC = 1 << 0; From 2d283e3a5c27b03cb77ffe22f064736d6808c217 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Wed, 2 Oct 2024 08:20:53 +0200 Subject: [PATCH 02/19] try another way to implement futures --- wgpu-core/src/instance.rs | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 581c5ce0d9..2242c8856a 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -60,6 +60,41 @@ pub struct Instance { /// The ordering in this list implies prioritization and needs to be preserved. pub instance_per_backend: Vec<(Backend, Box)>, pub flags: wgt::InstanceFlags, + + // WIP Future mechanism + pub futures: FutureRegistry, +} + +#[derive(Default)] +pub struct Future { + completed: bool, +} + +// A very simplistic Registry type that replaces wgc::Registry for now +// for futures. +// Test only: There should be a way not to store all completed futures for the +// whole lifetime of the prorgam, and also this is not threadsafe. +#[derive(Default)] +struct FutureRegistry { + next_id: u64, + storage: HashMap, +} +impl FutureRegistry { + fn new() -> Self { + return Self{ + next_id: 0, + storage: HashMap::new(), + } + } + fn register(&mut self, value: Future) -> u64 { + let id = self.next_id; + self.next_id += 1; + self.storage.insert(id, value); + return id; + } + fn get_mut(&mut self, id: u64) -> Option<&mut Future> { + return self.storage.get_mut(&id); + } } impl Instance { @@ -111,6 +146,7 @@ impl Instance { name: name.to_string(), instance_per_backend, flags: instance_desc.flags, + futures: FutureRegistry::new(), } } @@ -815,6 +851,29 @@ impl Global { } } +impl Global { + pub fn instance_create_future( + &mut self, + ) -> u64 { + let future_id = self.instance.futures.register(Future{ + completed: false, + }); + return future_id; + } + + pub fn instance_complete_future( + &mut self, + future_id: u64, + ) { + match self.instance.futures.get_mut(future_id) { + Some(future) => { + future.completed = true; + }, + None => panic!("invalid future id") + } + } +} + /// Generates a set of backends from a comma separated list of case-insensitive backend names. /// /// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid. From a93cbdc98c19f0ae502e1342462d2b4a8b484b0f Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Thu, 3 Oct 2024 08:54:23 +0200 Subject: [PATCH 03/19] Add submission indices --- wgpu-core/src/device/global.rs | 30 +++++++--------- wgpu-core/src/device/life.rs | 4 ++- wgpu-core/src/device/queue.rs | 8 +++-- wgpu-core/src/device/resource.rs | 12 +++---- wgpu-core/src/instance.rs | 59 -------------------------------- wgpu-core/src/resource.rs | 14 +++++--- 6 files changed, 36 insertions(+), 91 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 10b82a73ae..88a9667992 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2147,33 +2147,27 @@ impl Global { offset: BufferAddress, size: Option, op: BufferMapOperation, - ) -> BufferAccessResult { + ) -> Result { profiling::scope!("Buffer::map_async"); api_log!("Buffer::map_async {buffer_id:?} offset {offset:?} size {size:?} op: {op:?}"); let hub = &self.hub; - let op_and_err = 'error: { - let buffer = match hub.buffers.get(buffer_id).get() { - Ok(buffer) => buffer, - Err(e) => break 'error Some((op, e.into())), - }; - - buffer.map_async(offset, size, op).err() + let map_result = match hub.buffers.get(buffer_id).get() { + Ok(buffer) => buffer.map_async(offset, size, op), + Err(e) => Err((op, e.into())), }; - // User callbacks must not be called while holding `buffer.map_async`'s locks, so we - // defer the error callback if it needs to be called immediately (typically when running - // into errors). - if let Some((mut operation, err)) = op_and_err { - if let Some(callback) = operation.callback.take() { - callback.call(Err(err.clone())); + match map_result { + Ok(submission_index) => Ok(submission_index), + Err((mut operation, err)) => { + if let Some(callback) = operation.callback.take() { + callback.call(Err(err.clone())); + } + log::error!("Buffer::map_async error: {err}"); + Err(err) } - log::error!("Buffer::map_async error: {err}"); - return Err(err); } - - Ok(()) } pub fn buffer_get_mapped_range( diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index e6aed78a08..ee37b78f1c 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -304,15 +304,17 @@ impl LifetimeTracker { } } - pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) { + pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) -> Option { match self.active.last_mut() { Some(active) => { active.work_done_closures.push(closure); + Some(active.index) } // We must defer the closure until all previously occurring map_async closures // have fired. This is required by the spec. None => { self.work_done_closures.push(closure); + None } } } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index bd6d99f1c3..5777dbe9c5 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1334,12 +1334,16 @@ impl Global { &self, queue_id: QueueId, closure: SubmittedWorkDoneClosure, - ) { + ) -> SubmissionIndex { api_log!("Queue::on_submitted_work_done {queue_id:?}"); //TODO: flush pending writes let queue = self.hub.queues.get(queue_id); - queue.device.lock_life().add_work_done_closure(closure); + let result = queue.device.lock_life().add_work_done_closure(closure); + match result { + Some(submission_index) => submission_index, + None => queue.device.last_successful_submission_index.load(Ordering::Acquire), + } } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 03b183e085..9f0cbf33f1 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -437,13 +437,11 @@ impl Device { .last_successful_submission_index .load(Ordering::Acquire); - if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { - if submission_index > last_successful_submission_index { - return Err(WaitIdleError::WrongSubmissionIndex( - submission_index, - last_successful_submission_index, - )); - } + if submission_index > last_successful_submission_index { + return Err(WaitIdleError::WrongSubmissionIndex( + submission_index, + last_successful_submission_index, + )); } submission_index diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 2242c8856a..581c5ce0d9 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -60,41 +60,6 @@ pub struct Instance { /// The ordering in this list implies prioritization and needs to be preserved. pub instance_per_backend: Vec<(Backend, Box)>, pub flags: wgt::InstanceFlags, - - // WIP Future mechanism - pub futures: FutureRegistry, -} - -#[derive(Default)] -pub struct Future { - completed: bool, -} - -// A very simplistic Registry type that replaces wgc::Registry for now -// for futures. -// Test only: There should be a way not to store all completed futures for the -// whole lifetime of the prorgam, and also this is not threadsafe. -#[derive(Default)] -struct FutureRegistry { - next_id: u64, - storage: HashMap, -} -impl FutureRegistry { - fn new() -> Self { - return Self{ - next_id: 0, - storage: HashMap::new(), - } - } - fn register(&mut self, value: Future) -> u64 { - let id = self.next_id; - self.next_id += 1; - self.storage.insert(id, value); - return id; - } - fn get_mut(&mut self, id: u64) -> Option<&mut Future> { - return self.storage.get_mut(&id); - } } impl Instance { @@ -146,7 +111,6 @@ impl Instance { name: name.to_string(), instance_per_backend, flags: instance_desc.flags, - futures: FutureRegistry::new(), } } @@ -851,29 +815,6 @@ impl Global { } } -impl Global { - pub fn instance_create_future( - &mut self, - ) -> u64 { - let future_id = self.instance.futures.register(Future{ - completed: false, - }); - return future_id; - } - - pub fn instance_complete_future( - &mut self, - future_id: u64, - ) { - match self.instance.futures.get_mut(future_id) { - Some(future) => { - future.completed = true; - }, - None => panic!("invalid future id") - } - } -} - /// Generates a set of backends from a comma separated list of case-insensitive backend names. /// /// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid. diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 5df285da54..3529705121 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -14,7 +14,7 @@ use crate::{ resource_log, snatch::{SnatchGuard, Snatchable}, track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, - Label, LabelHelpers, + Label, LabelHelpers, SubmissionIndex, }; use smallvec::SmallVec; @@ -303,7 +303,7 @@ impl BufferMapCallback { // SAFETY: the contract of the call to from_c says that this unsafe is sound. BufferMapCallbackInner::C { inner } => unsafe { let status = match result { - Ok(()) => BufferMapAsyncStatus::Success, + Ok(_) => BufferMapAsyncStatus::Success, Err(BufferAccessError::Device(_)) => BufferMapAsyncStatus::ContextLost, Err(BufferAccessError::InvalidResource(_)) | Err(BufferAccessError::DestroyedResource(_)) => BufferMapAsyncStatus::Invalid, @@ -537,7 +537,7 @@ impl Buffer { offset: wgt::BufferAddress, size: Option, op: BufferMapOperation, - ) -> Result<(), (BufferMapOperation, BufferAccessError)> { + ) -> Result { let range_size = if let Some(size) = size { size } else if offset > self.size { @@ -624,9 +624,15 @@ impl Buffer { .buffers .set_single(self, internal_use); + let mut fence = device.fence.write(); // is this needed to be able to increment active_submission_index? + // should we increment last_successful_submission_index instead? + let submit_index = device + .active_submission_index + .fetch_add(1, core::sync::atomic::Ordering::SeqCst) + + 1; device.lock_life().map(self); - Ok(()) + Ok(submit_index) } // Note: This must not be called while holding a lock. From 2e7160bcfc6237f2419da96a0a3bcfcb6d772ebc Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Thu, 3 Oct 2024 09:14:00 +0200 Subject: [PATCH 04/19] Remove changes not related to this PR --- wgpu-hal/src/vulkan/device.rs | 2 +- wgpu-types/src/lib.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 75594bac4e..9f0fc67738 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1930,7 +1930,7 @@ impl crate::Device for super::Device { for cat in desc.color_targets { let (key, attarchment) = if let Some(cat) = cat.as_ref() { let mut vk_attachment = vk::PipelineColorBlendAttachmentState::default() - .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits() as u32)); + .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits())); if let Some(ref blend) = cat.blend { let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index ef5d58a9e0..c7167f826f 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1900,7 +1900,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpushaderstageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct ShaderStages: u64 { + pub struct ShaderStages: u32 { /// Binding is not visible from any shader stage. const NONE = 0; /// Binding is visible from the vertex shader of a render pipeline. @@ -4653,7 +4653,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpucolorwriteflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct ColorWrites: u64 { + pub struct ColorWrites: u32 { /// Enable red channel writes const RED = 1 << 0; /// Enable green channel writes @@ -5265,7 +5265,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gpubufferusageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct BufferUsages: u64 { + pub struct BufferUsages: u32 { /// Allow a buffer to be mapped for reading using [`Buffer::map_async`] + [`Buffer::get_mapped_range`]. /// This does not include creating a buffer with [`BufferDescriptor::mapped_at_creation`] set. /// @@ -5484,7 +5484,7 @@ bitflags::bitflags! { /// https://gpuweb.github.io/gpuweb/#typedefdef-gputextureusageflags). #[repr(transparent)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] - pub struct TextureUsages: u64 { + pub struct TextureUsages: u32 { /// Allows a texture to be the source in a [`CommandEncoder::copy_texture_to_buffer`] or /// [`CommandEncoder::copy_texture_to_texture`] operation. const COPY_SRC = 1 << 0; From 4ae7b0180278a73ad187bea6c5089cbbe1dbcf2b Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sat, 5 Oct 2024 11:52:37 +0200 Subject: [PATCH 05/19] map_async returns submission index in wgpu-rs --- wgpu-core/src/device/life.rs | 5 ++++- wgpu-core/src/device/queue.rs | 5 ++++- wgpu-core/src/resource.rs | 3 +-- wgpu/src/api/buffer.rs | 8 +++++--- wgpu/src/backend/wgpu_core.rs | 7 ++++--- wgpu/src/context.rs | 9 +++++---- 6 files changed, 23 insertions(+), 14 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index ee37b78f1c..f1ca8f7e7e 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -304,7 +304,10 @@ impl LifetimeTracker { } } - pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) -> Option { + pub fn add_work_done_closure( + &mut self, + closure: SubmittedWorkDoneClosure, + ) -> Option { match self.active.last_mut() { Some(active) => { active.work_done_closures.push(closure); diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 5777dbe9c5..2dc03d3a32 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1342,7 +1342,10 @@ impl Global { let result = queue.device.lock_life().add_work_done_closure(closure); match result { Some(submission_index) => submission_index, - None => queue.device.last_successful_submission_index.load(Ordering::Acquire), + None => queue + .device + .last_successful_submission_index + .load(Ordering::Acquire), } } } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 3529705121..b330010294 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -624,8 +624,7 @@ impl Buffer { .buffers .set_single(self, internal_use); - let mut fence = device.fence.write(); // is this needed to be able to increment active_submission_index? - // should we increment last_successful_submission_index instead? + // TODO: should we increment last_successful_submission_index instead? let submit_index = device .active_submission_index .fetch_add(1, core::sync::atomic::Ordering::SeqCst) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index 9d490616d3..ea4bddc146 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -337,7 +337,7 @@ impl<'a> BufferSlice<'a> { &self, mode: MapMode, callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static, - ) { + ) -> SubmissionIndex { let mut mc = self.buffer.map_context.lock(); assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped"); let end = match self.size { @@ -346,13 +346,15 @@ impl<'a> BufferSlice<'a> { }; mc.initial_range = self.offset..end; - DynContext::buffer_map_async( + let data = DynContext::buffer_map_async( &*self.buffer.context, self.buffer.data.as_ref(), mode, self.offset..end, Box::new(callback), - ) + ); + + SubmissionIndex { data } } /// Gain read-only access to the bytes of a [mapped] [`Buffer`]. diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 652df388ff..e1690864ce 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1391,7 +1391,7 @@ impl crate::Context for ContextWgpuCore { mode: MapMode, range: Range, callback: crate::context::BufferMapCallback, - ) { + ) -> Self::SubmissionIndexData { let operation = wgc::resource::BufferMapOperation { host: match mode { MapMode::Read => wgc::device::HostMap::Read, @@ -1411,9 +1411,10 @@ impl crate::Context for ContextWgpuCore { Some(range.end - range.start), operation, ) { - Ok(()) => (), + Ok(index) => index, Err(cause) => { - self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async") + self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async"); + Self::SubmissionIndexData::MAX // invalid submission index } } } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index a27459ab45..3696fe6d8d 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -218,7 +218,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { mode: MapMode, range: Range, callback: BufferMapCallback, - ); + ) -> Self::SubmissionIndexData; fn buffer_get_mapped_range( &self, buffer_data: &Self::BufferData, @@ -908,7 +908,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { mode: MapMode, range: Range, callback: BufferMapCallback, - ); + ) -> Arc; fn buffer_get_mapped_range( &self, buffer_data: &crate::Data, @@ -1688,9 +1688,10 @@ where mode: MapMode, range: Range, callback: BufferMapCallback, - ) { + ) -> Arc { let buffer_data = downcast_ref(buffer_data); - Context::buffer_map_async(self, buffer_data, mode, range, callback) + let data = Context::buffer_map_async(self, buffer_data, mode, range, callback); + Arc::new(data) as _ } fn buffer_get_mapped_range( From 6374487c7a44ac6182a2be2b34de7d624d2bc5dc Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sat, 5 Oct 2024 11:54:37 +0200 Subject: [PATCH 06/19] on_submitted_work_done returns submission index in wgpu-rs --- wgpu/src/api/queue.rs | 10 +++++++--- wgpu/src/backend/wgpu_core.rs | 4 ++-- wgpu/src/context.rs | 9 +++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index b57b33ece3..93f18636f3 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -275,11 +275,15 @@ impl Queue { /// has completed. There are no restrictions on the code you can run in the callback, however on native the /// call to the function will not complete until the callback returns, so prefer keeping callbacks short /// and used to set flags, send messages, etc. - pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) { - DynContext::queue_on_submitted_work_done( + pub fn on_submitted_work_done( + &self, + callback: impl FnOnce() + Send + 'static, + ) -> SubmissionIndex { + let data = DynContext::queue_on_submitted_work_done( &*self.context, self.data.as_ref(), Box::new(callback), - ) + ); + SubmissionIndex { data } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index e1690864ce..555942bf1b 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2096,9 +2096,9 @@ impl crate::Context for ContextWgpuCore { &self, queue_data: &Self::QueueData, callback: crate::context::SubmittedWorkDoneCallback, - ) { + ) -> Self::SubmissionIndexData { let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback); - self.0.queue_on_submitted_work_done(queue_data.id, closure); + self.0.queue_on_submitted_work_done(queue_data.id, closure) } fn device_start_capture(&self, device_data: &Self::DeviceData) { diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 3696fe6d8d..0571c483d3 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -413,7 +413,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, queue_data: &Self::QueueData, callback: SubmittedWorkDoneCallback, - ); + ) -> Self::SubmissionIndexData; fn device_start_capture(&self, device_data: &Self::DeviceData); fn device_stop_capture(&self, device_data: &Self::DeviceData); @@ -1092,7 +1092,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ); + ) -> Arc; fn device_start_capture(&self, data: &crate::Data); fn device_stop_capture(&self, data: &crate::Data); @@ -2112,9 +2112,10 @@ where &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) { + ) -> Arc { let queue_data = downcast_ref(queue_data); - Context::queue_on_submitted_work_done(self, queue_data, callback) + let data = Context::queue_on_submitted_work_done(self, queue_data, callback); + Arc::new(data) as _ } fn device_start_capture(&self, device_data: &crate::Data) { From 1c8c0add339858cd8b4c6c2853c96fd916d43cae Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sat, 5 Oct 2024 12:08:06 +0200 Subject: [PATCH 07/19] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 894da6ddc7..048bb393dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,7 @@ By @bradwerth [#6216](https://github.com/gfx-rs/wgpu/pull/6216). #### General - Add `VideoFrame` to `ExternalImageSource` enum. By @jprochazk in [#6170](https://github.com/gfx-rs/wgpu/pull/6170) +- Return submission index in `map_async` and `on_submitted_work_done` to track down completion of async callbacks. By @eliemichel in [#6360](https://github.com/gfx-rs/wgpu/pull/6360) #### Vulkan From 4e948dcac072aad13e047119fba1296b7e14f328 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 20 Oct 2024 18:57:18 +0200 Subject: [PATCH 08/19] Fix return value of on_submitted_work_done --- wgpu-core/src/device/queue.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index d4c1d6cc76..7e015b6aba 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1256,10 +1256,13 @@ impl Queue { unsafe { self.raw().get_timestamp_period() } } - pub fn on_submitted_work_done(&self, closure: SubmittedWorkDoneClosure) { + pub fn on_submitted_work_done( + &self, + closure: SubmittedWorkDoneClosure, + ) -> Option { api_log!("Queue::on_submitted_work_done"); //TODO: flush pending writes - self.device.lock_life().add_work_done_closure(closure); + self.device.lock_life().add_work_done_closure(closure) } } From 51aebeccf26d4ea3cea7e426b0995bed55c57f43 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Tue, 22 Oct 2024 22:16:40 +0200 Subject: [PATCH 09/19] WIP define BufferMapFuture and SubmittedWorkDoneFuture --- wgpu/src/api/buffer.rs | 9 +++------ wgpu/src/api/queue.rs | 8 +++----- wgpu/src/backend/wgpu_core.rs | 3 +++ wgpu/src/context.rs | 33 +++++++++++++++++++++++---------- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index 61e01492af..b94c8be829 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -1,8 +1,5 @@ use std::{ - error, fmt, - ops::{Bound, Deref, DerefMut, Range, RangeBounds}, - sync::Arc, - thread, + error, fmt, future::Future, ops::{Bound, Deref, DerefMut, Range, RangeBounds}, sync::Arc, thread }; use parking_lot::Mutex; @@ -338,7 +335,7 @@ impl<'a> BufferSlice<'a> { &self, mode: MapMode, callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static, - ) -> SubmissionIndex { + ) -> impl Future> + WasmNotSend { let mut mc = self.buffer.map_context.lock(); assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped"); let end = match self.size { @@ -355,7 +352,7 @@ impl<'a> BufferSlice<'a> { Box::new(callback), ); - SubmissionIndex { data } + async move { data.await } } /// Gain read-only access to the bytes of a [mapped] [`Buffer`]. diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index 93f18636f3..a1ffec4054 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -1,7 +1,5 @@ use std::{ - ops::{Deref, DerefMut}, - sync::Arc, - thread, + future::Future, ops::{Deref, DerefMut}, sync::Arc, thread }; use crate::context::{DynContext, QueueWriteBuffer}; @@ -278,12 +276,12 @@ impl Queue { pub fn on_submitted_work_done( &self, callback: impl FnOnce() + Send + 'static, - ) -> SubmissionIndex { + ) -> impl Future + WasmNotSend { let data = DynContext::queue_on_submitted_work_done( &*self.context, self.data.as_ref(), Box::new(callback), ); - SubmissionIndex { data } + async move { data.await } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 555942bf1b..262ede50dc 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -540,6 +540,9 @@ impl crate::Context for ContextWgpuCore { type PopErrorScopeFuture = Ready>; type CompilationInfoFuture = Ready; + type BufferMapFuture = wgc::SubmissionIndex; + type SubmittedWorkDoneFuture = wgc::SubmissionIndex; + fn init(instance_desc: wgt::InstanceDescriptor) -> Self { Self(wgc::global::Global::new("wgpu", instance_desc)) } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 0571c483d3..8412ac26c3 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -60,6 +60,9 @@ pub trait Context: Debug + WasmNotSendSync + Sized { type CompilationInfoFuture: Future + WasmNotSend + 'static; + type BufferMapFuture: Future> + WasmNotSend + 'static; + type SubmittedWorkDoneFuture: Future + WasmNotSend + 'static; + #[cfg(not(target_os = "emscripten"))] fn init(instance_desc: wgt::InstanceDescriptor) -> Self; unsafe fn instance_create_surface( @@ -218,7 +221,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Self::SubmissionIndexData; + ) -> Self::BufferMapFuture; fn buffer_get_mapped_range( &self, buffer_data: &Self::BufferData, @@ -413,7 +416,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, queue_data: &Self::QueueData, callback: SubmittedWorkDoneCallback, - ) -> Self::SubmissionIndexData; + ) -> Self::SubmittedWorkDoneFuture; fn device_start_capture(&self, device_data: &Self::DeviceData); fn device_stop_capture(&self, device_data: &Self::DeviceData); @@ -750,6 +753,16 @@ pub type DeviceLostCallback = Box; +#[cfg(send_sync)] +pub type BufferMapFuture = Box> + Send>; +#[cfg(not(send_sync))] +pub type BufferMapFuture = Box>>; + +#[cfg(send_sync)] +pub type SubmittedWorkDoneFuture = Box + Send>; +#[cfg(not(send_sync))] +pub type SubmittedWorkDoneFuture = Box>; + /// An object safe variant of [`Context`] implemented by all types that implement [`Context`]. pub(crate) trait DynContext: Debug + WasmNotSendSync { #[cfg(not(target_os = "emscripten"))] @@ -908,7 +921,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Arc; + ) -> Pin; fn buffer_get_mapped_range( &self, buffer_data: &crate::Data, @@ -1092,7 +1105,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Arc; + ) -> Pin; fn device_start_capture(&self, data: &crate::Data); fn device_stop_capture(&self, data: &crate::Data); @@ -1688,10 +1701,10 @@ where mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Arc { + ) -> Pin { let buffer_data = downcast_ref(buffer_data); - let data = Context::buffer_map_async(self, buffer_data, mode, range, callback); - Arc::new(data) as _ + let future = Context::buffer_map_async(self, buffer_data, mode, range, callback); + Box::pin(async move { future.await }) } fn buffer_get_mapped_range( @@ -2112,10 +2125,10 @@ where &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Arc { + ) -> Pin { let queue_data = downcast_ref(queue_data); - let data = Context::queue_on_submitted_work_done(self, queue_data, callback); - Arc::new(data) as _ + let future = Context::queue_on_submitted_work_done(self, queue_data, callback); + Box::pin(async move { future.await }) } fn device_start_capture(&self, device_data: &crate::Data) { From 404c387e850f1c8a9df29808ad6ff75e2ae127b1 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Tue, 22 Oct 2024 23:09:21 +0200 Subject: [PATCH 10/19] Introduce WgpuFuture --- wgpu/src/api/buffer.rs | 11 +++++++---- wgpu/src/api/instance.rs | 11 +++++++++++ wgpu/src/api/queue.rs | 14 +++++++------- wgpu/src/backend/wgpu_core.rs | 7 +++---- wgpu/src/context.rs | 34 ++++++++++++---------------------- 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index b94c8be829..a469782a70 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -1,5 +1,8 @@ use std::{ - error, fmt, future::Future, ops::{Bound, Deref, DerefMut, Range, RangeBounds}, sync::Arc, thread + error, fmt, + ops::{Bound, Deref, DerefMut, Range, RangeBounds}, + sync::Arc, + thread, }; use parking_lot::Mutex; @@ -335,7 +338,7 @@ impl<'a> BufferSlice<'a> { &self, mode: MapMode, callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static, - ) -> impl Future> + WasmNotSend { + ) -> WgpuFuture { let mut mc = self.buffer.map_context.lock(); assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped"); let end = match self.size { @@ -344,7 +347,7 @@ impl<'a> BufferSlice<'a> { }; mc.initial_range = self.offset..end; - let data = DynContext::buffer_map_async( + let id = DynContext::buffer_map_async( &*self.buffer.context, self.buffer.data.as_ref(), mode, @@ -352,7 +355,7 @@ impl<'a> BufferSlice<'a> { Box::new(callback), ); - async move { data.await } + WgpuFuture { id } } /// Gain read-only access to the bytes of a [mapped] [`Buffer`]. diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs index b21c9f70ec..d97b542c64 100644 --- a/wgpu/src/api/instance.rs +++ b/wgpu/src/api/instance.rs @@ -33,6 +33,17 @@ impl Default for Instance { } } +/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data. +/// +/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId) +#[derive(Debug, Clone)] +pub struct WgpuFuture { + #[cfg_attr(not(native), allow(dead_code))] + pub(crate) id: Arc, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(WgpuFuture: Send, Sync); + impl Instance { /// Returns which backends can be picked for the current build configuration. /// diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index a1ffec4054..0e600e4acd 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -1,5 +1,7 @@ use std::{ - future::Future, ops::{Deref, DerefMut}, sync::Arc, thread + ops::{Deref, DerefMut}, + sync::Arc, + thread, }; use crate::context::{DynContext, QueueWriteBuffer}; @@ -34,6 +36,7 @@ impl Drop for Queue { /// /// This type is unique to the Rust API of `wgpu`. /// There is no analogue in the WebGPU specification. +/// NB: WgpuFuture should probably be used instead of this #[derive(Debug, Clone)] pub struct SubmissionIndex { #[cfg_attr(not(native), allow(dead_code))] @@ -273,15 +276,12 @@ impl Queue { /// has completed. There are no restrictions on the code you can run in the callback, however on native the /// call to the function will not complete until the callback returns, so prefer keeping callbacks short /// and used to set flags, send messages, etc. - pub fn on_submitted_work_done( - &self, - callback: impl FnOnce() + Send + 'static, - ) -> impl Future + WasmNotSend { - let data = DynContext::queue_on_submitted_work_done( + pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture { + let id = DynContext::queue_on_submitted_work_done( &*self.context, self.data.as_ref(), Box::new(callback), ); - async move { data.await } + WgpuFuture { id } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 262ede50dc..64e7463dbc 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -540,8 +540,7 @@ impl crate::Context for ContextWgpuCore { type PopErrorScopeFuture = Ready>; type CompilationInfoFuture = Ready; - type BufferMapFuture = wgc::SubmissionIndex; - type SubmittedWorkDoneFuture = wgc::SubmissionIndex; + type WgpuFuture = wgc::SubmissionIndex; fn init(instance_desc: wgt::InstanceDescriptor) -> Self { Self(wgc::global::Global::new("wgpu", instance_desc)) @@ -1394,7 +1393,7 @@ impl crate::Context for ContextWgpuCore { mode: MapMode, range: Range, callback: crate::context::BufferMapCallback, - ) -> Self::SubmissionIndexData { + ) -> Self::WgpuFuture { let operation = wgc::resource::BufferMapOperation { host: match mode { MapMode::Read => wgc::device::HostMap::Read, @@ -2099,7 +2098,7 @@ impl crate::Context for ContextWgpuCore { &self, queue_data: &Self::QueueData, callback: crate::context::SubmittedWorkDoneCallback, - ) -> Self::SubmissionIndexData { + ) -> Self::WgpuFuture { let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback); self.0.queue_on_submitted_work_done(queue_data.id, closure) } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 8412ac26c3..fa039d1c7a 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -60,8 +60,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized { type CompilationInfoFuture: Future + WasmNotSend + 'static; - type BufferMapFuture: Future> + WasmNotSend + 'static; - type SubmittedWorkDoneFuture: Future + WasmNotSend + 'static; + /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data. + type WgpuFuture: ContextData + Copy; #[cfg(not(target_os = "emscripten"))] fn init(instance_desc: wgt::InstanceDescriptor) -> Self; @@ -221,7 +221,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Self::BufferMapFuture; + ) -> Self::WgpuFuture; fn buffer_get_mapped_range( &self, buffer_data: &Self::BufferData, @@ -416,7 +416,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, queue_data: &Self::QueueData, callback: SubmittedWorkDoneCallback, - ) -> Self::SubmittedWorkDoneFuture; + ) -> Self::WgpuFuture; fn device_start_capture(&self, device_data: &Self::DeviceData); fn device_stop_capture(&self, device_data: &Self::DeviceData); @@ -753,16 +753,6 @@ pub type DeviceLostCallback = Box; -#[cfg(send_sync)] -pub type BufferMapFuture = Box> + Send>; -#[cfg(not(send_sync))] -pub type BufferMapFuture = Box>>; - -#[cfg(send_sync)] -pub type SubmittedWorkDoneFuture = Box + Send>; -#[cfg(not(send_sync))] -pub type SubmittedWorkDoneFuture = Box>; - /// An object safe variant of [`Context`] implemented by all types that implement [`Context`]. pub(crate) trait DynContext: Debug + WasmNotSendSync { #[cfg(not(target_os = "emscripten"))] @@ -921,7 +911,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Pin; + ) -> Arc; fn buffer_get_mapped_range( &self, buffer_data: &crate::Data, @@ -1105,7 +1095,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Pin; + ) -> Arc; fn device_start_capture(&self, data: &crate::Data); fn device_stop_capture(&self, data: &crate::Data); @@ -1701,10 +1691,10 @@ where mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Pin { + ) -> Arc { let buffer_data = downcast_ref(buffer_data); - let future = Context::buffer_map_async(self, buffer_data, mode, range, callback); - Box::pin(async move { future.await }) + let handle = Context::buffer_map_async(self, buffer_data, mode, range, callback); + Arc::new(handle) as _ } fn buffer_get_mapped_range( @@ -2125,10 +2115,10 @@ where &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Pin { + ) -> Arc { let queue_data = downcast_ref(queue_data); - let future = Context::queue_on_submitted_work_done(self, queue_data, callback); - Box::pin(async move { future.await }) + let handle = Context::queue_on_submitted_work_done(self, queue_data, callback); + Arc::new(handle) as _ } fn device_start_capture(&self, device_data: &crate::Data) { From 4ec8eb8d445069de2a9606d08bd7443e1c11fc13 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Tue, 22 Oct 2024 23:24:16 +0200 Subject: [PATCH 11/19] Implement WgpuFuture for webgpu --- wgpu/src/backend/webgpu.rs | 7 +++++-- wgpu/src/context.rs | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index e0cf006e6e..1e4ddc13e6 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1162,6 +1162,8 @@ impl crate::context::Context for ContextWebGpu { Box CompilationInfo>, >; + type WgpuFuture = Promise; + fn init(_instance_desc: wgt::InstanceDescriptor) -> Self { let Ok(gpu) = get_browser_gpu_property() else { panic!( @@ -2150,7 +2152,7 @@ impl crate::context::Context for ContextWebGpu { mode: crate::MapMode, range: Range, callback: crate::context::BufferMapCallback, - ) { + ) -> Self::WgpuFuture { let map_promise = buffer_data.0.buffer.map_async_with_f64_and_f64( map_map_mode(mode), range.start as f64, @@ -2160,6 +2162,7 @@ impl crate::context::Context for ContextWebGpu { buffer_data.0.set_mapped_range(range); register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError)); + map_promise } fn buffer_get_mapped_range( @@ -2773,7 +2776,7 @@ impl crate::context::Context for ContextWebGpu { &self, _queue_data: &Self::QueueData, _callback: crate::context::SubmittedWorkDoneCallback, - ) { + ) -> Self::WgpuFuture { unimplemented!() } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index fa039d1c7a..c115089416 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -61,7 +61,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { type CompilationInfoFuture: Future + WasmNotSend + 'static; /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data. - type WgpuFuture: ContextData + Copy; + type WgpuFuture: ContextData; #[cfg(not(target_os = "emscripten"))] fn init(instance_desc: wgt::InstanceDescriptor) -> Self; From 4f37591af691870e6dd9a1967aac2a46c2a9e057 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Wed, 23 Oct 2024 00:41:48 +0200 Subject: [PATCH 12/19] Try introducing instance_wait_any --- wgpu/src/api/buffer.rs | 4 ++-- wgpu/src/api/instance.rs | 30 +++++++++++++++++++----------- wgpu/src/api/queue.rs | 14 ++++++++++++-- wgpu/src/backend/webgpu.rs | 12 +++++++++++- wgpu/src/backend/wgpu_core.rs | 9 +++++++++ wgpu/src/context.rs | 20 +++++++++++++++++++- 6 files changed, 72 insertions(+), 17 deletions(-) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index a469782a70..9d37e361b0 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -347,7 +347,7 @@ impl<'a> BufferSlice<'a> { }; mc.initial_range = self.offset..end; - let id = DynContext::buffer_map_async( + let data = DynContext::buffer_map_async( &*self.buffer.context, self.buffer.data.as_ref(), mode, @@ -355,7 +355,7 @@ impl<'a> BufferSlice<'a> { Box::new(callback), ); - WgpuFuture { id } + WgpuFuture { data } } /// Gain read-only access to the bytes of a [mapped] [`Buffer`]. diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs index d97b542c64..fcb8a54121 100644 --- a/wgpu/src/api/instance.rs +++ b/wgpu/src/api/instance.rs @@ -33,17 +33,6 @@ impl Default for Instance { } } -/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data. -/// -/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId) -#[derive(Debug, Clone)] -pub struct WgpuFuture { - #[cfg_attr(not(native), allow(dead_code))] - pub(crate) id: Arc, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(WgpuFuture: Send, Sync); - impl Instance { /// Returns which backends can be picked for the current build configuration. /// @@ -405,3 +394,22 @@ impl Instance { .map(|ctx| ctx.generate_report()) } } + +/// Status returned when waiting on WgpuFuture objects. +#[derive(Clone, Debug)] +pub(crate) enum WaitStatus { + // At least one WgpuFuture completed successfully. + //Success, + + // No WgpuFuture completed within the timeout. + //TimedOut, + + /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false. + UnsupportedTimeout, + + // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount. + //UnsupportedCount, + + // An invalid wait was performed with Mixed-Sources. + //UnsupportedMixedSources, +} diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index 0e600e4acd..15d3c990ab 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -51,6 +51,16 @@ pub type Maintain = wgt::Maintain; #[cfg(send_sync)] static_assertions::assert_impl_all!(Maintain: Send, Sync); + +/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried +/// for completion, but does not hold any returned data. +/// +/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId) +pub type WgpuFuture = SubmissionIndex; +#[cfg(send_sync)] +static_assertions::assert_impl_all!(WgpuFuture: Send, Sync); + + /// A write-only view into a staging buffer. /// /// Reading into this buffer won't yield the contents of the buffer from the @@ -277,11 +287,11 @@ impl Queue { /// call to the function will not complete until the callback returns, so prefer keeping callbacks short /// and used to set flags, send messages, etc. pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture { - let id = DynContext::queue_on_submitted_work_done( + let data = DynContext::queue_on_submitted_work_done( &*self.context, self.data.as_ref(), Box::new(callback), ); - WgpuFuture { id } + WgpuFuture { data } } } diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 1e4ddc13e6..d9c450e794 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1253,6 +1253,16 @@ impl crate::context::Context for ContextWebGpu { } } + fn instance_wait_any( + &self, + _futures: &[&Self::WgpuFuture], + _timeout_ns: u64, + ) -> crate::WaitStatus { + // TODO: Yield back to the browser, run the equivalent of the following JavaScript: + // > await Promise.any([ ...futures, new Promise(resolve => setTimeout(timeout_ns, resolve) ])) + crate::WaitStatus::UnsupportedTimeout + } + fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -2162,7 +2172,7 @@ impl crate::context::Context for ContextWebGpu { buffer_data.0.set_mapped_range(range); register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError)); - map_promise + map_promise.into() } fn buffer_get_mapped_range( diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 64e7463dbc..98b24b1230 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -608,6 +608,15 @@ impl crate::Context for ContextWgpuCore { ready(id.ok()) } + fn instance_wait_any( + &self, + _futures: &[&Self::WgpuFuture], + _timeout_ns: u64, + ) -> crate::WaitStatus { + // TODO: We need to know at the instance level whether a submission ID is completed... + crate::WaitStatus::UnsupportedTimeout + } + fn adapter_request_device( &self, adapter_data: &Self::AdapterData, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index c115089416..c4dd6e4ef6 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -16,7 +16,7 @@ use crate::{ RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor, - UncapturedErrorHandler, + UncapturedErrorHandler, WaitStatus }; /// Meta trait for an data associated with an id tracked by a context. /// @@ -73,6 +73,11 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Self::RequestAdapterFuture; + fn instance_wait_any( + &self, + futures: &[&Self::WgpuFuture], + timeout_ns: u64, + ) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -767,6 +772,11 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Pin; + fn instance_wait_any( + &self, + futures_data: &[&crate::Data], + timeout_ns: u64, + ) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &crate::Data, @@ -1373,6 +1383,14 @@ where Box::pin(async move { future.await.map(|data| Box::new(data) as _) }) } + fn instance_wait_any( + &self, + _futures_data: &[&crate::Data], + _timeout_ns: u64, + ) -> WaitStatus { + unimplemented!(); + } + fn adapter_request_device( &self, adapter_data: &crate::Data, From 320989756b1a798b3df96525de096a119dd0b8b9 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Wed, 23 Oct 2024 00:42:58 +0200 Subject: [PATCH 13/19] Run cargo fmt --- wgpu/src/api/instance.rs | 4 +--- wgpu/src/api/queue.rs | 2 -- wgpu/src/context.rs | 20 ++++---------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs index fcb8a54121..b3aab1aeb4 100644 --- a/wgpu/src/api/instance.rs +++ b/wgpu/src/api/instance.rs @@ -403,13 +403,11 @@ pub(crate) enum WaitStatus { // No WgpuFuture completed within the timeout. //TimedOut, - /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false. UnsupportedTimeout, - // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount. //UnsupportedCount, - + // An invalid wait was performed with Mixed-Sources. //UnsupportedMixedSources, } diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index 15d3c990ab..1f2ad629d9 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -51,7 +51,6 @@ pub type Maintain = wgt::Maintain; #[cfg(send_sync)] static_assertions::assert_impl_all!(Maintain: Send, Sync); - /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried /// for completion, but does not hold any returned data. /// @@ -60,7 +59,6 @@ pub type WgpuFuture = SubmissionIndex; #[cfg(send_sync)] static_assertions::assert_impl_all!(WgpuFuture: Send, Sync); - /// A write-only view into a staging buffer. /// /// Reading into this buffer won't yield the contents of the buffer from the diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index c4dd6e4ef6..fe37928af8 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -16,7 +16,7 @@ use crate::{ RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor, - UncapturedErrorHandler, WaitStatus + UncapturedErrorHandler, WaitStatus, }; /// Meta trait for an data associated with an id tracked by a context. /// @@ -73,11 +73,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Self::RequestAdapterFuture; - fn instance_wait_any( - &self, - futures: &[&Self::WgpuFuture], - timeout_ns: u64, - ) -> WaitStatus; + fn instance_wait_any(&self, futures: &[&Self::WgpuFuture], timeout_ns: u64) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -772,11 +768,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Pin; - fn instance_wait_any( - &self, - futures_data: &[&crate::Data], - timeout_ns: u64, - ) -> WaitStatus; + fn instance_wait_any(&self, futures_data: &[&crate::Data], timeout_ns: u64) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &crate::Data, @@ -1383,11 +1375,7 @@ where Box::pin(async move { future.await.map(|data| Box::new(data) as _) }) } - fn instance_wait_any( - &self, - _futures_data: &[&crate::Data], - _timeout_ns: u64, - ) -> WaitStatus { + fn instance_wait_any(&self, _futures_data: &[&crate::Data], _timeout_ns: u64) -> WaitStatus { unimplemented!(); } From be41ff6a9626b72c33569c8cc7f5966d5d2faca6 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Wed, 23 Oct 2024 08:45:16 +0200 Subject: [PATCH 14/19] Refine map async submission index --- wgpu-core/src/device/life.rs | 13 +++++++++++-- wgpu-core/src/resource.rs | 12 +++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index f1ca8f7e7e..bc66a4d1b9 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -211,8 +211,17 @@ impl LifetimeTracker { }); } - pub(crate) fn map(&mut self, value: &Arc) { - self.mapped.push(value.clone()); + pub(crate) fn map(&mut self, buffer: &Arc) -> Option { + self.mapped.push(buffer.clone()); + + // Warning: this duplicates what is in triage_mapped() + let submission = self + .active + .iter_mut() + .rev() + .find(|a| a.contains_buffer(&buffer)); + + submission.map(|s| s.index) } /// Returns the submission index of the most recent submission that uses the diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 95945b983f..af9bc551ba 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -27,7 +27,7 @@ use std::{ mem::{self, ManuallyDrop}, ops::Range, ptr::NonNull, - sync::Arc, + sync::{atomic::Ordering, Arc}, }; /// Information about the wgpu-core resource. @@ -633,12 +633,10 @@ impl Buffer { .buffers .set_single(self, internal_use); - // TODO: should we increment last_successful_submission_index instead? - let submit_index = device - .active_submission_index - .fetch_add(1, core::sync::atomic::Ordering::SeqCst) - + 1; - device.lock_life().map(self); + let submit_index = match device.lock_life().map(self) { + Some(index) => index, + None => device.active_submission_index.load(Ordering::SeqCst), + }; Ok(submit_index) } From 353ddbe29aee6a8e7e62f948db16e02dc944219e Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Wed, 23 Oct 2024 08:48:39 +0200 Subject: [PATCH 15/19] Use Future instead of Promise in webgpu implem of WgpuFuture --- wgpu/src/backend/webgpu.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index d9c450e794..650d99edb5 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1162,7 +1162,7 @@ impl crate::context::Context for ContextWebGpu { Box CompilationInfo>, >; - type WgpuFuture = Promise; + type WgpuFuture = wasm_bindgen_futures::JsFuture; fn init(_instance_desc: wgt::InstanceDescriptor) -> Self { let Ok(gpu) = get_browser_gpu_property() else { From 4c9371fda560928ff3feb88b912f90ccfcc90f0f Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 27 Oct 2024 09:30:49 +0100 Subject: [PATCH 16/19] Revert changes to wgpu, focus on wgpu-core --- wgpu-core/src/device/queue.rs | 5 +-- wgpu-core/src/resource.rs | 4 +-- wgpu/src/api/buffer.rs | 64 +++-------------------------------- wgpu/src/api/instance.rs | 30 +++------------- wgpu/src/api/queue.rs | 16 ++------- wgpu/src/backend/webgpu.rs | 28 ++++----------- wgpu/src/backend/wgpu_core.rs | 22 +++--------- wgpu/src/context.rs | 29 +++++----------- 8 files changed, 34 insertions(+), 164 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 7e015b6aba..1203c9d5a8 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1413,10 +1413,7 @@ impl Global { let result = queue.on_submitted_work_done(closure); match result { Some(submission_index) => submission_index, - None => queue - .device - .last_successful_submission_index - .load(Ordering::Acquire), + None => 0, // meaning no wait is necessary } } } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index af9bc551ba..bd2950c277 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -27,7 +27,7 @@ use std::{ mem::{self, ManuallyDrop}, ops::Range, ptr::NonNull, - sync::{atomic::Ordering, Arc}, + sync::Arc, }; /// Information about the wgpu-core resource. @@ -635,7 +635,7 @@ impl Buffer { let submit_index = match device.lock_life().map(self) { Some(index) => index, - None => device.active_submission_index.load(Ordering::SeqCst), + None => 0, // meaning no wait is necessary }; Ok(submit_index) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index 9d37e361b0..9d490616d3 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -242,7 +242,6 @@ impl Buffer { /// end of the buffer. pub fn slice>(&self, bounds: S) -> BufferSlice<'_> { let (offset, size) = range_to_offset_size(bounds); - check_buffer_bounds(self.size, offset, size); BufferSlice { buffer: self, offset, @@ -338,7 +337,7 @@ impl<'a> BufferSlice<'a> { &self, mode: MapMode, callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static, - ) -> WgpuFuture { + ) { let mut mc = self.buffer.map_context.lock(); assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped"); let end = match self.size { @@ -347,15 +346,13 @@ impl<'a> BufferSlice<'a> { }; mc.initial_range = self.offset..end; - let data = DynContext::buffer_map_async( + DynContext::buffer_map_async( &*self.buffer.context, self.buffer.data.as_ref(), mode, self.offset..end, Box::new(callback), - ); - - WgpuFuture { data } + ) } /// Gain read-only access to the bytes of a [mapped] [`Buffer`]. @@ -676,31 +673,6 @@ impl Drop for Buffer { } } -fn check_buffer_bounds( - buffer_size: BufferAddress, - offset: BufferAddress, - size: Option, -) { - // A slice of length 0 is invalid, so the offset must not be equal to or greater than the buffer size. - if offset >= buffer_size { - panic!( - "slice offset {} is out of range for buffer of size {}", - offset, buffer_size - ); - } - - if let Some(size) = size { - // Detect integer overflow. - let end = offset.checked_add(size.get()); - if end.map_or(true, |end| end > buffer_size) { - panic!( - "slice offset {} size {} is out of range for buffer of size {}", - offset, size, buffer_size - ); - } - } -} - fn range_to_offset_size>( bounds: S, ) -> (BufferAddress, Option) { @@ -718,10 +690,9 @@ fn range_to_offset_size>( (offset, size) } - #[cfg(test)] mod tests { - use super::{check_buffer_bounds, range_to_offset_size, BufferSize}; + use super::{range_to_offset_size, BufferSize}; #[test] fn range_to_offset_size_works() { @@ -744,31 +715,4 @@ mod tests { fn range_to_offset_size_panics_for_unbounded_empty_range() { range_to_offset_size(..0); } - - #[test] - #[should_panic] - fn check_buffer_bounds_panics_for_offset_at_size() { - check_buffer_bounds(100, 100, None); - } - - #[test] - fn check_buffer_bounds_works_for_end_in_range() { - check_buffer_bounds(200, 100, BufferSize::new(50)); - check_buffer_bounds(200, 100, BufferSize::new(100)); - check_buffer_bounds(u64::MAX, u64::MAX - 100, BufferSize::new(100)); - check_buffer_bounds(u64::MAX, 0, BufferSize::new(u64::MAX)); - check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX - 1)); - } - - #[test] - #[should_panic] - fn check_buffer_bounds_panics_for_end_over_size() { - check_buffer_bounds(200, 100, BufferSize::new(101)); - } - - #[test] - #[should_panic] - fn check_buffer_bounds_panics_for_end_wraparound() { - check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX)); - } } diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs index b3aab1aeb4..af6775b86b 100644 --- a/wgpu/src/api/instance.rs +++ b/wgpu/src/api/instance.rs @@ -93,15 +93,10 @@ impl Instance { /// during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use. /// /// [`Backends::BROWSER_WEBGPU`] takes a special role: - /// If it is set and a [`navigator.gpu`](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu) - /// object is present, this instance will *only* be able to create WebGPU adapters. - /// - /// ⚠️ On some browsers this check is insufficient to determine whether WebGPU is supported, - /// as the browser may define the `navigator.gpu` object, but be unable to create any WebGPU adapters. - /// For targeting _both_ WebGPU & WebGL is recommended to use [`crate::util::new_instance_with_webgpu_detection`]. - /// - /// If you instead want to force use of WebGL, either disable the `webgpu` compile-time feature - /// or don't add the [`Backends::BROWSER_WEBGPU`] flag to the the `instance_desc`'s `backends` field. + /// If it is set and WebGPU support is detected, this instance will *only* be able to create + /// WebGPU adapters. If you instead want to force use of WebGL, either + /// disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`] + /// flag to the the `instance_desc`'s `backends` field. /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core /// to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create /// a WebGL adapter. @@ -394,20 +389,3 @@ impl Instance { .map(|ctx| ctx.generate_report()) } } - -/// Status returned when waiting on WgpuFuture objects. -#[derive(Clone, Debug)] -pub(crate) enum WaitStatus { - // At least one WgpuFuture completed successfully. - //Success, - - // No WgpuFuture completed within the timeout. - //TimedOut, - /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false. - UnsupportedTimeout, - // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount. - //UnsupportedCount, - - // An invalid wait was performed with Mixed-Sources. - //UnsupportedMixedSources, -} diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index 1f2ad629d9..b57b33ece3 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -36,7 +36,6 @@ impl Drop for Queue { /// /// This type is unique to the Rust API of `wgpu`. /// There is no analogue in the WebGPU specification. -/// NB: WgpuFuture should probably be used instead of this #[derive(Debug, Clone)] pub struct SubmissionIndex { #[cfg_attr(not(native), allow(dead_code))] @@ -51,14 +50,6 @@ pub type Maintain = wgt::Maintain; #[cfg(send_sync)] static_assertions::assert_impl_all!(Maintain: Send, Sync); -/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried -/// for completion, but does not hold any returned data. -/// -/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId) -pub type WgpuFuture = SubmissionIndex; -#[cfg(send_sync)] -static_assertions::assert_impl_all!(WgpuFuture: Send, Sync); - /// A write-only view into a staging buffer. /// /// Reading into this buffer won't yield the contents of the buffer from the @@ -284,12 +275,11 @@ impl Queue { /// has completed. There are no restrictions on the code you can run in the callback, however on native the /// call to the function will not complete until the callback returns, so prefer keeping callbacks short /// and used to set flags, send messages, etc. - pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture { - let data = DynContext::queue_on_submitted_work_done( + pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) { + DynContext::queue_on_submitted_work_done( &*self.context, self.data.as_ref(), Box::new(callback), - ); - WgpuFuture { data } + ) } } diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 650d99edb5..e982300e70 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -815,6 +815,7 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(), min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(), min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(), + max_inter_stage_shader_components: limits.max_inter_stage_shader_components(), max_color_attachments: limits.max_color_attachments(), max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(), max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(), @@ -828,7 +829,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_subgroup_size: wgt::Limits::default().max_subgroup_size, max_push_constant_size: wgt::Limits::default().max_push_constant_size, max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings, - max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components, } } @@ -875,6 +875,7 @@ fn map_js_sys_limits(limits: &wgt::Limits) -> js_sys::Object { (maxBufferSize, max_buffer_size), (maxVertexAttributes, max_vertex_attributes), (maxVertexBufferArrayStride, max_vertex_buffer_array_stride), + (maxInterStageShaderComponents, max_inter_stage_shader_components), (maxComputeWorkgroupStorageSize, max_compute_workgroup_storage_size), (maxComputeInvocationsPerWorkgroup, max_compute_invocations_per_workgroup), (maxComputeWorkgroupSizeX, max_compute_workgroup_size_x), @@ -1087,12 +1088,8 @@ pub struct BrowserGpuPropertyInaccessible; /// Returns the browser's gpu object or `Err(BrowserGpuPropertyInaccessible)` if /// the current context is neither the main thread nor a dedicated worker. /// -/// If WebGPU is not supported, the Gpu property may (!) be `undefined`, -/// and so this function will return `Ok(None)`. -/// Note that this check is insufficient to determine whether WebGPU is -/// supported, as the browser may define the Gpu property, but be unable to -/// create any WebGPU adapters. -/// To detect whether WebGPU is supported, use the [`crate::utils::is_browser_webgpu_supported`] function. +/// If WebGPU is not supported, the Gpu property is `undefined`, and so this +/// function will return `Ok(None)`. /// /// See: /// * @@ -1162,8 +1159,6 @@ impl crate::context::Context for ContextWebGpu { Box CompilationInfo>, >; - type WgpuFuture = wasm_bindgen_futures::JsFuture; - fn init(_instance_desc: wgt::InstanceDescriptor) -> Self { let Ok(gpu) = get_browser_gpu_property() else { panic!( @@ -1253,16 +1248,6 @@ impl crate::context::Context for ContextWebGpu { } } - fn instance_wait_any( - &self, - _futures: &[&Self::WgpuFuture], - _timeout_ns: u64, - ) -> crate::WaitStatus { - // TODO: Yield back to the browser, run the equivalent of the following JavaScript: - // > await Promise.any([ ...futures, new Promise(resolve => setTimeout(timeout_ns, resolve) ])) - crate::WaitStatus::UnsupportedTimeout - } - fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -2162,7 +2147,7 @@ impl crate::context::Context for ContextWebGpu { mode: crate::MapMode, range: Range, callback: crate::context::BufferMapCallback, - ) -> Self::WgpuFuture { + ) { let map_promise = buffer_data.0.buffer.map_async_with_f64_and_f64( map_map_mode(mode), range.start as f64, @@ -2172,7 +2157,6 @@ impl crate::context::Context for ContextWebGpu { buffer_data.0.set_mapped_range(range); register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError)); - map_promise.into() } fn buffer_get_mapped_range( @@ -2786,7 +2770,7 @@ impl crate::context::Context for ContextWebGpu { &self, _queue_data: &Self::QueueData, _callback: crate::context::SubmittedWorkDoneCallback, - ) -> Self::WgpuFuture { + ) { unimplemented!() } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 98b24b1230..712e01cc44 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -540,8 +540,6 @@ impl crate::Context for ContextWgpuCore { type PopErrorScopeFuture = Ready>; type CompilationInfoFuture = Ready; - type WgpuFuture = wgc::SubmissionIndex; - fn init(instance_desc: wgt::InstanceDescriptor) -> Self { Self(wgc::global::Global::new("wgpu", instance_desc)) } @@ -608,15 +606,6 @@ impl crate::Context for ContextWgpuCore { ready(id.ok()) } - fn instance_wait_any( - &self, - _futures: &[&Self::WgpuFuture], - _timeout_ns: u64, - ) -> crate::WaitStatus { - // TODO: We need to know at the instance level whether a submission ID is completed... - crate::WaitStatus::UnsupportedTimeout - } - fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -1402,7 +1391,7 @@ impl crate::Context for ContextWgpuCore { mode: MapMode, range: Range, callback: crate::context::BufferMapCallback, - ) -> Self::WgpuFuture { + ) { let operation = wgc::resource::BufferMapOperation { host: match mode { MapMode::Read => wgc::device::HostMap::Read, @@ -1422,10 +1411,9 @@ impl crate::Context for ContextWgpuCore { Some(range.end - range.start), operation, ) { - Ok(index) => index, + Ok(_) => (), Err(cause) => { - self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async"); - Self::SubmissionIndexData::MAX // invalid submission index + self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async") } } } @@ -2107,9 +2095,9 @@ impl crate::Context for ContextWgpuCore { &self, queue_data: &Self::QueueData, callback: crate::context::SubmittedWorkDoneCallback, - ) -> Self::WgpuFuture { + ) { let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback); - self.0.queue_on_submitted_work_done(queue_data.id, closure) + self.0.queue_on_submitted_work_done(queue_data.id, closure); } fn device_start_capture(&self, device_data: &Self::DeviceData) { diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index fe37928af8..a27459ab45 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -16,7 +16,7 @@ use crate::{ RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor, - UncapturedErrorHandler, WaitStatus, + UncapturedErrorHandler, }; /// Meta trait for an data associated with an id tracked by a context. /// @@ -60,9 +60,6 @@ pub trait Context: Debug + WasmNotSendSync + Sized { type CompilationInfoFuture: Future + WasmNotSend + 'static; - /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data. - type WgpuFuture: ContextData; - #[cfg(not(target_os = "emscripten"))] fn init(instance_desc: wgt::InstanceDescriptor) -> Self; unsafe fn instance_create_surface( @@ -73,7 +70,6 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Self::RequestAdapterFuture; - fn instance_wait_any(&self, futures: &[&Self::WgpuFuture], timeout_ns: u64) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &Self::AdapterData, @@ -222,7 +218,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Self::WgpuFuture; + ); fn buffer_get_mapped_range( &self, buffer_data: &Self::BufferData, @@ -417,7 +413,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, queue_data: &Self::QueueData, callback: SubmittedWorkDoneCallback, - ) -> Self::WgpuFuture; + ); fn device_start_capture(&self, device_data: &Self::DeviceData); fn device_stop_capture(&self, device_data: &Self::DeviceData); @@ -768,7 +764,6 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, options: &RequestAdapterOptions<'_, '_>, ) -> Pin; - fn instance_wait_any(&self, futures_data: &[&crate::Data], timeout_ns: u64) -> WaitStatus; fn adapter_request_device( &self, adapter_data: &crate::Data, @@ -913,7 +908,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Arc; + ); fn buffer_get_mapped_range( &self, buffer_data: &crate::Data, @@ -1097,7 +1092,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Arc; + ); fn device_start_capture(&self, data: &crate::Data); fn device_stop_capture(&self, data: &crate::Data); @@ -1375,10 +1370,6 @@ where Box::pin(async move { future.await.map(|data| Box::new(data) as _) }) } - fn instance_wait_any(&self, _futures_data: &[&crate::Data], _timeout_ns: u64) -> WaitStatus { - unimplemented!(); - } - fn adapter_request_device( &self, adapter_data: &crate::Data, @@ -1697,10 +1688,9 @@ where mode: MapMode, range: Range, callback: BufferMapCallback, - ) -> Arc { + ) { let buffer_data = downcast_ref(buffer_data); - let handle = Context::buffer_map_async(self, buffer_data, mode, range, callback); - Arc::new(handle) as _ + Context::buffer_map_async(self, buffer_data, mode, range, callback) } fn buffer_get_mapped_range( @@ -2121,10 +2111,9 @@ where &self, queue_data: &crate::Data, callback: SubmittedWorkDoneCallback, - ) -> Arc { + ) { let queue_data = downcast_ref(queue_data); - let handle = Context::queue_on_submitted_work_done(self, queue_data, callback); - Arc::new(handle) as _ + Context::queue_on_submitted_work_done(self, queue_data, callback) } fn device_start_capture(&self, device_data: &crate::Data) { From c4f9f46b3fb3b2d7b59f96fe9148f24405430c7f Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 27 Oct 2024 09:42:19 +0100 Subject: [PATCH 17/19] Remove triage_mapped --- wgpu-core/src/device/life.rs | 54 ++++++-------------------------- wgpu-core/src/device/resource.rs | 2 -- 2 files changed, 10 insertions(+), 46 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index bc66a4d1b9..47e33ab11f 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -126,35 +126,20 @@ pub enum WaitIdleError { /// - Each buffer's `ResourceInfo::submission_index` records the index of the /// most recent queue submission that uses that buffer. /// -/// - Calling `Global::buffer_map_async` adds the buffer to -/// `self.mapped`, and changes `Buffer::map_state` to prevent it -/// from being used in any new submissions. -/// /// - When the device is polled, the following `LifetimeTracker` methods decide /// what should happen next: /// -/// 1) `triage_mapped` drains `self.mapped`, checking the submission index -/// of each buffer against the queue submissions that have finished -/// execution. Buffers used by submissions still in flight go in -/// `self.active[index].mapped`, and the rest go into -/// `self.ready_to_map`. -/// -/// 2) `triage_submissions` moves entries in `self.active[i]` for completed +/// 1) `triage_submissions` moves entries in `self.active[i]` for completed /// submissions to `self.ready_to_map`. At this point, both /// `self.active` and `self.ready_to_map` are up to date with the given /// submission index. /// -/// 3) `handle_mapping` drains `self.ready_to_map` and actually maps the +/// 2) `handle_mapping` drains `self.ready_to_map` and actually maps the /// buffers, collecting a list of notification closures to call. /// /// Only calling `Global::buffer_map_async` clones a new `Arc` for the /// buffer. This new `Arc` is only dropped by `handle_mapping`. pub(crate) struct LifetimeTracker { - /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but - /// which haven't been examined by `triage_mapped` yet to decide when they - /// can be mapped. - mapped: Vec>, - /// Resources used by queue submissions still in flight. One entry per /// submission, with older submissions appearing before younger. /// @@ -182,7 +167,6 @@ pub(crate) struct LifetimeTracker { impl LifetimeTracker { pub fn new() -> Self { Self { - mapped: Vec::new(), active: Vec::new(), ready_to_map: Vec::new(), work_done_closures: SmallVec::new(), @@ -212,16 +196,20 @@ impl LifetimeTracker { } pub(crate) fn map(&mut self, buffer: &Arc) -> Option { - self.mapped.push(buffer.clone()); - - // Warning: this duplicates what is in triage_mapped() + // Determine which buffers are ready to map, and which must wait for the GPU. let submission = self .active .iter_mut() .rev() .find(|a| a.contains_buffer(&buffer)); - submission.map(|s| s.index) + let maybe_submission_index = submission.as_ref().map(|s| s.index.clone()); + + submission + .map_or(&mut self.ready_to_map, |a| &mut a.mapped) + .push(buffer.clone()); + + maybe_submission_index } /// Returns the submission index of the most recent submission that uses the @@ -331,28 +319,6 @@ impl LifetimeTracker { } } - /// Determine which buffers are ready to map, and which must wait for the - /// GPU. - /// - /// See the documentation for [`LifetimeTracker`] for details. - pub(crate) fn triage_mapped(&mut self) { - if self.mapped.is_empty() { - return; - } - - for buffer in self.mapped.drain(..) { - let submission = self - .active - .iter_mut() - .rev() - .find(|a| a.contains_buffer(&buffer)); - - submission - .map_or(&mut self.ready_to_map, |a| &mut a.mapped) - .push(buffer); - } - } - /// Map the buffers in `self.ready_to_map`. /// /// Return a list of mapping notifications to send. diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 21ecf85d24..46271f7ac9 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -493,8 +493,6 @@ impl Device { let submission_closures = life_tracker.triage_submissions(submission_index, &self.command_allocator); - life_tracker.triage_mapped(); - let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard); let queue_empty = life_tracker.queue_empty(); From 22bc30de6d8e9ef6f378c3fbe16181f42477ce49 Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 27 Oct 2024 09:46:53 +0100 Subject: [PATCH 18/19] Style update --- wgpu-core/src/device/life.rs | 4 ++-- wgpu-core/src/device/queue.rs | 5 +---- wgpu-core/src/resource.rs | 5 +---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 47e33ab11f..84ff18440a 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -201,9 +201,9 @@ impl LifetimeTracker { .active .iter_mut() .rev() - .find(|a| a.contains_buffer(&buffer)); + .find(|a| a.contains_buffer(buffer)); - let maybe_submission_index = submission.as_ref().map(|s| s.index.clone()); + let maybe_submission_index = submission.as_ref().map(|s| s.index); submission .map_or(&mut self.ready_to_map, |a| &mut a.mapped) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 1203c9d5a8..25e8806c9c 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1411,10 +1411,7 @@ impl Global { //TODO: flush pending writes let queue = self.hub.queues.get(queue_id); let result = queue.on_submitted_work_done(closure); - match result { - Some(submission_index) => submission_index, - None => 0, // meaning no wait is necessary - } + result.unwrap_or(0) // '0' means no wait is necessary } } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index bd2950c277..157b83641a 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -633,10 +633,7 @@ impl Buffer { .buffers .set_single(self, internal_use); - let submit_index = match device.lock_life().map(self) { - Some(index) => index, - None => 0, // meaning no wait is necessary - }; + let submit_index = device.lock_life().map(self).unwrap_or(0); // '0' means no wait is necessary Ok(submit_index) } From 93934273f76bcfcb9b1d4e0ff459cac07a1c619a Mon Sep 17 00:00:00 2001 From: Elie Michel Date: Sun, 27 Oct 2024 14:02:33 +0100 Subject: [PATCH 19/19] Fix merge with trunk --- wgpu/src/api/buffer.rs | 56 +++++++++++++++++++++++++++++++++++++- wgpu/src/api/instance.rs | 13 ++++++--- wgpu/src/backend/webgpu.rs | 11 +++++--- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs index 9d490616d3..fa9c7f9ec0 100644 --- a/wgpu/src/api/buffer.rs +++ b/wgpu/src/api/buffer.rs @@ -242,6 +242,7 @@ impl Buffer { /// end of the buffer. pub fn slice>(&self, bounds: S) -> BufferSlice<'_> { let (offset, size) = range_to_offset_size(bounds); + check_buffer_bounds(self.size, offset, size); BufferSlice { buffer: self, offset, @@ -673,6 +674,31 @@ impl Drop for Buffer { } } +fn check_buffer_bounds( + buffer_size: BufferAddress, + offset: BufferAddress, + size: Option, +) { + // A slice of length 0 is invalid, so the offset must not be equal to or greater than the buffer size. + if offset >= buffer_size { + panic!( + "slice offset {} is out of range for buffer of size {}", + offset, buffer_size + ); + } + + if let Some(size) = size { + // Detect integer overflow. + let end = offset.checked_add(size.get()); + if end.map_or(true, |end| end > buffer_size) { + panic!( + "slice offset {} size {} is out of range for buffer of size {}", + offset, size, buffer_size + ); + } + } +} + fn range_to_offset_size>( bounds: S, ) -> (BufferAddress, Option) { @@ -690,9 +716,10 @@ fn range_to_offset_size>( (offset, size) } + #[cfg(test)] mod tests { - use super::{range_to_offset_size, BufferSize}; + use super::{check_buffer_bounds, range_to_offset_size, BufferSize}; #[test] fn range_to_offset_size_works() { @@ -715,4 +742,31 @@ mod tests { fn range_to_offset_size_panics_for_unbounded_empty_range() { range_to_offset_size(..0); } + + #[test] + #[should_panic] + fn check_buffer_bounds_panics_for_offset_at_size() { + check_buffer_bounds(100, 100, None); + } + + #[test] + fn check_buffer_bounds_works_for_end_in_range() { + check_buffer_bounds(200, 100, BufferSize::new(50)); + check_buffer_bounds(200, 100, BufferSize::new(100)); + check_buffer_bounds(u64::MAX, u64::MAX - 100, BufferSize::new(100)); + check_buffer_bounds(u64::MAX, 0, BufferSize::new(u64::MAX)); + check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX - 1)); + } + + #[test] + #[should_panic] + fn check_buffer_bounds_panics_for_end_over_size() { + check_buffer_bounds(200, 100, BufferSize::new(101)); + } + + #[test] + #[should_panic] + fn check_buffer_bounds_panics_for_end_wraparound() { + check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX)); + } } diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs index af6775b86b..b21c9f70ec 100644 --- a/wgpu/src/api/instance.rs +++ b/wgpu/src/api/instance.rs @@ -93,10 +93,15 @@ impl Instance { /// during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use. /// /// [`Backends::BROWSER_WEBGPU`] takes a special role: - /// If it is set and WebGPU support is detected, this instance will *only* be able to create - /// WebGPU adapters. If you instead want to force use of WebGL, either - /// disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`] - /// flag to the the `instance_desc`'s `backends` field. + /// If it is set and a [`navigator.gpu`](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu) + /// object is present, this instance will *only* be able to create WebGPU adapters. + /// + /// ⚠️ On some browsers this check is insufficient to determine whether WebGPU is supported, + /// as the browser may define the `navigator.gpu` object, but be unable to create any WebGPU adapters. + /// For targeting _both_ WebGPU & WebGL is recommended to use [`crate::util::new_instance_with_webgpu_detection`]. + /// + /// If you instead want to force use of WebGL, either disable the `webgpu` compile-time feature + /// or don't add the [`Backends::BROWSER_WEBGPU`] flag to the the `instance_desc`'s `backends` field. /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core /// to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create /// a WebGL adapter. diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index e982300e70..e0cf006e6e 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -815,7 +815,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(), min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(), min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(), - max_inter_stage_shader_components: limits.max_inter_stage_shader_components(), max_color_attachments: limits.max_color_attachments(), max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(), max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(), @@ -829,6 +828,7 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_subgroup_size: wgt::Limits::default().max_subgroup_size, max_push_constant_size: wgt::Limits::default().max_push_constant_size, max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings, + max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components, } } @@ -875,7 +875,6 @@ fn map_js_sys_limits(limits: &wgt::Limits) -> js_sys::Object { (maxBufferSize, max_buffer_size), (maxVertexAttributes, max_vertex_attributes), (maxVertexBufferArrayStride, max_vertex_buffer_array_stride), - (maxInterStageShaderComponents, max_inter_stage_shader_components), (maxComputeWorkgroupStorageSize, max_compute_workgroup_storage_size), (maxComputeInvocationsPerWorkgroup, max_compute_invocations_per_workgroup), (maxComputeWorkgroupSizeX, max_compute_workgroup_size_x), @@ -1088,8 +1087,12 @@ pub struct BrowserGpuPropertyInaccessible; /// Returns the browser's gpu object or `Err(BrowserGpuPropertyInaccessible)` if /// the current context is neither the main thread nor a dedicated worker. /// -/// If WebGPU is not supported, the Gpu property is `undefined`, and so this -/// function will return `Ok(None)`. +/// If WebGPU is not supported, the Gpu property may (!) be `undefined`, +/// and so this function will return `Ok(None)`. +/// Note that this check is insufficient to determine whether WebGPU is +/// supported, as the browser may define the Gpu property, but be unable to +/// create any WebGPU adapters. +/// To detect whether WebGPU is supported, use the [`crate::utils::is_browser_webgpu_supported`] function. /// /// See: /// *