From ba7f1a255f883e707f6e4b3fcf747b8b5afd2dab Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 29 Sep 2024 21:53:58 +0200
Subject: [PATCH 01/19] Switch flag enums to u64

---
 wgpu-hal/src/vulkan/device.rs | 2 +-
 wgpu-types/src/lib.rs         | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 9f0fc67738..75594bac4e 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1930,7 +1930,7 @@ impl crate::Device for super::Device {
         for cat in desc.color_targets {
             let (key, attarchment) = if let Some(cat) = cat.as_ref() {
                 let mut vk_attachment = vk::PipelineColorBlendAttachmentState::default()
-                    .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits()));
+                    .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits() as u32));
                 if let Some(ref blend) = cat.blend {
                     let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color);
                     let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha);
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index c7167f826f..ef5d58a9e0 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1900,7 +1900,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpushaderstageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct ShaderStages: u32 {
+    pub struct ShaderStages: u64 {
         /// Binding is not visible from any shader stage.
         const NONE = 0;
         /// Binding is visible from the vertex shader of a render pipeline.
@@ -4653,7 +4653,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpucolorwriteflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct ColorWrites: u32 {
+    pub struct ColorWrites: u64 {
         /// Enable red channel writes
         const RED = 1 << 0;
         /// Enable green channel writes
@@ -5265,7 +5265,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpubufferusageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct BufferUsages: u32 {
+    pub struct BufferUsages: u64 {
         /// Allow a buffer to be mapped for reading using [`Buffer::map_async`] + [`Buffer::get_mapped_range`].
         /// This does not include creating a buffer with [`BufferDescriptor::mapped_at_creation`] set.
         ///
@@ -5484,7 +5484,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gputextureusageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct TextureUsages: u32 {
+    pub struct TextureUsages: u64 {
         /// Allows a texture to be the source in a [`CommandEncoder::copy_texture_to_buffer`] or
         /// [`CommandEncoder::copy_texture_to_texture`] operation.
         const COPY_SRC = 1 << 0;

From 2d283e3a5c27b03cb77ffe22f064736d6808c217 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Wed, 2 Oct 2024 08:20:53 +0200
Subject: [PATCH 02/19] try another way to implement futures

---
 wgpu-core/src/instance.rs | 59 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 581c5ce0d9..2242c8856a 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -60,6 +60,41 @@ pub struct Instance {
     /// The ordering in this list implies prioritization and needs to be preserved.
     pub instance_per_backend: Vec<(Backend, Box<dyn hal::DynInstance>)>,
     pub flags: wgt::InstanceFlags,
+
+    // WIP Future mechanism
+    pub futures: FutureRegistry,
+}
+
+#[derive(Default)]
+pub struct Future {
+    completed: bool,
+}
+
+// A very simplistic Registry type that replaces wgc::Registry for now
+// for futures.
+// Test only: There should be a way not to store all completed futures for the
+// whole lifetime of the prorgam, and also this is not threadsafe.
+#[derive(Default)]
+struct FutureRegistry {
+    next_id: u64,
+    storage: HashMap<u64,Future>,
+}
+impl FutureRegistry {
+    fn new() -> Self {
+        return Self{
+            next_id: 0,
+            storage: HashMap::new(),
+        }
+    }
+    fn register(&mut self, value: Future) -> u64 {
+        let id = self.next_id;
+        self.next_id += 1;
+        self.storage.insert(id, value);
+        return id;
+    }
+    fn get_mut(&mut self, id: u64) -> Option<&mut Future> {
+        return self.storage.get_mut(&id);
+    }
 }
 
 impl Instance {
@@ -111,6 +146,7 @@ impl Instance {
             name: name.to_string(),
             instance_per_backend,
             flags: instance_desc.flags,
+            futures: FutureRegistry::new(),
         }
     }
 
@@ -815,6 +851,29 @@ impl Global {
     }
 }
 
+impl Global {
+    pub fn instance_create_future(
+        &mut self,
+    ) -> u64 {
+        let future_id = self.instance.futures.register(Future{
+            completed: false,
+        });
+        return future_id;
+    }
+
+    pub fn instance_complete_future(
+        &mut self,
+        future_id: u64,
+    ) {
+        match self.instance.futures.get_mut(future_id) {
+            Some(future) => {
+                future.completed = true;
+            },
+            None => panic!("invalid future id")
+        }
+    }
+}
+
 /// Generates a set of backends from a comma separated list of case-insensitive backend names.
 ///
 /// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid.

From a93cbdc98c19f0ae502e1342462d2b4a8b484b0f Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Thu, 3 Oct 2024 08:54:23 +0200
Subject: [PATCH 03/19] Add submission indices

---
 wgpu-core/src/device/global.rs   | 30 +++++++---------
 wgpu-core/src/device/life.rs     |  4 ++-
 wgpu-core/src/device/queue.rs    |  8 +++--
 wgpu-core/src/device/resource.rs | 12 +++----
 wgpu-core/src/instance.rs        | 59 --------------------------------
 wgpu-core/src/resource.rs        | 14 +++++---
 6 files changed, 36 insertions(+), 91 deletions(-)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 10b82a73ae..88a9667992 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -2147,33 +2147,27 @@ impl Global {
         offset: BufferAddress,
         size: Option<BufferAddress>,
         op: BufferMapOperation,
-    ) -> BufferAccessResult {
+    ) -> Result<crate::SubmissionIndex, BufferAccessError> {
         profiling::scope!("Buffer::map_async");
         api_log!("Buffer::map_async {buffer_id:?} offset {offset:?} size {size:?} op: {op:?}");
 
         let hub = &self.hub;
 
-        let op_and_err = 'error: {
-            let buffer = match hub.buffers.get(buffer_id).get() {
-                Ok(buffer) => buffer,
-                Err(e) => break 'error Some((op, e.into())),
-            };
-
-            buffer.map_async(offset, size, op).err()
+        let map_result = match hub.buffers.get(buffer_id).get() {
+            Ok(buffer) => buffer.map_async(offset, size, op),
+            Err(e) => Err((op, e.into())),
         };
 
-        // User callbacks must not be called while holding `buffer.map_async`'s locks, so we
-        // defer the error callback if it needs to be called immediately (typically when running
-        // into errors).
-        if let Some((mut operation, err)) = op_and_err {
-            if let Some(callback) = operation.callback.take() {
-                callback.call(Err(err.clone()));
+        match map_result {
+            Ok(submission_index) => Ok(submission_index),
+            Err((mut operation, err)) => {
+                if let Some(callback) = operation.callback.take() {
+                    callback.call(Err(err.clone()));
+                }
+                log::error!("Buffer::map_async error: {err}");
+                Err(err)
             }
-            log::error!("Buffer::map_async error: {err}");
-            return Err(err);
         }
-
-        Ok(())
     }
 
     pub fn buffer_get_mapped_range(
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index e6aed78a08..ee37b78f1c 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -304,15 +304,17 @@ impl LifetimeTracker {
         }
     }
 
-    pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) {
+    pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) -> Option<SubmissionIndex> {
         match self.active.last_mut() {
             Some(active) => {
                 active.work_done_closures.push(closure);
+                Some(active.index)
             }
             // We must defer the closure until all previously occurring map_async closures
             // have fired. This is required by the spec.
             None => {
                 self.work_done_closures.push(closure);
+                None
             }
         }
     }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index bd6d99f1c3..5777dbe9c5 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1334,12 +1334,16 @@ impl Global {
         &self,
         queue_id: QueueId,
         closure: SubmittedWorkDoneClosure,
-    ) {
+    ) -> SubmissionIndex {
         api_log!("Queue::on_submitted_work_done {queue_id:?}");
 
         //TODO: flush pending writes
         let queue = self.hub.queues.get(queue_id);
-        queue.device.lock_life().add_work_done_closure(closure);
+        let result = queue.device.lock_life().add_work_done_closure(closure);
+        match result {
+            Some(submission_index) => submission_index,
+            None => queue.device.last_successful_submission_index.load(Ordering::Acquire),
+        }
     }
 }
 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 03b183e085..9f0cbf33f1 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -437,13 +437,11 @@ impl Device {
                     .last_successful_submission_index
                     .load(Ordering::Acquire);
 
-                if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain {
-                    if submission_index > last_successful_submission_index {
-                        return Err(WaitIdleError::WrongSubmissionIndex(
-                            submission_index,
-                            last_successful_submission_index,
-                        ));
-                    }
+                if submission_index > last_successful_submission_index {
+                    return Err(WaitIdleError::WrongSubmissionIndex(
+                        submission_index,
+                        last_successful_submission_index,
+                    ));
                 }
 
                 submission_index
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 2242c8856a..581c5ce0d9 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -60,41 +60,6 @@ pub struct Instance {
     /// The ordering in this list implies prioritization and needs to be preserved.
     pub instance_per_backend: Vec<(Backend, Box<dyn hal::DynInstance>)>,
     pub flags: wgt::InstanceFlags,
-
-    // WIP Future mechanism
-    pub futures: FutureRegistry,
-}
-
-#[derive(Default)]
-pub struct Future {
-    completed: bool,
-}
-
-// A very simplistic Registry type that replaces wgc::Registry for now
-// for futures.
-// Test only: There should be a way not to store all completed futures for the
-// whole lifetime of the prorgam, and also this is not threadsafe.
-#[derive(Default)]
-struct FutureRegistry {
-    next_id: u64,
-    storage: HashMap<u64,Future>,
-}
-impl FutureRegistry {
-    fn new() -> Self {
-        return Self{
-            next_id: 0,
-            storage: HashMap::new(),
-        }
-    }
-    fn register(&mut self, value: Future) -> u64 {
-        let id = self.next_id;
-        self.next_id += 1;
-        self.storage.insert(id, value);
-        return id;
-    }
-    fn get_mut(&mut self, id: u64) -> Option<&mut Future> {
-        return self.storage.get_mut(&id);
-    }
 }
 
 impl Instance {
@@ -146,7 +111,6 @@ impl Instance {
             name: name.to_string(),
             instance_per_backend,
             flags: instance_desc.flags,
-            futures: FutureRegistry::new(),
         }
     }
 
@@ -851,29 +815,6 @@ impl Global {
     }
 }
 
-impl Global {
-    pub fn instance_create_future(
-        &mut self,
-    ) -> u64 {
-        let future_id = self.instance.futures.register(Future{
-            completed: false,
-        });
-        return future_id;
-    }
-
-    pub fn instance_complete_future(
-        &mut self,
-        future_id: u64,
-    ) {
-        match self.instance.futures.get_mut(future_id) {
-            Some(future) => {
-                future.completed = true;
-            },
-            None => panic!("invalid future id")
-        }
-    }
-}
-
 /// Generates a set of backends from a comma separated list of case-insensitive backend names.
 ///
 /// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid.
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 5df285da54..3529705121 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -14,7 +14,7 @@ use crate::{
     resource_log,
     snatch::{SnatchGuard, Snatchable},
     track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex},
-    Label, LabelHelpers,
+    Label, LabelHelpers, SubmissionIndex,
 };
 
 use smallvec::SmallVec;
@@ -303,7 +303,7 @@ impl BufferMapCallback {
             // SAFETY: the contract of the call to from_c says that this unsafe is sound.
             BufferMapCallbackInner::C { inner } => unsafe {
                 let status = match result {
-                    Ok(()) => BufferMapAsyncStatus::Success,
+                    Ok(_) => BufferMapAsyncStatus::Success,
                     Err(BufferAccessError::Device(_)) => BufferMapAsyncStatus::ContextLost,
                     Err(BufferAccessError::InvalidResource(_))
                     | Err(BufferAccessError::DestroyedResource(_)) => BufferMapAsyncStatus::Invalid,
@@ -537,7 +537,7 @@ impl Buffer {
         offset: wgt::BufferAddress,
         size: Option<wgt::BufferAddress>,
         op: BufferMapOperation,
-    ) -> Result<(), (BufferMapOperation, BufferAccessError)> {
+    ) -> Result<SubmissionIndex, (BufferMapOperation, BufferAccessError)> {
         let range_size = if let Some(size) = size {
             size
         } else if offset > self.size {
@@ -624,9 +624,15 @@ impl Buffer {
             .buffers
             .set_single(self, internal_use);
 
+        let mut fence = device.fence.write(); // is this needed to be able to increment active_submission_index?
+        // should we increment last_successful_submission_index instead?
+        let submit_index = device
+            .active_submission_index
+            .fetch_add(1, core::sync::atomic::Ordering::SeqCst)
+            + 1;
         device.lock_life().map(self);
 
-        Ok(())
+        Ok(submit_index)
     }
 
     // Note: This must not be called while holding a lock.

From 2e7160bcfc6237f2419da96a0a3bcfcb6d772ebc Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Thu, 3 Oct 2024 09:14:00 +0200
Subject: [PATCH 04/19] Remove changes not related to this PR

---
 wgpu-hal/src/vulkan/device.rs | 2 +-
 wgpu-types/src/lib.rs         | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 75594bac4e..9f0fc67738 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1930,7 +1930,7 @@ impl crate::Device for super::Device {
         for cat in desc.color_targets {
             let (key, attarchment) = if let Some(cat) = cat.as_ref() {
                 let mut vk_attachment = vk::PipelineColorBlendAttachmentState::default()
-                    .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits() as u32));
+                    .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits()));
                 if let Some(ref blend) = cat.blend {
                     let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color);
                     let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha);
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index ef5d58a9e0..c7167f826f 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1900,7 +1900,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpushaderstageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct ShaderStages: u64 {
+    pub struct ShaderStages: u32 {
         /// Binding is not visible from any shader stage.
         const NONE = 0;
         /// Binding is visible from the vertex shader of a render pipeline.
@@ -4653,7 +4653,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpucolorwriteflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct ColorWrites: u64 {
+    pub struct ColorWrites: u32 {
         /// Enable red channel writes
         const RED = 1 << 0;
         /// Enable green channel writes
@@ -5265,7 +5265,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gpubufferusageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct BufferUsages: u64 {
+    pub struct BufferUsages: u32 {
         /// Allow a buffer to be mapped for reading using [`Buffer::map_async`] + [`Buffer::get_mapped_range`].
         /// This does not include creating a buffer with [`BufferDescriptor::mapped_at_creation`] set.
         ///
@@ -5484,7 +5484,7 @@ bitflags::bitflags! {
     /// https://gpuweb.github.io/gpuweb/#typedefdef-gputextureusageflags).
     #[repr(transparent)]
     #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-    pub struct TextureUsages: u64 {
+    pub struct TextureUsages: u32 {
         /// Allows a texture to be the source in a [`CommandEncoder::copy_texture_to_buffer`] or
         /// [`CommandEncoder::copy_texture_to_texture`] operation.
         const COPY_SRC = 1 << 0;

From 4ae7b0180278a73ad187bea6c5089cbbe1dbcf2b Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sat, 5 Oct 2024 11:52:37 +0200
Subject: [PATCH 05/19] map_async returns submission index in wgpu-rs

---
 wgpu-core/src/device/life.rs  | 5 ++++-
 wgpu-core/src/device/queue.rs | 5 ++++-
 wgpu-core/src/resource.rs     | 3 +--
 wgpu/src/api/buffer.rs        | 8 +++++---
 wgpu/src/backend/wgpu_core.rs | 7 ++++---
 wgpu/src/context.rs           | 9 +++++----
 6 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index ee37b78f1c..f1ca8f7e7e 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -304,7 +304,10 @@ impl LifetimeTracker {
         }
     }
 
-    pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) -> Option<SubmissionIndex> {
+    pub fn add_work_done_closure(
+        &mut self,
+        closure: SubmittedWorkDoneClosure,
+    ) -> Option<SubmissionIndex> {
         match self.active.last_mut() {
             Some(active) => {
                 active.work_done_closures.push(closure);
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 5777dbe9c5..2dc03d3a32 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1342,7 +1342,10 @@ impl Global {
         let result = queue.device.lock_life().add_work_done_closure(closure);
         match result {
             Some(submission_index) => submission_index,
-            None => queue.device.last_successful_submission_index.load(Ordering::Acquire),
+            None => queue
+                .device
+                .last_successful_submission_index
+                .load(Ordering::Acquire),
         }
     }
 }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 3529705121..b330010294 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -624,8 +624,7 @@ impl Buffer {
             .buffers
             .set_single(self, internal_use);
 
-        let mut fence = device.fence.write(); // is this needed to be able to increment active_submission_index?
-        // should we increment last_successful_submission_index instead?
+        // TODO: should we increment last_successful_submission_index instead?
         let submit_index = device
             .active_submission_index
             .fetch_add(1, core::sync::atomic::Ordering::SeqCst)
diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index 9d490616d3..ea4bddc146 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -337,7 +337,7 @@ impl<'a> BufferSlice<'a> {
         &self,
         mode: MapMode,
         callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) {
+    ) -> SubmissionIndex {
         let mut mc = self.buffer.map_context.lock();
         assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped");
         let end = match self.size {
@@ -346,13 +346,15 @@ impl<'a> BufferSlice<'a> {
         };
         mc.initial_range = self.offset..end;
 
-        DynContext::buffer_map_async(
+        let data = DynContext::buffer_map_async(
             &*self.buffer.context,
             self.buffer.data.as_ref(),
             mode,
             self.offset..end,
             Box::new(callback),
-        )
+        );
+
+        SubmissionIndex { data }
     }
 
     /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 652df388ff..e1690864ce 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1391,7 +1391,7 @@ impl crate::Context for ContextWgpuCore {
         mode: MapMode,
         range: Range<wgt::BufferAddress>,
         callback: crate::context::BufferMapCallback,
-    ) {
+    ) -> Self::SubmissionIndexData {
         let operation = wgc::resource::BufferMapOperation {
             host: match mode {
                 MapMode::Read => wgc::device::HostMap::Read,
@@ -1411,9 +1411,10 @@ impl crate::Context for ContextWgpuCore {
             Some(range.end - range.start),
             operation,
         ) {
-            Ok(()) => (),
+            Ok(index) => index,
             Err(cause) => {
-                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async")
+                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async");
+                Self::SubmissionIndexData::MAX // invalid submission index
             }
         }
     }
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index a27459ab45..3696fe6d8d 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -218,7 +218,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    );
+    ) -> Self::SubmissionIndexData;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &Self::BufferData,
@@ -908,7 +908,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    );
+    ) -> Arc<crate::Data>;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &crate::Data,
@@ -1688,9 +1688,10 @@ where
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) {
+    ) -> Arc<crate::Data> {
         let buffer_data = downcast_ref(buffer_data);
-        Context::buffer_map_async(self, buffer_data, mode, range, callback)
+        let data = Context::buffer_map_async(self, buffer_data, mode, range, callback);
+        Arc::new(data) as _
     }
 
     fn buffer_get_mapped_range(

From 6374487c7a44ac6182a2be2b34de7d624d2bc5dc Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sat, 5 Oct 2024 11:54:37 +0200
Subject: [PATCH 06/19] on_submitted_work_done returns submission index in
 wgpu-rs

---
 wgpu/src/api/queue.rs         | 10 +++++++---
 wgpu/src/backend/wgpu_core.rs |  4 ++--
 wgpu/src/context.rs           |  9 +++++----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index b57b33ece3..93f18636f3 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -275,11 +275,15 @@ impl Queue {
     /// has completed. There are no restrictions on the code you can run in the callback, however on native the
     /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
     /// and used to set flags, send messages, etc.
-    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
-        DynContext::queue_on_submitted_work_done(
+    pub fn on_submitted_work_done(
+        &self,
+        callback: impl FnOnce() + Send + 'static,
+    ) -> SubmissionIndex {
+        let data = DynContext::queue_on_submitted_work_done(
             &*self.context,
             self.data.as_ref(),
             Box::new(callback),
-        )
+        );
+        SubmissionIndex { data }
     }
 }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index e1690864ce..555942bf1b 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -2096,9 +2096,9 @@ impl crate::Context for ContextWgpuCore {
         &self,
         queue_data: &Self::QueueData,
         callback: crate::context::SubmittedWorkDoneCallback,
-    ) {
+    ) -> Self::SubmissionIndexData {
         let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback);
-        self.0.queue_on_submitted_work_done(queue_data.id, closure);
+        self.0.queue_on_submitted_work_done(queue_data.id, closure)
     }
 
     fn device_start_capture(&self, device_data: &Self::DeviceData) {
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 3696fe6d8d..0571c483d3 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -413,7 +413,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         queue_data: &Self::QueueData,
         callback: SubmittedWorkDoneCallback,
-    );
+    ) -> Self::SubmissionIndexData;
 
     fn device_start_capture(&self, device_data: &Self::DeviceData);
     fn device_stop_capture(&self, device_data: &Self::DeviceData);
@@ -1092,7 +1092,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    );
+    ) -> Arc<crate::Data>;
 
     fn device_start_capture(&self, data: &crate::Data);
     fn device_stop_capture(&self, data: &crate::Data);
@@ -2112,9 +2112,10 @@ where
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) {
+    ) -> Arc<crate::Data> {
         let queue_data = downcast_ref(queue_data);
-        Context::queue_on_submitted_work_done(self, queue_data, callback)
+        let data = Context::queue_on_submitted_work_done(self, queue_data, callback);
+        Arc::new(data) as _
     }
 
     fn device_start_capture(&self, device_data: &crate::Data) {

From 1c8c0add339858cd8b4c6c2853c96fd916d43cae Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sat, 5 Oct 2024 12:08:06 +0200
Subject: [PATCH 07/19] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 894da6ddc7..048bb393dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -86,6 +86,7 @@ By @bradwerth [#6216](https://github.com/gfx-rs/wgpu/pull/6216).
 #### General
 
 - Add `VideoFrame` to `ExternalImageSource` enum. By @jprochazk in [#6170](https://github.com/gfx-rs/wgpu/pull/6170)
+- Return submission index in `map_async` and `on_submitted_work_done` to track down completion of async callbacks. By @eliemichel in [#6360](https://github.com/gfx-rs/wgpu/pull/6360)
 
 #### Vulkan
 

From 4e948dcac072aad13e047119fba1296b7e14f328 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 20 Oct 2024 18:57:18 +0200
Subject: [PATCH 08/19] Fix return value of on_submitted_work_done

---
 wgpu-core/src/device/queue.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index d4c1d6cc76..7e015b6aba 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1256,10 +1256,13 @@ impl Queue {
         unsafe { self.raw().get_timestamp_period() }
     }
 
-    pub fn on_submitted_work_done(&self, closure: SubmittedWorkDoneClosure) {
+    pub fn on_submitted_work_done(
+        &self,
+        closure: SubmittedWorkDoneClosure,
+    ) -> Option<SubmissionIndex> {
         api_log!("Queue::on_submitted_work_done");
         //TODO: flush pending writes
-        self.device.lock_life().add_work_done_closure(closure);
+        self.device.lock_life().add_work_done_closure(closure)
     }
 }
 

From 51aebeccf26d4ea3cea7e426b0995bed55c57f43 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Tue, 22 Oct 2024 22:16:40 +0200
Subject: [PATCH 09/19] WIP define BufferMapFuture and SubmittedWorkDoneFuture

---
 wgpu/src/api/buffer.rs        |  9 +++------
 wgpu/src/api/queue.rs         |  8 +++-----
 wgpu/src/backend/wgpu_core.rs |  3 +++
 wgpu/src/context.rs           | 33 +++++++++++++++++++++++----------
 4 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index 61e01492af..b94c8be829 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -1,8 +1,5 @@
 use std::{
-    error, fmt,
-    ops::{Bound, Deref, DerefMut, Range, RangeBounds},
-    sync::Arc,
-    thread,
+    error, fmt, future::Future, ops::{Bound, Deref, DerefMut, Range, RangeBounds}, sync::Arc, thread
 };
 
 use parking_lot::Mutex;
@@ -338,7 +335,7 @@ impl<'a> BufferSlice<'a> {
         &self,
         mode: MapMode,
         callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) -> SubmissionIndex {
+    ) -> impl Future<Output = Result<(), BufferAsyncError>> + WasmNotSend {
         let mut mc = self.buffer.map_context.lock();
         assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped");
         let end = match self.size {
@@ -355,7 +352,7 @@ impl<'a> BufferSlice<'a> {
             Box::new(callback),
         );
 
-        SubmissionIndex { data }
+        async move { data.await }
     }
 
     /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index 93f18636f3..a1ffec4054 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -1,7 +1,5 @@
 use std::{
-    ops::{Deref, DerefMut},
-    sync::Arc,
-    thread,
+    future::Future, ops::{Deref, DerefMut}, sync::Arc, thread
 };
 
 use crate::context::{DynContext, QueueWriteBuffer};
@@ -278,12 +276,12 @@ impl Queue {
     pub fn on_submitted_work_done(
         &self,
         callback: impl FnOnce() + Send + 'static,
-    ) -> SubmissionIndex {
+    ) -> impl Future<Output = ()> + WasmNotSend {
         let data = DynContext::queue_on_submitted_work_done(
             &*self.context,
             self.data.as_ref(),
             Box::new(callback),
         );
-        SubmissionIndex { data }
+        async move { data.await }
     }
 }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 555942bf1b..262ede50dc 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -540,6 +540,9 @@ impl crate::Context for ContextWgpuCore {
     type PopErrorScopeFuture = Ready<Option<crate::Error>>;
     type CompilationInfoFuture = Ready<CompilationInfo>;
 
+    type BufferMapFuture = wgc::SubmissionIndex;
+    type SubmittedWorkDoneFuture = wgc::SubmissionIndex;
+
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self {
         Self(wgc::global::Global::new("wgpu", instance_desc))
     }
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 0571c483d3..8412ac26c3 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -60,6 +60,9 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
 
     type CompilationInfoFuture: Future<Output = CompilationInfo> + WasmNotSend + 'static;
 
+    type BufferMapFuture: Future<Output = Result<(),BufferAsyncError>> + WasmNotSend + 'static;
+    type SubmittedWorkDoneFuture: Future<Output = ()> + WasmNotSend + 'static;
+
     #[cfg(not(target_os = "emscripten"))]
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self;
     unsafe fn instance_create_surface(
@@ -218,7 +221,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Self::SubmissionIndexData;
+    ) -> Self::BufferMapFuture;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &Self::BufferData,
@@ -413,7 +416,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         queue_data: &Self::QueueData,
         callback: SubmittedWorkDoneCallback,
-    ) -> Self::SubmissionIndexData;
+    ) -> Self::SubmittedWorkDoneFuture;
 
     fn device_start_capture(&self, device_data: &Self::DeviceData);
     fn device_stop_capture(&self, device_data: &Self::DeviceData);
@@ -750,6 +753,16 @@ pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + Send + 'sta
 #[cfg(not(send_sync))]
 pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + 'static>;
 
+#[cfg(send_sync)]
+pub type BufferMapFuture = Box<dyn Future<Output = Result<(),BufferAsyncError>> + Send>;
+#[cfg(not(send_sync))]
+pub type BufferMapFuture = Box<dyn Future<Output = Result<(),BufferAsyncError>>>;
+
+#[cfg(send_sync)]
+pub type SubmittedWorkDoneFuture = Box<dyn Future<Output = ()> + Send>;
+#[cfg(not(send_sync))]
+pub type SubmittedWorkDoneFuture = Box<dyn Future<Output = ()>>;
+
 /// An object safe variant of [`Context`] implemented by all types that implement [`Context`].
 pub(crate) trait DynContext: Debug + WasmNotSendSync {
     #[cfg(not(target_os = "emscripten"))]
@@ -908,7 +921,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Arc<crate::Data>;
+    ) -> Pin<BufferMapFuture>;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &crate::Data,
@@ -1092,7 +1105,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Arc<crate::Data>;
+    ) -> Pin<SubmittedWorkDoneFuture>;
 
     fn device_start_capture(&self, data: &crate::Data);
     fn device_stop_capture(&self, data: &crate::Data);
@@ -1688,10 +1701,10 @@ where
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Arc<crate::Data> {
+    ) -> Pin<BufferMapFuture> {
         let buffer_data = downcast_ref(buffer_data);
-        let data = Context::buffer_map_async(self, buffer_data, mode, range, callback);
-        Arc::new(data) as _
+        let future = Context::buffer_map_async(self, buffer_data, mode, range, callback);
+        Box::pin(async move { future.await })
     }
 
     fn buffer_get_mapped_range(
@@ -2112,10 +2125,10 @@ where
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Arc<crate::Data> {
+    ) -> Pin<SubmittedWorkDoneFuture> {
         let queue_data = downcast_ref(queue_data);
-        let data = Context::queue_on_submitted_work_done(self, queue_data, callback);
-        Arc::new(data) as _
+        let future = Context::queue_on_submitted_work_done(self, queue_data, callback);
+        Box::pin(async move { future.await })
     }
 
     fn device_start_capture(&self, device_data: &crate::Data) {

From 404c387e850f1c8a9df29808ad6ff75e2ae127b1 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Tue, 22 Oct 2024 23:09:21 +0200
Subject: [PATCH 10/19] Introduce WgpuFuture

---
 wgpu/src/api/buffer.rs        | 11 +++++++----
 wgpu/src/api/instance.rs      | 11 +++++++++++
 wgpu/src/api/queue.rs         | 14 +++++++-------
 wgpu/src/backend/wgpu_core.rs |  7 +++----
 wgpu/src/context.rs           | 34 ++++++++++++----------------------
 5 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index b94c8be829..a469782a70 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -1,5 +1,8 @@
 use std::{
-    error, fmt, future::Future, ops::{Bound, Deref, DerefMut, Range, RangeBounds}, sync::Arc, thread
+    error, fmt,
+    ops::{Bound, Deref, DerefMut, Range, RangeBounds},
+    sync::Arc,
+    thread,
 };
 
 use parking_lot::Mutex;
@@ -335,7 +338,7 @@ impl<'a> BufferSlice<'a> {
         &self,
         mode: MapMode,
         callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) -> impl Future<Output = Result<(), BufferAsyncError>> + WasmNotSend {
+    ) -> WgpuFuture {
         let mut mc = self.buffer.map_context.lock();
         assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped");
         let end = match self.size {
@@ -344,7 +347,7 @@ impl<'a> BufferSlice<'a> {
         };
         mc.initial_range = self.offset..end;
 
-        let data = DynContext::buffer_map_async(
+        let id = DynContext::buffer_map_async(
             &*self.buffer.context,
             self.buffer.data.as_ref(),
             mode,
@@ -352,7 +355,7 @@ impl<'a> BufferSlice<'a> {
             Box::new(callback),
         );
 
-        async move { data.await }
+        WgpuFuture { id }
     }
 
     /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index b21c9f70ec..d97b542c64 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -33,6 +33,17 @@ impl Default for Instance {
     }
 }
 
+/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data.
+///
+/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId)
+#[derive(Debug, Clone)]
+pub struct WgpuFuture {
+    #[cfg_attr(not(native), allow(dead_code))]
+    pub(crate) id: Arc<crate::Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(WgpuFuture: Send, Sync);
+
 impl Instance {
     /// Returns which backends can be picked for the current build configuration.
     ///
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index a1ffec4054..0e600e4acd 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -1,5 +1,7 @@
 use std::{
-    future::Future, ops::{Deref, DerefMut}, sync::Arc, thread
+    ops::{Deref, DerefMut},
+    sync::Arc,
+    thread,
 };
 
 use crate::context::{DynContext, QueueWriteBuffer};
@@ -34,6 +36,7 @@ impl Drop for Queue {
 ///
 /// This type is unique to the Rust API of `wgpu`.
 /// There is no analogue in the WebGPU specification.
+/// NB: WgpuFuture should probably be used instead of this
 #[derive(Debug, Clone)]
 pub struct SubmissionIndex {
     #[cfg_attr(not(native), allow(dead_code))]
@@ -273,15 +276,12 @@ impl Queue {
     /// has completed. There are no restrictions on the code you can run in the callback, however on native the
     /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
     /// and used to set flags, send messages, etc.
-    pub fn on_submitted_work_done(
-        &self,
-        callback: impl FnOnce() + Send + 'static,
-    ) -> impl Future<Output = ()> + WasmNotSend {
-        let data = DynContext::queue_on_submitted_work_done(
+    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture {
+        let id = DynContext::queue_on_submitted_work_done(
             &*self.context,
             self.data.as_ref(),
             Box::new(callback),
         );
-        async move { data.await }
+        WgpuFuture { id }
     }
 }
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 262ede50dc..64e7463dbc 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -540,8 +540,7 @@ impl crate::Context for ContextWgpuCore {
     type PopErrorScopeFuture = Ready<Option<crate::Error>>;
     type CompilationInfoFuture = Ready<CompilationInfo>;
 
-    type BufferMapFuture = wgc::SubmissionIndex;
-    type SubmittedWorkDoneFuture = wgc::SubmissionIndex;
+    type WgpuFuture = wgc::SubmissionIndex;
 
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self {
         Self(wgc::global::Global::new("wgpu", instance_desc))
@@ -1394,7 +1393,7 @@ impl crate::Context for ContextWgpuCore {
         mode: MapMode,
         range: Range<wgt::BufferAddress>,
         callback: crate::context::BufferMapCallback,
-    ) -> Self::SubmissionIndexData {
+    ) -> Self::WgpuFuture {
         let operation = wgc::resource::BufferMapOperation {
             host: match mode {
                 MapMode::Read => wgc::device::HostMap::Read,
@@ -2099,7 +2098,7 @@ impl crate::Context for ContextWgpuCore {
         &self,
         queue_data: &Self::QueueData,
         callback: crate::context::SubmittedWorkDoneCallback,
-    ) -> Self::SubmissionIndexData {
+    ) -> Self::WgpuFuture {
         let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback);
         self.0.queue_on_submitted_work_done(queue_data.id, closure)
     }
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 8412ac26c3..fa039d1c7a 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -60,8 +60,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
 
     type CompilationInfoFuture: Future<Output = CompilationInfo> + WasmNotSend + 'static;
 
-    type BufferMapFuture: Future<Output = Result<(),BufferAsyncError>> + WasmNotSend + 'static;
-    type SubmittedWorkDoneFuture: Future<Output = ()> + WasmNotSend + 'static;
+    /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data.
+    type WgpuFuture: ContextData + Copy;
 
     #[cfg(not(target_os = "emscripten"))]
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self;
@@ -221,7 +221,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Self::BufferMapFuture;
+    ) -> Self::WgpuFuture;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &Self::BufferData,
@@ -416,7 +416,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         queue_data: &Self::QueueData,
         callback: SubmittedWorkDoneCallback,
-    ) -> Self::SubmittedWorkDoneFuture;
+    ) -> Self::WgpuFuture;
 
     fn device_start_capture(&self, device_data: &Self::DeviceData);
     fn device_stop_capture(&self, device_data: &Self::DeviceData);
@@ -753,16 +753,6 @@ pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + Send + 'sta
 #[cfg(not(send_sync))]
 pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + 'static>;
 
-#[cfg(send_sync)]
-pub type BufferMapFuture = Box<dyn Future<Output = Result<(),BufferAsyncError>> + Send>;
-#[cfg(not(send_sync))]
-pub type BufferMapFuture = Box<dyn Future<Output = Result<(),BufferAsyncError>>>;
-
-#[cfg(send_sync)]
-pub type SubmittedWorkDoneFuture = Box<dyn Future<Output = ()> + Send>;
-#[cfg(not(send_sync))]
-pub type SubmittedWorkDoneFuture = Box<dyn Future<Output = ()>>;
-
 /// An object safe variant of [`Context`] implemented by all types that implement [`Context`].
 pub(crate) trait DynContext: Debug + WasmNotSendSync {
     #[cfg(not(target_os = "emscripten"))]
@@ -921,7 +911,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Pin<BufferMapFuture>;
+    ) -> Arc<crate::Data>;
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &crate::Data,
@@ -1105,7 +1095,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Pin<SubmittedWorkDoneFuture>;
+    ) -> Arc<crate::Data>;
 
     fn device_start_capture(&self, data: &crate::Data);
     fn device_stop_capture(&self, data: &crate::Data);
@@ -1701,10 +1691,10 @@ where
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Pin<BufferMapFuture> {
+    ) -> Arc<crate::Data> {
         let buffer_data = downcast_ref(buffer_data);
-        let future = Context::buffer_map_async(self, buffer_data, mode, range, callback);
-        Box::pin(async move { future.await })
+        let handle = Context::buffer_map_async(self, buffer_data, mode, range, callback);
+        Arc::new(handle) as _
     }
 
     fn buffer_get_mapped_range(
@@ -2125,10 +2115,10 @@ where
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Pin<SubmittedWorkDoneFuture> {
+    ) -> Arc<crate::Data> {
         let queue_data = downcast_ref(queue_data);
-        let future = Context::queue_on_submitted_work_done(self, queue_data, callback);
-        Box::pin(async move { future.await })
+        let handle = Context::queue_on_submitted_work_done(self, queue_data, callback);
+        Arc::new(handle) as _
     }
 
     fn device_start_capture(&self, device_data: &crate::Data) {

From 4ec8eb8d445069de2a9606d08bd7443e1c11fc13 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Tue, 22 Oct 2024 23:24:16 +0200
Subject: [PATCH 11/19] Implement WgpuFuture for webgpu

---
 wgpu/src/backend/webgpu.rs | 7 +++++--
 wgpu/src/context.rs        | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index e0cf006e6e..1e4ddc13e6 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -1162,6 +1162,8 @@ impl crate::context::Context for ContextWebGpu {
         Box<dyn Fn(JsFutureResult) -> CompilationInfo>,
     >;
 
+    type WgpuFuture = Promise;
+
     fn init(_instance_desc: wgt::InstanceDescriptor) -> Self {
         let Ok(gpu) = get_browser_gpu_property() else {
             panic!(
@@ -2150,7 +2152,7 @@ impl crate::context::Context for ContextWebGpu {
         mode: crate::MapMode,
         range: Range<wgt::BufferAddress>,
         callback: crate::context::BufferMapCallback,
-    ) {
+    ) -> Self::WgpuFuture {
         let map_promise = buffer_data.0.buffer.map_async_with_f64_and_f64(
             map_map_mode(mode),
             range.start as f64,
@@ -2160,6 +2162,7 @@ impl crate::context::Context for ContextWebGpu {
         buffer_data.0.set_mapped_range(range);
 
         register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError));
+        map_promise
     }
 
     fn buffer_get_mapped_range(
@@ -2773,7 +2776,7 @@ impl crate::context::Context for ContextWebGpu {
         &self,
         _queue_data: &Self::QueueData,
         _callback: crate::context::SubmittedWorkDoneCallback,
-    ) {
+    ) -> Self::WgpuFuture {
         unimplemented!()
     }
 
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index fa039d1c7a..c115089416 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -61,7 +61,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
     type CompilationInfoFuture: Future<Output = CompilationInfo> + WasmNotSend + 'static;
 
     /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data.
-    type WgpuFuture: ContextData + Copy;
+    type WgpuFuture: ContextData;
 
     #[cfg(not(target_os = "emscripten"))]
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self;

From 4f37591af691870e6dd9a1967aac2a46c2a9e057 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Wed, 23 Oct 2024 00:41:48 +0200
Subject: [PATCH 12/19] Try introducing instance_wait_any

---
 wgpu/src/api/buffer.rs        |  4 ++--
 wgpu/src/api/instance.rs      | 30 +++++++++++++++++++-----------
 wgpu/src/api/queue.rs         | 14 ++++++++++++--
 wgpu/src/backend/webgpu.rs    | 12 +++++++++++-
 wgpu/src/backend/wgpu_core.rs |  9 +++++++++
 wgpu/src/context.rs           | 20 +++++++++++++++++++-
 6 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index a469782a70..9d37e361b0 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -347,7 +347,7 @@ impl<'a> BufferSlice<'a> {
         };
         mc.initial_range = self.offset..end;
 
-        let id = DynContext::buffer_map_async(
+        let data = DynContext::buffer_map_async(
             &*self.buffer.context,
             self.buffer.data.as_ref(),
             mode,
@@ -355,7 +355,7 @@ impl<'a> BufferSlice<'a> {
             Box::new(callback),
         );
 
-        WgpuFuture { id }
+        WgpuFuture { data }
     }
 
     /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index d97b542c64..fcb8a54121 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -33,17 +33,6 @@ impl Default for Instance {
     }
 }
 
-/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data.
-///
-/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId)
-#[derive(Debug, Clone)]
-pub struct WgpuFuture {
-    #[cfg_attr(not(native), allow(dead_code))]
-    pub(crate) id: Arc<crate::Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(WgpuFuture: Send, Sync);
-
 impl Instance {
     /// Returns which backends can be picked for the current build configuration.
     ///
@@ -405,3 +394,22 @@ impl Instance {
             .map(|ctx| ctx.generate_report())
     }
 }
+
+/// Status returned when waiting on WgpuFuture objects.
+#[derive(Clone, Debug)]
+pub(crate) enum WaitStatus {
+    // At least one WgpuFuture completed successfully.
+    //Success,
+
+    // No WgpuFuture completed within the timeout.
+    //TimedOut,
+
+    /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false.
+    UnsupportedTimeout,
+
+    // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount.
+    //UnsupportedCount,
+    
+    // An invalid wait was performed with Mixed-Sources.
+    //UnsupportedMixedSources,
+}
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index 0e600e4acd..15d3c990ab 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -51,6 +51,16 @@ pub type Maintain = wgt::Maintain<SubmissionIndex>;
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Maintain: Send, Sync);
 
+
+/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried
+/// for completion, but does not hold any returned data.
+///
+/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId)
+pub type WgpuFuture = SubmissionIndex;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(WgpuFuture: Send, Sync);
+
+
 /// A write-only view into a staging buffer.
 ///
 /// Reading into this buffer won't yield the contents of the buffer from the
@@ -277,11 +287,11 @@ impl Queue {
     /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
     /// and used to set flags, send messages, etc.
     pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture {
-        let id = DynContext::queue_on_submitted_work_done(
+        let data = DynContext::queue_on_submitted_work_done(
             &*self.context,
             self.data.as_ref(),
             Box::new(callback),
         );
-        WgpuFuture { id }
+        WgpuFuture { data }
     }
 }
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index 1e4ddc13e6..d9c450e794 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -1253,6 +1253,16 @@ impl crate::context::Context for ContextWebGpu {
         }
     }
 
+    fn instance_wait_any(
+        &self,
+        _futures: &[&Self::WgpuFuture],
+        _timeout_ns: u64,
+    ) -> crate::WaitStatus {
+        // TODO: Yield back to the browser, run the equivalent of the following JavaScript:
+        // > await Promise.any([ ...futures, new Promise(resolve => setTimeout(timeout_ns, resolve) ]))
+        crate::WaitStatus::UnsupportedTimeout
+    }
+
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -2162,7 +2172,7 @@ impl crate::context::Context for ContextWebGpu {
         buffer_data.0.set_mapped_range(range);
 
         register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError));
-        map_promise
+        map_promise.into()
     }
 
     fn buffer_get_mapped_range(
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 64e7463dbc..98b24b1230 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -608,6 +608,15 @@ impl crate::Context for ContextWgpuCore {
         ready(id.ok())
     }
 
+    fn instance_wait_any(
+        &self,
+        _futures: &[&Self::WgpuFuture],
+        _timeout_ns: u64,
+    ) -> crate::WaitStatus {
+        // TODO: We need to know at the instance level whether a submission ID is completed...
+        crate::WaitStatus::UnsupportedTimeout
+    }
+
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index c115089416..c4dd6e4ef6 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -16,7 +16,7 @@ use crate::{
     RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor,
     RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor,
     ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor,
-    UncapturedErrorHandler,
+    UncapturedErrorHandler, WaitStatus
 };
 /// Meta trait for an data associated with an id tracked by a context.
 ///
@@ -73,6 +73,11 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Self::RequestAdapterFuture;
+    fn instance_wait_any(
+        &self,
+        futures: &[&Self::WgpuFuture],
+        timeout_ns: u64,
+    ) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -767,6 +772,11 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Pin<InstanceRequestAdapterFuture>;
+    fn instance_wait_any(
+        &self,
+        futures_data: &[&crate::Data],
+        timeout_ns: u64,
+    ) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &crate::Data,
@@ -1373,6 +1383,14 @@ where
         Box::pin(async move { future.await.map(|data| Box::new(data) as _) })
     }
 
+    fn instance_wait_any(
+        &self,
+        _futures_data: &[&crate::Data],
+        _timeout_ns: u64,
+    ) -> WaitStatus {
+        unimplemented!();
+    }
+
     fn adapter_request_device(
         &self,
         adapter_data: &crate::Data,

From 320989756b1a798b3df96525de096a119dd0b8b9 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Wed, 23 Oct 2024 00:42:58 +0200
Subject: [PATCH 13/19] Run cargo fmt

---
 wgpu/src/api/instance.rs |  4 +---
 wgpu/src/api/queue.rs    |  2 --
 wgpu/src/context.rs      | 20 ++++----------------
 3 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index fcb8a54121..b3aab1aeb4 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -403,13 +403,11 @@ pub(crate) enum WaitStatus {
 
     // No WgpuFuture completed within the timeout.
     //TimedOut,
-
     /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false.
     UnsupportedTimeout,
-
     // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount.
     //UnsupportedCount,
-    
+
     // An invalid wait was performed with Mixed-Sources.
     //UnsupportedMixedSources,
 }
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index 15d3c990ab..1f2ad629d9 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -51,7 +51,6 @@ pub type Maintain = wgt::Maintain<SubmissionIndex>;
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Maintain: Send, Sync);
 
-
 /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried
 /// for completion, but does not hold any returned data.
 ///
@@ -60,7 +59,6 @@ pub type WgpuFuture = SubmissionIndex;
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(WgpuFuture: Send, Sync);
 
-
 /// A write-only view into a staging buffer.
 ///
 /// Reading into this buffer won't yield the contents of the buffer from the
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index c4dd6e4ef6..fe37928af8 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -16,7 +16,7 @@ use crate::{
     RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor,
     RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor,
     ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor,
-    UncapturedErrorHandler, WaitStatus
+    UncapturedErrorHandler, WaitStatus,
 };
 /// Meta trait for an data associated with an id tracked by a context.
 ///
@@ -73,11 +73,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Self::RequestAdapterFuture;
-    fn instance_wait_any(
-        &self,
-        futures: &[&Self::WgpuFuture],
-        timeout_ns: u64,
-    ) -> WaitStatus;
+    fn instance_wait_any(&self, futures: &[&Self::WgpuFuture], timeout_ns: u64) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -772,11 +768,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Pin<InstanceRequestAdapterFuture>;
-    fn instance_wait_any(
-        &self,
-        futures_data: &[&crate::Data],
-        timeout_ns: u64,
-    ) -> WaitStatus;
+    fn instance_wait_any(&self, futures_data: &[&crate::Data], timeout_ns: u64) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &crate::Data,
@@ -1383,11 +1375,7 @@ where
         Box::pin(async move { future.await.map(|data| Box::new(data) as _) })
     }
 
-    fn instance_wait_any(
-        &self,
-        _futures_data: &[&crate::Data],
-        _timeout_ns: u64,
-    ) -> WaitStatus {
+    fn instance_wait_any(&self, _futures_data: &[&crate::Data], _timeout_ns: u64) -> WaitStatus {
         unimplemented!();
     }
 

From be41ff6a9626b72c33569c8cc7f5966d5d2faca6 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Wed, 23 Oct 2024 08:45:16 +0200
Subject: [PATCH 14/19] Refine map async submission index

---
 wgpu-core/src/device/life.rs | 13 +++++++++++--
 wgpu-core/src/resource.rs    | 12 +++++-------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index f1ca8f7e7e..bc66a4d1b9 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -211,8 +211,17 @@ impl LifetimeTracker {
         });
     }
 
-    pub(crate) fn map(&mut self, value: &Arc<Buffer>) {
-        self.mapped.push(value.clone());
+    pub(crate) fn map(&mut self, buffer: &Arc<Buffer>) -> Option<SubmissionIndex> {
+        self.mapped.push(buffer.clone());
+
+        // Warning: this duplicates what is in triage_mapped()
+        let submission = self
+            .active
+            .iter_mut()
+            .rev()
+            .find(|a| a.contains_buffer(&buffer));
+
+        submission.map(|s| s.index)
     }
 
     /// Returns the submission index of the most recent submission that uses the
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 95945b983f..af9bc551ba 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -27,7 +27,7 @@ use std::{
     mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
-    sync::Arc,
+    sync::{atomic::Ordering, Arc},
 };
 
 /// Information about the wgpu-core resource.
@@ -633,12 +633,10 @@ impl Buffer {
             .buffers
             .set_single(self, internal_use);
 
-        // TODO: should we increment last_successful_submission_index instead?
-        let submit_index = device
-            .active_submission_index
-            .fetch_add(1, core::sync::atomic::Ordering::SeqCst)
-            + 1;
-        device.lock_life().map(self);
+        let submit_index = match device.lock_life().map(self) {
+            Some(index) => index,
+            None => device.active_submission_index.load(Ordering::SeqCst),
+        };
 
         Ok(submit_index)
     }

From 353ddbe29aee6a8e7e62f948db16e02dc944219e Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Wed, 23 Oct 2024 08:48:39 +0200
Subject: [PATCH 15/19] Use Future instead of Promise in webgpu implem of
 WgpuFuture

---
 wgpu/src/backend/webgpu.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index d9c450e794..650d99edb5 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -1162,7 +1162,7 @@ impl crate::context::Context for ContextWebGpu {
         Box<dyn Fn(JsFutureResult) -> CompilationInfo>,
     >;
 
-    type WgpuFuture = Promise;
+    type WgpuFuture = wasm_bindgen_futures::JsFuture;
 
     fn init(_instance_desc: wgt::InstanceDescriptor) -> Self {
         let Ok(gpu) = get_browser_gpu_property() else {

From 4c9371fda560928ff3feb88b912f90ccfcc90f0f Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 27 Oct 2024 09:30:49 +0100
Subject: [PATCH 16/19] Revert changes to wgpu, focus on wgpu-core

---
 wgpu-core/src/device/queue.rs |  5 +--
 wgpu-core/src/resource.rs     |  4 +--
 wgpu/src/api/buffer.rs        | 64 +++--------------------------------
 wgpu/src/api/instance.rs      | 30 +++-------------
 wgpu/src/api/queue.rs         | 16 ++-------
 wgpu/src/backend/webgpu.rs    | 28 ++++-----------
 wgpu/src/backend/wgpu_core.rs | 22 +++---------
 wgpu/src/context.rs           | 29 +++++-----------
 8 files changed, 34 insertions(+), 164 deletions(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 7e015b6aba..1203c9d5a8 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1413,10 +1413,7 @@ impl Global {
         let result = queue.on_submitted_work_done(closure);
         match result {
             Some(submission_index) => submission_index,
-            None => queue
-                .device
-                .last_successful_submission_index
-                .load(Ordering::Acquire),
+            None => 0, // meaning no wait is necessary
         }
     }
 }
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index af9bc551ba..bd2950c277 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -27,7 +27,7 @@ use std::{
     mem::{self, ManuallyDrop},
     ops::Range,
     ptr::NonNull,
-    sync::{atomic::Ordering, Arc},
+    sync::Arc,
 };
 
 /// Information about the wgpu-core resource.
@@ -635,7 +635,7 @@ impl Buffer {
 
         let submit_index = match device.lock_life().map(self) {
             Some(index) => index,
-            None => device.active_submission_index.load(Ordering::SeqCst),
+            None => 0, // meaning no wait is necessary
         };
 
         Ok(submit_index)
diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index 9d37e361b0..9d490616d3 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -242,7 +242,6 @@ impl Buffer {
     /// end of the buffer.
     pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
         let (offset, size) = range_to_offset_size(bounds);
-        check_buffer_bounds(self.size, offset, size);
         BufferSlice {
             buffer: self,
             offset,
@@ -338,7 +337,7 @@ impl<'a> BufferSlice<'a> {
         &self,
         mode: MapMode,
         callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) -> WgpuFuture {
+    ) {
         let mut mc = self.buffer.map_context.lock();
         assert_eq!(mc.initial_range, 0..0, "Buffer is already mapped");
         let end = match self.size {
@@ -347,15 +346,13 @@ impl<'a> BufferSlice<'a> {
         };
         mc.initial_range = self.offset..end;
 
-        let data = DynContext::buffer_map_async(
+        DynContext::buffer_map_async(
             &*self.buffer.context,
             self.buffer.data.as_ref(),
             mode,
             self.offset..end,
             Box::new(callback),
-        );
-
-        WgpuFuture { data }
+        )
     }
 
     /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
@@ -676,31 +673,6 @@ impl Drop for Buffer {
     }
 }
 
-fn check_buffer_bounds(
-    buffer_size: BufferAddress,
-    offset: BufferAddress,
-    size: Option<BufferSize>,
-) {
-    // A slice of length 0 is invalid, so the offset must not be equal to or greater than the buffer size.
-    if offset >= buffer_size {
-        panic!(
-            "slice offset {} is out of range for buffer of size {}",
-            offset, buffer_size
-        );
-    }
-
-    if let Some(size) = size {
-        // Detect integer overflow.
-        let end = offset.checked_add(size.get());
-        if end.map_or(true, |end| end > buffer_size) {
-            panic!(
-                "slice offset {} size {} is out of range for buffer of size {}",
-                offset, size, buffer_size
-            );
-        }
-    }
-}
-
 fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
     bounds: S,
 ) -> (BufferAddress, Option<BufferSize>) {
@@ -718,10 +690,9 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
 
     (offset, size)
 }
-
 #[cfg(test)]
 mod tests {
-    use super::{check_buffer_bounds, range_to_offset_size, BufferSize};
+    use super::{range_to_offset_size, BufferSize};
 
     #[test]
     fn range_to_offset_size_works() {
@@ -744,31 +715,4 @@ mod tests {
     fn range_to_offset_size_panics_for_unbounded_empty_range() {
         range_to_offset_size(..0);
     }
-
-    #[test]
-    #[should_panic]
-    fn check_buffer_bounds_panics_for_offset_at_size() {
-        check_buffer_bounds(100, 100, None);
-    }
-
-    #[test]
-    fn check_buffer_bounds_works_for_end_in_range() {
-        check_buffer_bounds(200, 100, BufferSize::new(50));
-        check_buffer_bounds(200, 100, BufferSize::new(100));
-        check_buffer_bounds(u64::MAX, u64::MAX - 100, BufferSize::new(100));
-        check_buffer_bounds(u64::MAX, 0, BufferSize::new(u64::MAX));
-        check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX - 1));
-    }
-
-    #[test]
-    #[should_panic]
-    fn check_buffer_bounds_panics_for_end_over_size() {
-        check_buffer_bounds(200, 100, BufferSize::new(101));
-    }
-
-    #[test]
-    #[should_panic]
-    fn check_buffer_bounds_panics_for_end_wraparound() {
-        check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX));
-    }
 }
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index b3aab1aeb4..af6775b86b 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -93,15 +93,10 @@ impl Instance {
     ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
     ///
     ///   [`Backends::BROWSER_WEBGPU`] takes a special role:
-    ///   If it is set and a [`navigator.gpu`](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu)
-    ///   object is present, this instance will *only* be able to create WebGPU adapters.
-    ///
-    ///   ⚠️ On some browsers this check is insufficient to determine whether WebGPU is supported,
-    ///   as the browser may define the `navigator.gpu` object, but be unable to create any WebGPU adapters.
-    ///   For targeting _both_ WebGPU & WebGL is recommended to use [`crate::util::new_instance_with_webgpu_detection`].
-    ///
-    ///   If you instead want to force use of WebGL, either disable the `webgpu` compile-time feature
-    ///   or don't add the [`Backends::BROWSER_WEBGPU`] flag to the the `instance_desc`'s `backends` field.
+    ///   If it is set and WebGPU support is detected, this instance will *only* be able to create
+    ///   WebGPU adapters. If you instead want to force use of WebGL, either
+    ///   disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`]
+    ///   flag to the the `instance_desc`'s `backends` field.
     ///   If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
     ///   to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
     ///   a WebGL adapter.
@@ -394,20 +389,3 @@ impl Instance {
             .map(|ctx| ctx.generate_report())
     }
 }
-
-/// Status returned when waiting on WgpuFuture objects.
-#[derive(Clone, Debug)]
-pub(crate) enum WaitStatus {
-    // At least one WgpuFuture completed successfully.
-    //Success,
-
-    // No WgpuFuture completed within the timeout.
-    //TimedOut,
-    /// A Timed-Wait was performed when timedWaitAnyEnable instance feature is false.
-    UnsupportedTimeout,
-    // The number of futures waited on in a Timed-Wait is greater than the supported timedWaitAnyMaxCount.
-    //UnsupportedCount,
-
-    // An invalid wait was performed with Mixed-Sources.
-    //UnsupportedMixedSources,
-}
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
index 1f2ad629d9..b57b33ece3 100644
--- a/wgpu/src/api/queue.rs
+++ b/wgpu/src/api/queue.rs
@@ -36,7 +36,6 @@ impl Drop for Queue {
 ///
 /// This type is unique to the Rust API of `wgpu`.
 /// There is no analogue in the WebGPU specification.
-/// NB: WgpuFuture should probably be used instead of this
 #[derive(Debug, Clone)]
 pub struct SubmissionIndex {
     #[cfg_attr(not(native), allow(dead_code))]
@@ -51,14 +50,6 @@ pub type Maintain = wgt::Maintain<SubmissionIndex>;
 #[cfg(send_sync)]
 static_assertions::assert_impl_all!(Maintain: Send, Sync);
 
-/// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried
-/// for completion, but does not hold any returned data.
-///
-/// It's 'id' field is to be interpreted as a submission id (like wgc::SubmissionId)
-pub type WgpuFuture = SubmissionIndex;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(WgpuFuture: Send, Sync);
-
 /// A write-only view into a staging buffer.
 ///
 /// Reading into this buffer won't yield the contents of the buffer from the
@@ -284,12 +275,11 @@ impl Queue {
     /// has completed. There are no restrictions on the code you can run in the callback, however on native the
     /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
     /// and used to set flags, send messages, etc.
-    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) -> WgpuFuture {
-        let data = DynContext::queue_on_submitted_work_done(
+    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
+        DynContext::queue_on_submitted_work_done(
             &*self.context,
             self.data.as_ref(),
             Box::new(callback),
-        );
-        WgpuFuture { data }
+        )
     }
 }
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index 650d99edb5..e982300e70 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -815,6 +815,7 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits {
         max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(),
         min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(),
         min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(),
+        max_inter_stage_shader_components: limits.max_inter_stage_shader_components(),
         max_color_attachments: limits.max_color_attachments(),
         max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(),
         max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(),
@@ -828,7 +829,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits {
         max_subgroup_size: wgt::Limits::default().max_subgroup_size,
         max_push_constant_size: wgt::Limits::default().max_push_constant_size,
         max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings,
-        max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components,
     }
 }
 
@@ -875,6 +875,7 @@ fn map_js_sys_limits(limits: &wgt::Limits) -> js_sys::Object {
         (maxBufferSize, max_buffer_size),
         (maxVertexAttributes, max_vertex_attributes),
         (maxVertexBufferArrayStride, max_vertex_buffer_array_stride),
+        (maxInterStageShaderComponents, max_inter_stage_shader_components),
         (maxComputeWorkgroupStorageSize, max_compute_workgroup_storage_size),
         (maxComputeInvocationsPerWorkgroup, max_compute_invocations_per_workgroup),
         (maxComputeWorkgroupSizeX, max_compute_workgroup_size_x),
@@ -1087,12 +1088,8 @@ pub struct BrowserGpuPropertyInaccessible;
 /// Returns the browser's gpu object or `Err(BrowserGpuPropertyInaccessible)` if
 /// the current context is neither the main thread nor a dedicated worker.
 ///
-/// If WebGPU is not supported, the Gpu property may (!) be `undefined`,
-/// and so this function will return `Ok(None)`.
-/// Note that this check is insufficient to determine whether WebGPU is
-/// supported, as the browser may define the Gpu property, but be unable to
-/// create any WebGPU adapters.
-/// To detect whether WebGPU is supported, use the [`crate::utils::is_browser_webgpu_supported`] function.
+/// If WebGPU is not supported, the Gpu property is `undefined`, and so this
+/// function will return `Ok(None)`.
 ///
 /// See:
 /// * <https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu>
@@ -1162,8 +1159,6 @@ impl crate::context::Context for ContextWebGpu {
         Box<dyn Fn(JsFutureResult) -> CompilationInfo>,
     >;
 
-    type WgpuFuture = wasm_bindgen_futures::JsFuture;
-
     fn init(_instance_desc: wgt::InstanceDescriptor) -> Self {
         let Ok(gpu) = get_browser_gpu_property() else {
             panic!(
@@ -1253,16 +1248,6 @@ impl crate::context::Context for ContextWebGpu {
         }
     }
 
-    fn instance_wait_any(
-        &self,
-        _futures: &[&Self::WgpuFuture],
-        _timeout_ns: u64,
-    ) -> crate::WaitStatus {
-        // TODO: Yield back to the browser, run the equivalent of the following JavaScript:
-        // > await Promise.any([ ...futures, new Promise(resolve => setTimeout(timeout_ns, resolve) ]))
-        crate::WaitStatus::UnsupportedTimeout
-    }
-
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -2162,7 +2147,7 @@ impl crate::context::Context for ContextWebGpu {
         mode: crate::MapMode,
         range: Range<wgt::BufferAddress>,
         callback: crate::context::BufferMapCallback,
-    ) -> Self::WgpuFuture {
+    ) {
         let map_promise = buffer_data.0.buffer.map_async_with_f64_and_f64(
             map_map_mode(mode),
             range.start as f64,
@@ -2172,7 +2157,6 @@ impl crate::context::Context for ContextWebGpu {
         buffer_data.0.set_mapped_range(range);
 
         register_then_closures(&map_promise, callback, Ok(()), Err(crate::BufferAsyncError));
-        map_promise.into()
     }
 
     fn buffer_get_mapped_range(
@@ -2786,7 +2770,7 @@ impl crate::context::Context for ContextWebGpu {
         &self,
         _queue_data: &Self::QueueData,
         _callback: crate::context::SubmittedWorkDoneCallback,
-    ) -> Self::WgpuFuture {
+    ) {
         unimplemented!()
     }
 
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 98b24b1230..712e01cc44 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -540,8 +540,6 @@ impl crate::Context for ContextWgpuCore {
     type PopErrorScopeFuture = Ready<Option<crate::Error>>;
     type CompilationInfoFuture = Ready<CompilationInfo>;
 
-    type WgpuFuture = wgc::SubmissionIndex;
-
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self {
         Self(wgc::global::Global::new("wgpu", instance_desc))
     }
@@ -608,15 +606,6 @@ impl crate::Context for ContextWgpuCore {
         ready(id.ok())
     }
 
-    fn instance_wait_any(
-        &self,
-        _futures: &[&Self::WgpuFuture],
-        _timeout_ns: u64,
-    ) -> crate::WaitStatus {
-        // TODO: We need to know at the instance level whether a submission ID is completed...
-        crate::WaitStatus::UnsupportedTimeout
-    }
-
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -1402,7 +1391,7 @@ impl crate::Context for ContextWgpuCore {
         mode: MapMode,
         range: Range<wgt::BufferAddress>,
         callback: crate::context::BufferMapCallback,
-    ) -> Self::WgpuFuture {
+    ) {
         let operation = wgc::resource::BufferMapOperation {
             host: match mode {
                 MapMode::Read => wgc::device::HostMap::Read,
@@ -1422,10 +1411,9 @@ impl crate::Context for ContextWgpuCore {
             Some(range.end - range.start),
             operation,
         ) {
-            Ok(index) => index,
+            Ok(_) => (),
             Err(cause) => {
-                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async");
-                Self::SubmissionIndexData::MAX // invalid submission index
+                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async")
             }
         }
     }
@@ -2107,9 +2095,9 @@ impl crate::Context for ContextWgpuCore {
         &self,
         queue_data: &Self::QueueData,
         callback: crate::context::SubmittedWorkDoneCallback,
-    ) -> Self::WgpuFuture {
+    ) {
         let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback);
-        self.0.queue_on_submitted_work_done(queue_data.id, closure)
+        self.0.queue_on_submitted_work_done(queue_data.id, closure);
     }
 
     fn device_start_capture(&self, device_data: &Self::DeviceData) {
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index fe37928af8..a27459ab45 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -16,7 +16,7 @@ use crate::{
     RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor,
     RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor,
     ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor,
-    UncapturedErrorHandler, WaitStatus,
+    UncapturedErrorHandler,
 };
 /// Meta trait for an data associated with an id tracked by a context.
 ///
@@ -60,9 +60,6 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
 
     type CompilationInfoFuture: Future<Output = CompilationInfo> + WasmNotSend + 'static;
 
-    /// This is not std::future, but rather a WGPUFuture, namely an opaque handle that can be queried for completion, but does not hold any returned data.
-    type WgpuFuture: ContextData;
-
     #[cfg(not(target_os = "emscripten"))]
     fn init(instance_desc: wgt::InstanceDescriptor) -> Self;
     unsafe fn instance_create_surface(
@@ -73,7 +70,6 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Self::RequestAdapterFuture;
-    fn instance_wait_any(&self, futures: &[&Self::WgpuFuture], timeout_ns: u64) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &Self::AdapterData,
@@ -222,7 +218,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Self::WgpuFuture;
+    );
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &Self::BufferData,
@@ -417,7 +413,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
         &self,
         queue_data: &Self::QueueData,
         callback: SubmittedWorkDoneCallback,
-    ) -> Self::WgpuFuture;
+    );
 
     fn device_start_capture(&self, device_data: &Self::DeviceData);
     fn device_stop_capture(&self, device_data: &Self::DeviceData);
@@ -768,7 +764,6 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         options: &RequestAdapterOptions<'_, '_>,
     ) -> Pin<InstanceRequestAdapterFuture>;
-    fn instance_wait_any(&self, futures_data: &[&crate::Data], timeout_ns: u64) -> WaitStatus;
     fn adapter_request_device(
         &self,
         adapter_data: &crate::Data,
@@ -913,7 +908,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Arc<crate::Data>;
+    );
     fn buffer_get_mapped_range(
         &self,
         buffer_data: &crate::Data,
@@ -1097,7 +1092,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Arc<crate::Data>;
+    );
 
     fn device_start_capture(&self, data: &crate::Data);
     fn device_stop_capture(&self, data: &crate::Data);
@@ -1375,10 +1370,6 @@ where
         Box::pin(async move { future.await.map(|data| Box::new(data) as _) })
     }
 
-    fn instance_wait_any(&self, _futures_data: &[&crate::Data], _timeout_ns: u64) -> WaitStatus {
-        unimplemented!();
-    }
-
     fn adapter_request_device(
         &self,
         adapter_data: &crate::Data,
@@ -1697,10 +1688,9 @@ where
         mode: MapMode,
         range: Range<BufferAddress>,
         callback: BufferMapCallback,
-    ) -> Arc<crate::Data> {
+    ) {
         let buffer_data = downcast_ref(buffer_data);
-        let handle = Context::buffer_map_async(self, buffer_data, mode, range, callback);
-        Arc::new(handle) as _
+        Context::buffer_map_async(self, buffer_data, mode, range, callback)
     }
 
     fn buffer_get_mapped_range(
@@ -2121,10 +2111,9 @@ where
         &self,
         queue_data: &crate::Data,
         callback: SubmittedWorkDoneCallback,
-    ) -> Arc<crate::Data> {
+    ) {
         let queue_data = downcast_ref(queue_data);
-        let handle = Context::queue_on_submitted_work_done(self, queue_data, callback);
-        Arc::new(handle) as _
+        Context::queue_on_submitted_work_done(self, queue_data, callback)
     }
 
     fn device_start_capture(&self, device_data: &crate::Data) {

From c4f9f46b3fb3b2d7b59f96fe9148f24405430c7f Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 27 Oct 2024 09:42:19 +0100
Subject: [PATCH 17/19] Remove triage_mapped

---
 wgpu-core/src/device/life.rs     | 54 ++++++--------------------------
 wgpu-core/src/device/resource.rs |  2 --
 2 files changed, 10 insertions(+), 46 deletions(-)

diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index bc66a4d1b9..47e33ab11f 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -126,35 +126,20 @@ pub enum WaitIdleError {
 /// -   Each buffer's `ResourceInfo::submission_index` records the index of the
 ///     most recent queue submission that uses that buffer.
 ///
-/// -   Calling `Global::buffer_map_async` adds the buffer to
-///     `self.mapped`, and changes `Buffer::map_state` to prevent it
-///     from being used in any new submissions.
-///
 /// -   When the device is polled, the following `LifetimeTracker` methods decide
 ///     what should happen next:
 ///
-///     1)  `triage_mapped` drains `self.mapped`, checking the submission index
-///         of each buffer against the queue submissions that have finished
-///         execution. Buffers used by submissions still in flight go in
-///         `self.active[index].mapped`, and the rest go into
-///         `self.ready_to_map`.
-///
-///     2)  `triage_submissions` moves entries in `self.active[i]` for completed
+///     1)  `triage_submissions` moves entries in `self.active[i]` for completed
 ///         submissions to `self.ready_to_map`.  At this point, both
 ///         `self.active` and `self.ready_to_map` are up to date with the given
 ///         submission index.
 ///
-///     3)  `handle_mapping` drains `self.ready_to_map` and actually maps the
+///     2)  `handle_mapping` drains `self.ready_to_map` and actually maps the
 ///         buffers, collecting a list of notification closures to call.
 ///
 /// Only calling `Global::buffer_map_async` clones a new `Arc` for the
 /// buffer. This new `Arc` is only dropped by `handle_mapping`.
 pub(crate) struct LifetimeTracker {
-    /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but
-    /// which haven't been examined by `triage_mapped` yet to decide when they
-    /// can be mapped.
-    mapped: Vec<Arc<Buffer>>,
-
     /// Resources used by queue submissions still in flight. One entry per
     /// submission, with older submissions appearing before younger.
     ///
@@ -182,7 +167,6 @@ pub(crate) struct LifetimeTracker {
 impl LifetimeTracker {
     pub fn new() -> Self {
         Self {
-            mapped: Vec::new(),
             active: Vec::new(),
             ready_to_map: Vec::new(),
             work_done_closures: SmallVec::new(),
@@ -212,16 +196,20 @@ impl LifetimeTracker {
     }
 
     pub(crate) fn map(&mut self, buffer: &Arc<Buffer>) -> Option<SubmissionIndex> {
-        self.mapped.push(buffer.clone());
-
-        // Warning: this duplicates what is in triage_mapped()
+        // Determine which buffers are ready to map, and which must wait for the GPU.
         let submission = self
             .active
             .iter_mut()
             .rev()
             .find(|a| a.contains_buffer(&buffer));
 
-        submission.map(|s| s.index)
+        let maybe_submission_index = submission.as_ref().map(|s| s.index.clone());
+
+        submission
+            .map_or(&mut self.ready_to_map, |a| &mut a.mapped)
+            .push(buffer.clone());
+
+        maybe_submission_index
     }
 
     /// Returns the submission index of the most recent submission that uses the
@@ -331,28 +319,6 @@ impl LifetimeTracker {
         }
     }
 
-    /// Determine which buffers are ready to map, and which must wait for the
-    /// GPU.
-    ///
-    /// See the documentation for [`LifetimeTracker`] for details.
-    pub(crate) fn triage_mapped(&mut self) {
-        if self.mapped.is_empty() {
-            return;
-        }
-
-        for buffer in self.mapped.drain(..) {
-            let submission = self
-                .active
-                .iter_mut()
-                .rev()
-                .find(|a| a.contains_buffer(&buffer));
-
-            submission
-                .map_or(&mut self.ready_to_map, |a| &mut a.mapped)
-                .push(buffer);
-        }
-    }
-
     /// Map the buffers in `self.ready_to_map`.
     ///
     /// Return a list of mapping notifications to send.
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 21ecf85d24..46271f7ac9 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -493,8 +493,6 @@ impl Device {
         let submission_closures =
             life_tracker.triage_submissions(submission_index, &self.command_allocator);
 
-        life_tracker.triage_mapped();
-
         let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard);
 
         let queue_empty = life_tracker.queue_empty();

From 22bc30de6d8e9ef6f378c3fbe16181f42477ce49 Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 27 Oct 2024 09:46:53 +0100
Subject: [PATCH 18/19] Style update

---
 wgpu-core/src/device/life.rs  | 4 ++--
 wgpu-core/src/device/queue.rs | 5 +----
 wgpu-core/src/resource.rs     | 5 +----
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 47e33ab11f..84ff18440a 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -201,9 +201,9 @@ impl LifetimeTracker {
             .active
             .iter_mut()
             .rev()
-            .find(|a| a.contains_buffer(&buffer));
+            .find(|a| a.contains_buffer(buffer));
 
-        let maybe_submission_index = submission.as_ref().map(|s| s.index.clone());
+        let maybe_submission_index = submission.as_ref().map(|s| s.index);
 
         submission
             .map_or(&mut self.ready_to_map, |a| &mut a.mapped)
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 1203c9d5a8..25e8806c9c 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1411,10 +1411,7 @@ impl Global {
         //TODO: flush pending writes
         let queue = self.hub.queues.get(queue_id);
         let result = queue.on_submitted_work_done(closure);
-        match result {
-            Some(submission_index) => submission_index,
-            None => 0, // meaning no wait is necessary
-        }
+        result.unwrap_or(0) // '0' means no wait is necessary
     }
 }
 
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index bd2950c277..157b83641a 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -633,10 +633,7 @@ impl Buffer {
             .buffers
             .set_single(self, internal_use);
 
-        let submit_index = match device.lock_life().map(self) {
-            Some(index) => index,
-            None => 0, // meaning no wait is necessary
-        };
+        let submit_index = device.lock_life().map(self).unwrap_or(0); // '0' means no wait is necessary
 
         Ok(submit_index)
     }

From 93934273f76bcfcb9b1d4e0ff459cac07a1c619a Mon Sep 17 00:00:00 2001
From: Elie Michel <elie.michel@exppad.com>
Date: Sun, 27 Oct 2024 14:02:33 +0100
Subject: [PATCH 19/19] Fix merge with trunk

---
 wgpu/src/api/buffer.rs     | 56 +++++++++++++++++++++++++++++++++++++-
 wgpu/src/api/instance.rs   | 13 ++++++---
 wgpu/src/backend/webgpu.rs | 11 +++++---
 3 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
index 9d490616d3..fa9c7f9ec0 100644
--- a/wgpu/src/api/buffer.rs
+++ b/wgpu/src/api/buffer.rs
@@ -242,6 +242,7 @@ impl Buffer {
     /// end of the buffer.
     pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
         let (offset, size) = range_to_offset_size(bounds);
+        check_buffer_bounds(self.size, offset, size);
         BufferSlice {
             buffer: self,
             offset,
@@ -673,6 +674,31 @@ impl Drop for Buffer {
     }
 }
 
+fn check_buffer_bounds(
+    buffer_size: BufferAddress,
+    offset: BufferAddress,
+    size: Option<BufferSize>,
+) {
+    // A slice of length 0 is invalid, so the offset must not be equal to or greater than the buffer size.
+    if offset >= buffer_size {
+        panic!(
+            "slice offset {} is out of range for buffer of size {}",
+            offset, buffer_size
+        );
+    }
+
+    if let Some(size) = size {
+        // Detect integer overflow.
+        let end = offset.checked_add(size.get());
+        if end.map_or(true, |end| end > buffer_size) {
+            panic!(
+                "slice offset {} size {} is out of range for buffer of size {}",
+                offset, size, buffer_size
+            );
+        }
+    }
+}
+
 fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
     bounds: S,
 ) -> (BufferAddress, Option<BufferSize>) {
@@ -690,9 +716,10 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
 
     (offset, size)
 }
+
 #[cfg(test)]
 mod tests {
-    use super::{range_to_offset_size, BufferSize};
+    use super::{check_buffer_bounds, range_to_offset_size, BufferSize};
 
     #[test]
     fn range_to_offset_size_works() {
@@ -715,4 +742,31 @@ mod tests {
     fn range_to_offset_size_panics_for_unbounded_empty_range() {
         range_to_offset_size(..0);
     }
+
+    #[test]
+    #[should_panic]
+    fn check_buffer_bounds_panics_for_offset_at_size() {
+        check_buffer_bounds(100, 100, None);
+    }
+
+    #[test]
+    fn check_buffer_bounds_works_for_end_in_range() {
+        check_buffer_bounds(200, 100, BufferSize::new(50));
+        check_buffer_bounds(200, 100, BufferSize::new(100));
+        check_buffer_bounds(u64::MAX, u64::MAX - 100, BufferSize::new(100));
+        check_buffer_bounds(u64::MAX, 0, BufferSize::new(u64::MAX));
+        check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX - 1));
+    }
+
+    #[test]
+    #[should_panic]
+    fn check_buffer_bounds_panics_for_end_over_size() {
+        check_buffer_bounds(200, 100, BufferSize::new(101));
+    }
+
+    #[test]
+    #[should_panic]
+    fn check_buffer_bounds_panics_for_end_wraparound() {
+        check_buffer_bounds(u64::MAX, 1, BufferSize::new(u64::MAX));
+    }
 }
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
index af6775b86b..b21c9f70ec 100644
--- a/wgpu/src/api/instance.rs
+++ b/wgpu/src/api/instance.rs
@@ -93,10 +93,15 @@ impl Instance {
     ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
     ///
     ///   [`Backends::BROWSER_WEBGPU`] takes a special role:
-    ///   If it is set and WebGPU support is detected, this instance will *only* be able to create
-    ///   WebGPU adapters. If you instead want to force use of WebGL, either
-    ///   disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`]
-    ///   flag to the the `instance_desc`'s `backends` field.
+    ///   If it is set and a [`navigator.gpu`](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu)
+    ///   object is present, this instance will *only* be able to create WebGPU adapters.
+    ///
+    ///   ⚠️ On some browsers this check is insufficient to determine whether WebGPU is supported,
+    ///   as the browser may define the `navigator.gpu` object, but be unable to create any WebGPU adapters.
+    ///   For targeting _both_ WebGPU & WebGL is recommended to use [`crate::util::new_instance_with_webgpu_detection`].
+    ///
+    ///   If you instead want to force use of WebGL, either disable the `webgpu` compile-time feature
+    ///   or don't add the [`Backends::BROWSER_WEBGPU`] flag to the the `instance_desc`'s `backends` field.
     ///   If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
     ///   to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
     ///   a WebGL adapter.
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index e982300e70..e0cf006e6e 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -815,7 +815,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits {
         max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(),
         min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(),
         min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(),
-        max_inter_stage_shader_components: limits.max_inter_stage_shader_components(),
         max_color_attachments: limits.max_color_attachments(),
         max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(),
         max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(),
@@ -829,6 +828,7 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits {
         max_subgroup_size: wgt::Limits::default().max_subgroup_size,
         max_push_constant_size: wgt::Limits::default().max_push_constant_size,
         max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings,
+        max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components,
     }
 }
 
@@ -875,7 +875,6 @@ fn map_js_sys_limits(limits: &wgt::Limits) -> js_sys::Object {
         (maxBufferSize, max_buffer_size),
         (maxVertexAttributes, max_vertex_attributes),
         (maxVertexBufferArrayStride, max_vertex_buffer_array_stride),
-        (maxInterStageShaderComponents, max_inter_stage_shader_components),
         (maxComputeWorkgroupStorageSize, max_compute_workgroup_storage_size),
         (maxComputeInvocationsPerWorkgroup, max_compute_invocations_per_workgroup),
         (maxComputeWorkgroupSizeX, max_compute_workgroup_size_x),
@@ -1088,8 +1087,12 @@ pub struct BrowserGpuPropertyInaccessible;
 /// Returns the browser's gpu object or `Err(BrowserGpuPropertyInaccessible)` if
 /// the current context is neither the main thread nor a dedicated worker.
 ///
-/// If WebGPU is not supported, the Gpu property is `undefined`, and so this
-/// function will return `Ok(None)`.
+/// If WebGPU is not supported, the Gpu property may (!) be `undefined`,
+/// and so this function will return `Ok(None)`.
+/// Note that this check is insufficient to determine whether WebGPU is
+/// supported, as the browser may define the Gpu property, but be unable to
+/// create any WebGPU adapters.
+/// To detect whether WebGPU is supported, use the [`crate::utils::is_browser_webgpu_supported`] function.
 ///
 /// See:
 /// * <https://developer.mozilla.org/en-US/docs/Web/API/Navigator/gpu>