diff --git a/include/meson.build b/include/meson.build index d7398aed70..be784e635d 100644 --- a/include/meson.build +++ b/include/meson.build @@ -13,6 +13,7 @@ vkd3d_idl = [ 'vkd3d_dxgitype.idl', 'vkd3d_swapchain_factory.idl', 'vkd3d_command_list_vkd3d_ext.idl', + 'vkd3d_command_queue_vkd3d_ext.idl', 'vkd3d_device_vkd3d_ext.idl', 'vkd3d_core_interface.idl', ] diff --git a/include/vkd3d_command_queue_vkd3d_ext.idl b/include/vkd3d_command_queue_vkd3d_ext.idl new file mode 100644 index 0000000000..3c69f00a64 --- /dev/null +++ b/include/vkd3d_command_queue_vkd3d_ext.idl @@ -0,0 +1,30 @@ +/* + * * Copyright 2023 NVIDIA Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ +import "vkd3d_d3d12.idl"; +import "vkd3d_vk_includes.h"; + +[ + uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6), + object, + local, + pointer_default(unique) +] +interface ID3D12CommandQueueExt : IUnknown +{ + HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type); +} diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl index 3e615d76a1..4a21ba763e 100644 --- a/include/vkd3d_device_vkd3d_ext.idl +++ b/include/vkd3d_device_vkd3d_ext.idl @@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown HRESULT LockCommandQueue(ID3D12CommandQueue *queue); HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue); } + +[ + uuid(f3112584-41f9-348d-a59b-00b7e1d285d6), + object, + local, + pointer_default(unique) +] +interface ID3DLowLatencyDevice : IUnknown +{ + BOOL SupportsLowLatency(); + HRESULT LatencySleep(); + HRESULT SetLatencySleepMode(BOOL low_latency_mode, BOOL low_latency_boost, UINT32 minimum_interval_us); + HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType); + HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results); +} diff --git a/include/vkd3d_vk_includes.h b/include/vkd3d_vk_includes.h index c43e018935..020596130a 100644 --- a/include/vkd3d_vk_includes.h +++ b/include/vkd3d_vk_includes.h @@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout; typedef enum D3D12_VK_EXTENSION { D3D12_VK_NVX_BINARY_IMPORT = 0x1, - D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2 + D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2, + D3D12_VK_NV_LOW_LATENCY_2 = 0x3 } D3D12_VK_EXTENSION; +typedef enum D3D12_OUT_OF_BAND_CQ_TYPE +{ + OUT_OF_BAND_RENDER = 0, + OUT_OF_BAND_PRESENT = 1 +} D3D12_OUT_OF_BAND_CQ_TYPE; + typedef struct D3D12_CUBIN_DATA_HANDLE { VkCuFunctionNVX vkCuFunction; @@ -61,5 +68,30 @@ typedef struct 
D3D12_UAV_INFO UINT64 gpuVASize; } D3D12_UAV_INFO; +typedef struct D3D12_LATENCY_RESULTS +{ + UINT32 version; + struct D3D12_FRAME_REPORT { + UINT64 frameID; + UINT64 inputSampleTime; + UINT64 simStartTime; + UINT64 simEndTime; + UINT64 renderSubmitStartTime; + UINT64 renderSubmitEndTime; + UINT64 presentStartTime; + UINT64 presentEndTime; + UINT64 driverStartTime; + UINT64 driverEndTime; + UINT64 osRenderQueueStartTime; + UINT64 osRenderQueueEndTime; + UINT64 gpuRenderStartTime; + UINT64 gpuRenderEndTime; + UINT32 gpuActiveRenderTimeUs; + UINT32 gpuFrameTimeUs; + UINT8 rsvd[120]; + } frame_reports[64]; + UINT8 rsvd[32]; +} D3D12_LATENCY_RESULTS; + #endif // __VKD3D_VK_INCLUDES_H diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index a3f7a3adaf..c8a807106e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -219,6 +219,26 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u return hr; } +void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkOutOfBandQueueTypeInfoNV queue_info; + + if (!device->vk_info.NV_low_latency2) + return; + + memset(&queue_info, 0, sizeof(queue_info)); + queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV; + queue_info.pNext = NULL; + queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV; + + VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info)); + + queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV; + + VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info)); +} + static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue, struct vkd3d_fence_worker *worker, const struct vkd3d_vk_device_procs *vk_procs); @@ -16591,12 +16611,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis } /* ID3D12CommandQueue */ +extern ULONG STDMETHODCALLTYPE 
d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface); + static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface) { return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface); } -static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, +HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); @@ -16615,6 +16637,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman return S_OK; } + if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt)) + { + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface); + *object = &command_queue->ID3D12CommandQueueExt_iface; + return S_OK; + } + if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory)) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); @@ -16629,7 +16659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman return E_NOINTERFACE; } -static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) +ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ULONG refcount = InterlockedIncrement(&command_queue->refcount); @@ -16639,7 +16669,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; } -static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) +ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ULONG refcount = 
InterlockedDecrement(&command_queue->refcount); @@ -17132,6 +17162,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm sub.execute.cmd_count = num_command_buffers; sub.execute.command_allocators = allocators; sub.execute.num_command_allocators = command_list_count; + sub.execute.low_latency_frame_id = command_queue->device->frame_markers.render; #ifdef VKD3D_ENABLE_BREADCRUMBS sub.execute.breadcrumb_indices = breadcrumb_indices; sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0; @@ -17295,6 +17326,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc( return desc; } +extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl; + static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = { /* IUnknown methods */ @@ -17807,10 +17840,12 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu const VkSemaphoreSubmitInfo *transition_semaphore, struct d3d12_command_allocator **command_allocators, size_t num_command_allocators, struct vkd3d_queue_timeline_trace_cookie timeline_cookie, - bool debug_capture, bool split_submissions) + uint64_t low_latency_frame_id, bool debug_capture, bool split_submissions) { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + VkLatencySubmissionPresentIdNV latency_submit_present_info; + struct dxgi_vk_swap_chain *low_latency_swapchain; VkSemaphoreSubmitInfo signal_semaphore_info; VkSemaphoreSubmitInfo binary_semaphore_info; VkSubmitInfo2 submit_desc[4]; @@ -17895,6 +17930,27 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu num_submits += 2; } + if (command_queue->device->vk_info.NV_low_latency2) + { + spinlock_acquire(&command_queue->device->low_latency_swapchain_spinlock); + if ((low_latency_swapchain = 
command_queue->device->swapchain_info.low_latency_swapchain)) + dxgi_vk_swap_chain_incref(low_latency_swapchain); + spinlock_release(&command_queue->device->low_latency_swapchain_spinlock); + + if (low_latency_swapchain && dxgi_vk_swap_chain_low_latency_enabled(low_latency_swapchain)) + { + latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV; + latency_submit_present_info.pNext = NULL; + latency_submit_present_info.presentID = low_latency_frame_id; + + for (i = 0; i < num_submits; i++) + submit_desc[i].pNext = &latency_submit_present_info; + } + + if (low_latency_swapchain) + dxgi_vk_swap_chain_decref(low_latency_swapchain); + } + #ifdef VKD3D_ENABLE_RENDERDOC /* For each submission we have marked to be captured, we will first need to filter it * based on VKD3D_AUTO_CAPTURE_COUNTS. @@ -18397,7 +18453,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) submission.execute.command_allocators, submission.execute.num_command_allocators, submission.execute.timeline_cookie, - submission.execute.debug_capture, submission.execute.split_submission); + submission.execute.low_latency_frame_id, + submission.execute.debug_capture, + submission.execute.split_submission); /* command_queue_execute takes ownership of the * outstanding_submission_counters and queue_timeline_indices allocations. 
@@ -18460,6 +18518,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, int rc; queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; + queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl; queue->refcount = 1; queue->desc = *desc; @@ -18588,6 +18647,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource memset(&sub, 0, sizeof(sub)); sub.type = VKD3D_SUBMISSION_EXECUTE; + sub.execute.low_latency_frame_id = d3d12_queue->device->frame_markers.render; sub.execute.transition_count = 1; sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions)); sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE; diff --git a/libs/vkd3d/command_queue_vkd3d_ext.c b/libs/vkd3d/command_queue_vkd3d_ext.c new file mode 100644 index 0000000000..03e1201149 --- /dev/null +++ b/libs/vkd3d/command_queue_vkd3d_ext.c @@ -0,0 +1,91 @@ +/* + * * Copyright 2023 NVIDIA Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API + +#include "vkd3d_private.h" + +static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface); +} + +extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface); + +ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + return d3d12_command_queue_AddRef(&command_queue->ID3D12CommandQueue_iface); +} + +extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface); + +static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + return d3d12_command_queue_Release(&command_queue->ID3D12CommandQueue_iface); +} + +extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface, + REFIID iid, void **object); + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface, + REFIID iid, void **out) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); + return d3d12_command_queue_QueryInterface(&command_queue->ID3D12CommandQueue_iface, iid, out); +} + +static HRESULT STDMETHODCALLTYPE 
d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type) +{ + struct d3d12_command_queue *command_queue; + int i; + + command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + + if (!command_queue->device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (type != OUT_OF_BAND_RENDER && type != OUT_OF_BAND_PRESENT) + return E_INVALIDARG; + + for (i = 0; i < VKD3D_QUEUE_FAMILY_COUNT; i++) + { + if (command_queue->device->queue_families[i]->vk_family_index == command_queue->vkd3d_queue->vk_family_index && + command_queue->device->queue_families[i]->out_of_band_queue) + { + command_queue->vkd3d_queue = command_queue->device->queue_families[i]->out_of_band_queue; + break; + } + } + + return S_OK; +} + +CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl = +{ + /* IUnknown methods */ + d3d12_command_queue_vkd3d_ext_QueryInterface, + d3d12_command_queue_vkd3d_ext_AddRef, + d3d12_command_queue_vkd3d_ext_Release, + + /* ID3D12CommandQueueExt methods */ + d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue +}; + diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 5701b34456..f48a563d9a 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -127,6 +127,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(NV_SHADER_SUBGROUP_PARTITIONED, NV_shader_subgroup_partitioned), VK_EXTENSION(NV_MEMORY_DECOMPRESSION, NV_memory_decompression), VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS_COMPUTE, NV_device_generated_commands_compute), + VK_EXTENSION_VERSION(NV_LOW_LATENCY_2, NV_low_latency2, 2), /* VALVE extensions */ VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type), VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping), @@ -2588,6 +2589,12 @@ struct vkd3d_device_queue_info VkDeviceQueueCreateInfo vk_queue_create_info[VKD3D_QUEUE_FAMILY_COUNT]; }; 
+static bool vkd3d_queue_family_needs_out_of_band_queue(unsigned int vkd3d_queue_family) +{ + return vkd3d_queue_family == VKD3D_QUEUE_FAMILY_GRAPHICS || + vkd3d_queue_family == VKD3D_QUEUE_FAMILY_COMPUTE; +} + static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) { unsigned int i, j; @@ -2612,6 +2619,9 @@ static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) vkd3d_queue_destroy(queue_family->queues[j], device); } + if (queue_family->out_of_band_queue) + vkd3d_queue_destroy(queue_family->out_of_band_queue, device); + vkd3d_free(queue_family->queues); vkd3d_free(queue_family); } @@ -2652,6 +2662,12 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, info->queue_count = queue_info->vk_queue_create_info[k++].queueCount; + /* Unless the queue family only has a single queue to allocate, when NV_low_latency2 + * is enabled one queue is reserved for out of band work */ + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->vk_properties[i].queueCount > 1) + info->queue_count--; + if (!(info->queues = vkd3d_calloc(info->queue_count, sizeof(*info->queues)))) { hr = E_OUTOFMEMORY; @@ -2665,6 +2681,19 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, goto out_destroy_queues; } + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->vk_properties[i].queueCount > 1) + { + /* The low latency out of band queue is always the last queue for the family */ + if (FAILED((hr = vkd3d_queue_create(device, queue_info->family_index[i], + info->queue_count, &queue_info->vk_properties[i], &info->out_of_band_queue)))) + goto out_destroy_queues; + + vkd3d_set_queue_out_of_band(device, info->out_of_band_queue); + } + else + WARN("Could not allocate an out of band queue for queue family %u. 
All out of band work will happen on the in band queue.\n", i); + info->vk_family_index = queue_info->family_index[i]; info->vk_queue_flags = queue_info->vk_properties[i].queueFlags; info->timestamp_bits = queue_info->vk_properties[i].timestampValidBits; @@ -2684,7 +2713,11 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, } #define VKD3D_MAX_QUEUE_COUNT_PER_FAMILY (4u) -static float queue_priorities[] = {1.0f, 1.0f, 1.0f, 1.0f}; + +/* The queue priorities list contains VKD3D_MAX_QUEUE_COUNT_PER_FAMILY + 1 priorities + * because it is possible for low latency to add an additional queue for out of band work + * submission. */ +static float queue_priorities[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; static uint32_t vkd3d_find_queue(unsigned int count, const VkQueueFamilyProperties *properties, VkQueueFlags mask, VkQueueFlags flags) @@ -2700,10 +2733,10 @@ static uint32_t vkd3d_find_queue(unsigned int count, const VkQueueFamilyProperti return VK_QUEUE_FAMILY_IGNORED; } -static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, +static HRESULT vkd3d_select_queues(const struct d3d12_device *device, VkPhysicalDevice physical_device, struct vkd3d_device_queue_info *info) { - const struct vkd3d_vk_instance_procs *vk_procs = &vkd3d_instance->vk_procs; + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; VkQueueFamilyProperties *queue_properties = NULL; VkDeviceQueueCreateInfo *queue_info = NULL; bool duplicate, single_queue; @@ -2766,6 +2799,10 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, if (single_queue) queue_info->queueCount = 1; + + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->queueCount < info->vk_properties[i].queueCount) + queue_info->queueCount++; } vkd3d_free(queue_properties); @@ -2808,9 +2845,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, 
VK_CALL(vkGetPhysicalDeviceProperties(device->vk_physical_device, &device_properties)); device->api_version = min(device_properties.apiVersion, VKD3D_MAX_API_VERSION); - if (FAILED(hr = vkd3d_select_queues(device->vkd3d_instance, physical_device, &device_queue_info))) - return hr; - TRACE("Using queue family %u for direct command queues.\n", device_queue_info.family_index[VKD3D_QUEUE_FAMILY_GRAPHICS]); TRACE("Using queue family %u for compute command queues.\n", @@ -2857,6 +2891,13 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, return E_OUTOFMEMORY; } + if (FAILED(hr = vkd3d_select_queues(device, physical_device, &device_queue_info))) + { + vkd3d_free(user_extension_supported); + vkd3d_free(extensions); + return hr; + } + /* Create device */ device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device_info.pNext = device->device_info.features2.pNext; @@ -3285,8 +3326,9 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk } /* ID3D12Device */ -extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface); +extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface); extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface); +extern ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(ID3DLowLatencyDevice *iface); HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, REFIID riid, void **object) @@ -3333,6 +3375,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, return S_OK; } + if (IsEqualGUID(riid, &IID_ID3DLowLatencyDevice)) + { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + d3d12_low_latency_device_AddRef(&device->ID3DLowLatencyDevice_iface); + *object = &device->ID3DLowLatencyDevice_iface; + return S_OK; + } + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); *object = NULL; @@ -8349,6 +8399,7 @@ 
static void d3d12_device_replace_vtable(struct d3d12_device *device) extern CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl; extern CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl; +extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl; static void vkd3d_scratch_pool_init(struct d3d12_device *device) { @@ -8417,8 +8468,11 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, goto out_free_mutex; } + spinlock_init(&device->low_latency_swapchain_spinlock); + device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl; device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl; + device->ID3DLowLatencyDevice_iface.lpVtbl = &d3d_low_latency_device_vtbl; if ((rc = rwlock_init(&device->vertex_input_lock))) { diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c index 5bb7eca840..4cd9b5c419 100644 --- a/libs/vkd3d/device_vkd3d_ext.c +++ b/libs/vkd3d/device_vkd3d_ext.c @@ -20,18 +20,18 @@ #include "vkd3d_private.h" -static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(ID3D12DeviceExt *iface) +static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(d3d12_device_vkd3d_ext_iface *iface) { return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceExt_iface); } -ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface) +ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); return d3d12_device_add_ref(device); } -static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(d3d12_device_vkd3d_ext_iface *iface) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); return d3d12_device_release(device); @@ -40,7 +40,7 @@ static ULONG STDMETHODCALLTYPE 
d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *i extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, REFIID riid, void **object); -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12DeviceExt *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(d3d12_device_vkd3d_ext_iface *iface, REFIID iid, void **out) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); @@ -48,7 +48,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12Dev return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12DeviceExt *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(d3d12_device_vkd3d_ext_iface *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); TRACE("iface %p, vk_instance %p, vk_physical_device %p, vk_device %p \n", iface, vk_instance, vk_physical_device, vk_device); @@ -61,7 +61,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12D return S_OK; } -static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12DeviceExt *iface, D3D12_VK_EXTENSION extension) +static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_device_vkd3d_ext_iface *iface, D3D12_VK_EXTENSION extension) { const struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); bool ret_val = false; @@ -75,6 +75,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D case D3D12_VK_NVX_IMAGE_VIEW_HANDLE: ret_val = device->vk_info.NVX_image_view_handle; break; + case D3D12_VK_NV_LOW_LATENCY_2: + ret_val = device->vk_info.NV_low_latency2; + break; default: 
WARN("Invalid extension %x\n", extension); } @@ -82,7 +85,7 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D return ret_val; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(ID3D12DeviceExt *iface, const void *cubin_data, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data, UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle) { VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX }; @@ -129,7 +132,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(ID3D12DeviceExt *iface, D3D12_CUBIN_DATA_HANDLE *handle) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(d3d12_device_vkd3d_ext_iface *iface, D3D12_CUBIN_DATA_HANDLE *handle) { const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device; @@ -149,7 +152,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShade return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle) { VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; @@ -177,7 +180,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE 
uav_handle, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, UINT32 *cuda_surface_handle) { VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; @@ -202,7 +205,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3 extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info; -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12DeviceExt *iface, D3D12_UAV_INFO *uav_info) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(d3d12_device_vkd3d_ext_iface *iface, D3D12_UAV_INFO *uav_info) { if (!uav_info) return E_INVALIDARG; @@ -417,3 +420,138 @@ CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl = d3d12_dxvk_interop_device_LockCommandQueue, d3d12_dxvk_interop_device_UnlockCommandQueue, }; + +static inline struct d3d12_device *d3d12_device_from_ID3DLowLatencyDevice(d3d_low_latency_device_iface *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_device, ID3DLowLatencyDevice_iface); +} + +ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + return d3d12_device_add_ref(device); +} + +static ULONG STDMETHODCALLTYPE d3d12_low_latency_device_Release(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + return d3d12_device_release(device); +} + +extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object); + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_QueryInterface(d3d_low_latency_device_iface *iface, + REFIID iid, void **out) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + TRACE("iface %p, iid %s, out %p.\n", iface, 
debugstr_guid(iid), out); + return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); +} + +static BOOL STDMETHODCALLTYPE d3d12_low_latency_device_SupportsLowLatency(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + return device->vk_info.NV_low_latency2; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_LatencySleep(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_latency_sleep(device->swapchain_info.low_latency_swapchain); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencySleepMode(d3d_low_latency_device_iface *iface, BOOL low_latency_mode, BOOL low_latency_boost, + UINT32 minimum_interval_us) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_set_latency_sleep_mode(device->swapchain_info.low_latency_swapchain, low_latency_mode, low_latency_boost, minimum_interval_us); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencyMarker(d3d_low_latency_device_iface *iface, UINT64 frameID, UINT32 markerType) +{ + struct d3d12_device *device; + VkLatencyMarkerNV vk_marker; + uint64_t internal_frame_id; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + vk_marker = (VkLatencyMarkerNV)markerType; + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + /* Offset the frameID by one to ensure it will always + * be a valid presentID */ + internal_frame_id = frameID + 1; + + switch (vk_marker) + { + case VK_LATENCY_MARKER_SIMULATION_START_NV: + device->frame_markers.simulation 
= internal_frame_id; + break; + case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: + device->frame_markers.render = internal_frame_id; + break; + case VK_LATENCY_MARKER_PRESENT_START_NV: + device->frame_markers.present = internal_frame_id; + break; + default: + break; + } + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_set_latency_marker(device->swapchain_info.low_latency_swapchain, internal_frame_id, vk_marker); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_GetLatencyInfo(d3d_low_latency_device_iface *iface, D3D12_LATENCY_RESULTS *latency_results) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_get_latency_info(device->swapchain_info.low_latency_swapchain, latency_results); + + return S_OK; +} + +CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl = +{ + /* IUnknown methods */ + d3d12_low_latency_device_QueryInterface, + d3d12_low_latency_device_AddRef, + d3d12_low_latency_device_Release, + + /* ID3DLowLatencyDevice methods */ + d3d12_low_latency_device_SupportsLowLatency, + d3d12_low_latency_device_LatencySleep, + d3d12_low_latency_device_SetLatencySleepMode, + d3d12_low_latency_device_SetLatencyMarker, + d3d12_low_latency_device_GetLatencyInfo +}; diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 04394fd9f9..9c91ffcd27 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -60,6 +60,7 @@ vkd3d_src = [ 'cache.c', 'command.c', 'command_list_vkd3d_ext.c', + 'command_queue_vkd3d_ext.c', 'device.c', 'device_vkd3d_ext.c', 'heap.c', diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c index d0bc526d4a..8ea35fd821 100644 --- a/libs/vkd3d/swapchain.c +++ b/libs/vkd3d/swapchain.c @@ -49,6 +49,13 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_QueryInterface(IDXGI 
return ID3D12CommandQueue_QueryInterface(&chain->queue->ID3D12CommandQueue_iface, riid, object); } +struct low_latency_state +{ + bool mode; + bool boost; + uint32_t minimum_interval_us; +}; + struct dxgi_vk_swap_chain_present_request { uint64_t begin_frame_time_ns; @@ -58,6 +65,9 @@ struct dxgi_vk_swap_chain_present_request DXGI_COLOR_SPACE_TYPE dxgi_color_space_type; DXGI_VK_HDR_METADATA dxgi_hdr_metadata; uint32_t swap_interval; + uint64_t low_latency_frame_id; + struct low_latency_state requested_low_latency_state; + bool low_latency_update_requested; bool modifies_hdr_metadata; }; @@ -73,6 +83,7 @@ struct dxgi_vk_swap_chain struct d3d12_command_queue *queue; LONG refcount; + LONG internal_refcount; DXGI_SWAP_CHAIN_DESC1 desc; vkd3d_native_sync_handle frame_latency_event; @@ -85,6 +96,9 @@ struct dxgi_vk_swap_chain bool frame_latency_internal_is_static; VkSurfaceKHR vk_surface; + struct low_latency_state requested_low_latency_state; + bool low_latency_update_requested; + bool debug_latency; bool swapchain_maintenance1; @@ -146,6 +160,21 @@ struct dxgi_vk_swap_chain VkPresentModeKHR unlocked_present_mode; bool compatible_unlocked_present_mode; bool present_mode_forces_fifo; + + /* Info about the current low latency state of the swapchain */ + uint32_t low_latency_present_mode_count; + VkPresentModeKHR low_latency_present_modes[16]; + + pthread_mutex_t low_latency_swapchain_lock; + pthread_mutex_t low_latency_state_update_lock; + + VkSemaphore low_latency_sem; + uint64_t low_latency_sem_value; + + uint64_t previous_application_frame_id; + bool using_application_frame_id; + + struct low_latency_state low_latency_state; } present; struct dxgi_vk_swap_chain_present_request request, request_ring[DXGI_MAX_SWAP_CHAIN_BUFFERS]; @@ -390,6 +419,13 @@ static void dxgi_vk_swap_chain_cleanup(struct dxgi_vk_swap_chain *chain) for (i = 0; i < ARRAY_SIZE(chain->present.vk_swapchain_fences); i++) VK_CALL(vkDestroyFence(chain->queue->device->vk_device, 
chain->present.vk_swapchain_fences[i], NULL)); + if (chain->queue->device->vk_info.NV_low_latency2) + { + VK_CALL(vkDestroySemaphore(chain->queue->device->vk_device, chain->present.low_latency_sem, NULL)); + pthread_mutex_destroy(&chain->present.low_latency_swapchain_lock); + pthread_mutex_destroy(&chain->present.low_latency_state_update_lock); + } + VK_CALL(vkDestroySwapchainKHR(chain->queue->device->vk_device, chain->present.vk_swapchain, NULL)); for (i = 0; i < ARRAY_SIZE(chain->user.backbuffers); i++) @@ -414,14 +450,18 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_AddRef(IDXGIVkSwapChain *iface { struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); UINT refcount = InterlockedIncrement(&chain->refcount); + TRACE("iface %p, refcount %u\n", iface, refcount); + + if (refcount == 1) + dxgi_vk_swap_chain_incref(chain); + return refcount; } static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *iface) { struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); - struct d3d12_command_queue *queue = chain->queue; UINT refcount; refcount = InterlockedDecrement(&chain->refcount); @@ -429,11 +469,16 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *ifac if (!refcount) { + /* Calling this from the submission thread will result in a deadlock, so + * drain the swapchain queue now. 
*/ dxgi_vk_swap_chain_drain_queue(chain); - dxgi_vk_swap_chain_cleanup(chain); - vkd3d_free(chain); - ID3D12CommandQueue_Release(&queue->ID3D12CommandQueue_iface); + + if (chain->queue->device->vk_info.NV_low_latency2) + d3d12_device_remove_swapchain(chain->queue->device, chain); + + dxgi_vk_swap_chain_decref(chain); } + return refcount; } @@ -873,8 +918,23 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if request->dxgi_hdr_metadata = chain->user.dxgi_hdr_metadata; request->modifies_hdr_metadata = chain->user.modifies_hdr_metadata; request->begin_frame_time_ns = chain->user.begin_frame_time_ns; + request->low_latency_frame_id = chain->queue->device->frame_markers.present; chain->user.modifies_hdr_metadata = false; + if (chain->queue->device->vk_info.NV_low_latency2) + { + pthread_mutex_lock(&chain->present.low_latency_state_update_lock); + request->requested_low_latency_state = chain->requested_low_latency_state; + request->low_latency_update_requested = chain->low_latency_update_requested; + chain->low_latency_update_requested = false; + pthread_mutex_unlock(&chain->present.low_latency_state_update_lock); + } + else + { + memset(&request->requested_low_latency_state, 0, sizeof(request->requested_low_latency_state)); + request->low_latency_update_requested = false; + } + /* Need to process this task in queue thread to deal with wait-before-signal. * All interesting works happens in the callback. */ chain->user.blit_count += 1; @@ -1236,6 +1296,12 @@ static void dxgi_vk_swap_chain_destroy_swapchain_in_present_task(struct dxgi_vk_ if (!chain->present.vk_swapchain) return; + /* If we are going to destroy the swapchain and the device supports VK_NV_low_latency2 + * take the low latency lock. This ensures none of the other NV low latency functions + * will attempt to use the stale swapchain handle. 
*/ + if (chain->queue->device->vk_info.NV_low_latency2) + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + if (chain->swapchain_maintenance1) { dxgi_vk_swap_chain_drain_swapchain_fences(chain); @@ -1268,6 +1334,9 @@ static void dxgi_vk_swap_chain_destroy_swapchain_in_present_task(struct dxgi_vk_ chain->present.present_id_valid = false; chain->present.present_id = 0; chain->present.current_backbuffer_index = UINT32_MAX; + + if (chain->queue->device->vk_info.NV_low_latency2) + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); } static VkColorSpaceKHR convert_color_space(DXGI_COLOR_SPACE_TYPE dxgi_color_space) @@ -1448,10 +1517,29 @@ static bool dxgi_vk_swap_chain_find_compatible_unlocked_present_mode( return true; } +static void dxgi_vk_swap_chain_set_low_latency_state(struct dxgi_vk_swap_chain *chain, struct low_latency_state *low_latency_state) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencySleepModeInfoNV swapchain_latency_sleep_mode_info; + + memset(&swapchain_latency_sleep_mode_info, 0, sizeof(swapchain_latency_sleep_mode_info)); + swapchain_latency_sleep_mode_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV; + swapchain_latency_sleep_mode_info.pNext = NULL; + + swapchain_latency_sleep_mode_info.lowLatencyMode = low_latency_state->mode; + swapchain_latency_sleep_mode_info.lowLatencyBoost = low_latency_state->boost; + swapchain_latency_sleep_mode_info.minimumIntervalUs = low_latency_state->minimum_interval_us; + + VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &swapchain_latency_sleep_mode_info)); + + chain->present.low_latency_state = *low_latency_state; +} + static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk_swap_chain *chain) { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkPhysicalDevice vk_physical_device = chain->queue->device->vk_physical_device; + 
VkSwapchainLatencyCreateInfoNV swapchain_latency_create_info; VkSwapchainPresentModesCreateInfoEXT present_modes_info; VkDevice vk_device = chain->queue->device->vk_device; VkCommandPoolCreateInfo command_pool_create_info; @@ -1573,6 +1661,15 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk swapchain_create_info.imageExtent.height = max(swapchain_create_info.imageExtent.height, surface_caps.minImageExtent.height); swapchain_create_info.imageExtent.height = min(swapchain_create_info.imageExtent.height, surface_caps.maxImageExtent.height); + if (chain->queue->device->vk_info.NV_low_latency2) + { + memset(&swapchain_latency_create_info, 0, sizeof(swapchain_latency_create_info)); + swapchain_latency_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV; + swapchain_latency_create_info.pNext = NULL; + swapchain_latency_create_info.latencyModeEnable = true; + swapchain_create_info.pNext = &swapchain_latency_create_info; + } + vr = VK_CALL(vkCreateSwapchainKHR(vk_device, &swapchain_create_info, NULL, &chain->present.vk_swapchain)); if (vr < 0) { @@ -1586,6 +1683,19 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk INFO("Got %u swapchain images.\n", chain->present.backbuffer_count); + /* If low latency is supported restore the current low latency state now */ + if (chain->queue->device->vk_info.NV_low_latency2) + { + struct low_latency_state* low_latency_state = chain->request.low_latency_update_requested ? + &chain->request.requested_low_latency_state : &chain->present.low_latency_state; + + dxgi_vk_swap_chain_set_low_latency_state(chain, low_latency_state); + + /* If low latency is enabled assume the application will start driving the frame id again. 
*/ + chain->present.using_application_frame_id = low_latency_state->mode; + chain->present.previous_application_frame_id = 0; + } + memset(&view_info, 0, sizeof(view_info)); view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; view_info.format = swapchain_create_info.imageFormat; @@ -1626,6 +1736,9 @@ static bool dxgi_vk_swap_chain_request_needs_swapchain_recreation( return request->dxgi_color_space_type != last_request->dxgi_color_space_type || request->dxgi_format != last_request->dxgi_format || request->target_min_image_count != last_request->target_min_image_count || + (chain->present.low_latency_state.mode && + (request->low_latency_frame_id != chain->present.previous_application_frame_id) && + !chain->present.using_application_frame_id) || ((!!request->swap_interval) != (!!last_request->swap_interval) && !chain->present.compatible_unlocked_present_mode); } @@ -2019,6 +2132,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkSwapchainPresentFenceInfoEXT present_fence_info; VkSwapchainPresentModeInfoEXT present_mode_info; + uint32_t max_present_iterations; VkPresentModeKHR present_mode; VkPresentInfoKHR present_info; VkPresentIdKHR present_id; @@ -2035,6 +2149,12 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (!chain->present.vk_swapchain) return; + /* If low latency is enabled we should only try to present once to avoid having to + * increment the present id for each failed present. Make sure to do this after checking + * if the swapchain needs to be recreated so the low latency state is up to date. */ + max_present_iterations = chain->present.low_latency_state.mode ? 
+ 0 : 3; + vr = dxgi_vk_swap_chain_try_acquire_next_image(chain); VKD3D_DEVICE_REPORT_FAULT_AND_BREADCRUMB_IF(chain->queue->device, vr == VK_ERROR_DEVICE_LOST); @@ -2046,7 +2166,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (vr == VK_ERROR_OUT_OF_DATE_KHR) { - if (retry_counter < 3) + if (retry_counter < max_present_iterations) dxgi_vk_swap_chain_present_iteration(chain, retry_counter + 1); } else if (vr == VK_ERROR_SURFACE_LOST_KHR) @@ -2081,11 +2201,28 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai * Non-FIFO swapchains will pump their frame latency handles through the fallback path of blit command being done. * Especially on Xwayland, the present ID is updated when images actually hit on-screen due to MAILBOX behavior. * This would unnecessarily stall our progress. */ - if (chain->wait_thread.active && !chain->present.present_id_valid && swapchain_is_fifo) + if (chain->wait_thread.active && !chain->present.present_id_valid && + (swapchain_is_fifo || chain->present.low_latency_state.mode)) { - /* If we recreate swapchain, we still want to maintain a monotonically increasing counter here for - * profiling purposes. */ - chain->present.present_id = chain->present.complete_count + 1; + if (chain->present.low_latency_state.mode && + chain->request.low_latency_frame_id > chain->present.previous_application_frame_id) + { + chain->present.present_id = chain->request.low_latency_frame_id; + chain->present.previous_application_frame_id = chain->request.low_latency_frame_id; + } + else + { + /* If we recreate swapchain, we still want to maintain a monotonically increasing counter here for + * profiling purposes. */ + chain->present.present_id = chain->present.low_latency_state.mode ? + chain->present.present_id + 1 : chain->present.complete_count + 1; + + /* It is possible for an application to stop providing low latency frame ids. 
If that happens we are + * now responsible for ensuring the present id is always incrementing. If the application starts to + * provide them again, we will have to recreate the swapchain. */ + chain->present.using_application_frame_id = false; + } + present_id.sType = VK_STRUCTURE_TYPE_PRESENT_ID_KHR; present_id.pNext = NULL; present_id.swapchainCount = 1; @@ -2150,7 +2287,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (vr == VK_ERROR_OUT_OF_DATE_KHR) { - if (retry_counter < 3) + if (retry_counter < max_present_iterations) dxgi_vk_swap_chain_present_iteration(chain, retry_counter + 1); } else if (vr == VK_ERROR_SURFACE_LOST_KHR) @@ -2201,6 +2338,7 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) { const struct dxgi_vk_swap_chain_present_request *next_request; struct dxgi_vk_swap_chain *chain = chain_; + bool require_low_latency_state_update; uint32_t next_present_count; uint32_t present_count; uint32_t i; @@ -2214,6 +2352,23 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) if (chain->request.modifies_hdr_metadata) dxgi_vk_swap_chain_set_hdr_metadata(chain); + require_low_latency_state_update = chain->request.low_latency_update_requested && + memcmp(&chain->present.low_latency_state, &chain->request.requested_low_latency_state, + sizeof(chain->present.low_latency_state)); + + /* If the low latency state is already set to what the application is requesting, it is safe to skip + * this request. */ + if (require_low_latency_state_update) + { + /* When the low latency mode isn't changing the low latency state can be updated immediately. Otherwise + * the swapchain will have to be recreated to reset the present id. For this case the low latency state + * update will happen after the swapchain has been recreated. 
*/ + if (chain->present.low_latency_state.mode == chain->request.requested_low_latency_state.mode) + dxgi_vk_swap_chain_set_low_latency_state(chain, &chain->request.requested_low_latency_state); + else + chain->present.force_swapchain_recreation = true; + } + + /* If no QueuePresentKHRs successfully commits a present ID, we'll fallback to a normal queue signal. */ + chain->present.present_id_valid = false; @@ -2222,8 +2377,10 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) * TODO: Propose VK_EXT_present_interval. */ present_count = max(1u, chain->request.swap_interval); - /* If we hit the legacy way of synchronizing with swapchain, blitting multiple times would be horrible. */ - if (!chain->wait_thread.active) + /* If we hit the legacy way of synchronizing with swapchain, blitting multiple times would be horrible. + * Also if low latency mode is enabled only do a single present iteration to avoid falling off the application + * provided frame id path. */ + if (!chain->wait_thread.active || chain->present.low_latency_state.mode) present_count = 1; for (i = 0; i < present_count; i++) @@ -2374,6 +2531,80 @@ static HRESULT dxgi_vk_swap_chain_init_waiter_thread(struct dxgi_vk_swap_chain * return S_OK; } +static HRESULT dxgi_vk_swap_chain_init_low_latency(struct dxgi_vk_swap_chain* chain) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkPhysicalDevice vk_physical_device = chain->queue->device->vk_physical_device; + + VkLatencySurfaceCapabilitiesNV latency_surface_caps; + VkSemaphoreTypeCreateInfoKHR semaphore_type_info; + VkPhysicalDeviceSurfaceInfo2KHR surface_info; + VkSurfaceCapabilities2KHR surface_caps; + VkSemaphoreCreateInfo semaphore_info; + VkResult vr; + + chain->present.low_latency_present_mode_count = 0; + + chain->present.low_latency_sem = VK_NULL_HANDLE; + chain->present.low_latency_sem_value = 0; + + chain->present.previous_application_frame_id = 0; + chain->present.using_application_frame_id = false; + 
chain->present.low_latency_state.mode = false; + chain->present.low_latency_state.boost = false; + chain->present.low_latency_state.minimum_interval_us = 0; + + if (chain->queue->device->vk_info.NV_low_latency2) + { + memset(&surface_info, 0, sizeof(surface_info)); + surface_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR; + surface_info.pNext = NULL; + surface_info.surface = chain->vk_surface; + + memset(&latency_surface_caps, 0, sizeof(latency_surface_caps)); + latency_surface_caps.sType = VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV; + latency_surface_caps.presentModeCount = ARRAY_SIZE(chain->present.low_latency_present_modes); + latency_surface_caps.pPresentModes = chain->present.low_latency_present_modes; + + memset(&surface_caps, 0, sizeof(surface_caps)); + surface_caps.sType = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR; + surface_caps.pNext = &latency_surface_caps; + + if ((vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilities2KHR(vk_physical_device, &surface_info, + &surface_caps))) < 0) + { + ERR("Failed to query latency surface capabilities count, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + chain->present.low_latency_present_mode_count = latency_surface_caps.presentModeCount; + + memset(&semaphore_type_info, 0, sizeof(semaphore_type_info)); + semaphore_type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; + semaphore_type_info.pNext = NULL; + semaphore_type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; + semaphore_type_info.initialValue = 0; + + memset(&semaphore_info, 0, sizeof(semaphore_info)); + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = &semaphore_type_info; + semaphore_info.flags = 0; + + if ((vr = VK_CALL(vkCreateSemaphore(chain->queue->device->vk_device, &semaphore_info, + NULL, &chain->present.low_latency_sem))) < 0) + { + ERR("Failed to create semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + 
pthread_mutex_init(&chain->present.low_latency_swapchain_lock, NULL); + pthread_mutex_init(&chain->present.low_latency_state_update_lock, NULL); + } + + return S_OK; +} + static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVkSurfaceFactory *pFactory, const DXGI_SWAP_CHAIN_DESC1 *pDesc, struct d3d12_command_queue *queue) { @@ -2381,6 +2612,7 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk chain->IDXGIVkSwapChain_iface.lpVtbl = &dxgi_vk_swap_chain_vtbl; chain->refcount = 1; + chain->internal_refcount = 1; chain->queue = queue; chain->desc = *pDesc; @@ -2402,6 +2634,9 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk if (FAILED(hr = dxgi_vk_swap_chain_init_waiter_thread(chain))) goto err; + if (FAILED(hr = dxgi_vk_swap_chain_init_low_latency(chain))) + goto err; + ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface); return S_OK; @@ -2429,6 +2664,9 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_CreateSwapChain(IDXG return hr; } + if (chain->queue->device->vk_info.NV_low_latency2) + d3d12_device_register_swapchain(chain->queue->device, chain); + *ppSwapchain = &chain->IDXGIVkSwapChain_iface; return S_OK; } @@ -2444,6 +2682,181 @@ static CONST_VTBL struct IDXGIVkSwapChainFactoryVtbl dxgi_vk_swap_chain_factory_ dxgi_vk_swap_chain_factory_CreateSwapChain, }; +bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain) +{ + return chain->present.low_latency_state.mode; +} + +void dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencySleepInfoNV latency_sleep_info; + VkSemaphoreWaitInfo sem_wait_info; + bool should_sleep = false; + + /* Increment the low latency sem value before the wait */ + chain->present.low_latency_sem_value++; + + memset(&latency_sleep_info, 0, sizeof(latency_sleep_info)); + latency_sleep_info.sType = 
VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV; + latency_sleep_info.pNext = NULL; + latency_sleep_info.signalSemaphore = chain->present.low_latency_sem; + latency_sleep_info.value = chain->present.low_latency_sem_value; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + { + should_sleep = true; + VK_CALL(vkLatencySleepNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_sleep_info)); + } + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); + + if (should_sleep) + { + memset(&sem_wait_info, 0, sizeof(sem_wait_info)); + sem_wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; + sem_wait_info.pNext = NULL; + sem_wait_info.flags = 0; + sem_wait_info.semaphoreCount = 1; + sem_wait_info.pSemaphores = &chain->present.low_latency_sem; + sem_wait_info.pValues = &chain->present.low_latency_sem_value; + + VK_CALL(vkWaitSemaphores(chain->queue->device->vk_device, &sem_wait_info, UINT64_MAX)); + } +} + +void dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain, bool low_latency_mode, + bool low_latency_boost, uint32_t minimum_interval_us) +{ + pthread_mutex_lock(&chain->present.low_latency_state_update_lock); + + chain->requested_low_latency_state.mode = low_latency_mode; + chain->requested_low_latency_state.boost = low_latency_boost; + chain->requested_low_latency_state.minimum_interval_us = minimum_interval_us; + + /* The actual call to vkSetLatencySleepModeNV will happen + * when the application calls Present and the requested low + * latency state is passed to the present task. 
*/ + chain->low_latency_update_requested = true; + + pthread_mutex_unlock(&chain->present.low_latency_state_update_lock); +} + +void dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain, uint64_t frameID, VkLatencyMarkerNV marker) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkSetLatencyMarkerInfoNV latency_marker_info; + + memset(&latency_marker_info, 0, sizeof(latency_marker_info)); + latency_marker_info.sType = VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV; + latency_marker_info.pNext = NULL; + latency_marker_info.presentID = frameID; + latency_marker_info.marker = marker; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + VK_CALL(vkSetLatencyMarkerNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_marker_info)); + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); +} + +void dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3D12_LATENCY_RESULTS *latency_results) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencyTimingsFrameReportNV* frame_reports; + VkGetLatencyMarkerInfoNV marker_info; + uint32_t i; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + { + memset(&marker_info, 0, sizeof(marker_info)); + marker_info.sType = VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV; + + VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &marker_info)); + + if (marker_info.timingCount >= 64) + { + marker_info.timingCount = 64; + frame_reports = vkd3d_calloc(marker_info.timingCount, sizeof(VkLatencyTimingsFrameReportNV)); + for (i = 0; i < marker_info.timingCount; i++) + frame_reports[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV; + + marker_info.pTimings = frame_reports; + + VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, 
chain->present.vk_swapchain, &marker_info)); + + for (i = 0; i < marker_info.timingCount; i++) + { + latency_results->frame_reports[i].frameID = frame_reports[i].presentID - 1; + latency_results->frame_reports[i].inputSampleTime = frame_reports[i].inputSampleTimeUs; + latency_results->frame_reports[i].simStartTime = frame_reports[i].simStartTimeUs; + latency_results->frame_reports[i].simEndTime = frame_reports[i].simEndTimeUs; + latency_results->frame_reports[i].renderSubmitStartTime = frame_reports[i].renderSubmitStartTimeUs; + latency_results->frame_reports[i].renderSubmitEndTime = frame_reports[i].renderSubmitEndTimeUs; + latency_results->frame_reports[i].presentStartTime = frame_reports[i].presentStartTimeUs; + latency_results->frame_reports[i].presentEndTime = frame_reports[i].presentEndTimeUs; + latency_results->frame_reports[i].driverStartTime = frame_reports[i].driverStartTimeUs; + latency_results->frame_reports[i].driverEndTime = frame_reports[i].driverEndTimeUs; + latency_results->frame_reports[i].osRenderQueueStartTime = frame_reports[i].osRenderQueueStartTimeUs; + latency_results->frame_reports[i].osRenderQueueEndTime = frame_reports[i].osRenderQueueEndTimeUs; + latency_results->frame_reports[i].gpuRenderStartTime = frame_reports[i].gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuRenderEndTime = frame_reports[i].gpuRenderEndTimeUs; + latency_results->frame_reports[i].gpuActiveRenderTimeUs = + frame_reports[i].gpuRenderEndTimeUs - frame_reports[i].gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuFrameTimeUs = 0; + + if (i) + { + latency_results->frame_reports[i].gpuFrameTimeUs = + frame_reports[i].gpuRenderEndTimeUs - frame_reports[i - 1].gpuRenderEndTimeUs; + } + } + + vkd3d_free(frame_reports); + } + else + { + /* If there are less than 64 frame reports, zero out the frame report + * buffer returned to the app. 
*/ + memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports)); + } + } + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); +} + +ULONG dxgi_vk_swap_chain_incref(struct dxgi_vk_swap_chain *chain) +{ + ULONG refcount = InterlockedIncrement(&chain->internal_refcount); + + TRACE("%p increasing refcount to %u.\n", chain, refcount); + + return refcount; +} + +ULONG dxgi_vk_swap_chain_decref(struct dxgi_vk_swap_chain *chain) +{ + ULONG refcount = InterlockedDecrement(&chain->internal_refcount); + + TRACE("%p decreasing refcount to %u.\n", chain, refcount); + + if (!refcount) + { + struct d3d12_command_queue *queue = chain->queue; + + dxgi_vk_swap_chain_cleanup(chain); + vkd3d_free(chain); + ID3D12CommandQueue_Release(&queue->ID3D12CommandQueue_iface); + } + + return refcount; +} + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain) { chain->IDXGIVkSwapChainFactory_iface.lpVtbl = &dxgi_vk_swap_chain_factory_vtbl; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 935f3dc8dc..dcba390549 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -37,6 +37,7 @@ #include "vkd3d_platform.h" #include "vkd3d_swapchain_factory.h" #include "vkd3d_command_list_vkd3d_ext.h" +#include "vkd3d_command_queue_vkd3d_ext.h" #include "vkd3d_device_vkd3d_ext.h" #include "vkd3d_string.h" #include "vkd3d_file_utils.h" @@ -172,6 +173,7 @@ struct vkd3d_vulkan_info bool NV_shader_subgroup_partitioned; bool NV_memory_decompression; bool NV_device_generated_commands_compute; + bool NV_low_latency2; /* VALVE extensions */ bool VALVE_mutable_descriptor_type; bool VALVE_descriptor_set_host_mapping; @@ -3029,6 +3031,7 @@ struct vkd3d_queue VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, uint32_t queue_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue 
**queue); +void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue); void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, @@ -3085,6 +3088,7 @@ struct d3d12_command_queue_submission_execute struct d3d12_command_allocator **command_allocators; UINT cmd_count; UINT num_command_allocators; + uint64_t low_latency_frame_id; struct vkd3d_initial_transition *transitions; size_t transition_count; @@ -3142,12 +3146,33 @@ struct dxgi_vk_swap_chain_factory struct d3d12_command_queue *queue; }; +struct dxgi_vk_swap_chain; + +bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain *chain); +void dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain *chain); +void dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain *chain, + bool low_latency_mode, bool low_latency_boost, uint32_t minimum_interval_us); +void dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain *chain, + uint64_t frameID, VkLatencyMarkerNV marker); +void dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain *chain, + D3D12_LATENCY_RESULTS *latency_results); + +ULONG dxgi_vk_swap_chain_incref(struct dxgi_vk_swap_chain *chain); +ULONG dxgi_vk_swap_chain_decref(struct dxgi_vk_swap_chain *chain); + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain); +/* ID3D12CommandQueueExt */ +typedef ID3D12CommandQueueExt d3d12_command_queue_vkd3d_ext_iface; + /* ID3D12CommandQueue */ +typedef ID3D12CommandQueue d3d12_command_queue_iface; + struct d3d12_command_queue { - ID3D12CommandQueue ID3D12CommandQueue_iface; + d3d12_command_queue_iface ID3D12CommandQueue_iface; + d3d12_command_queue_vkd3d_ext_iface ID3D12CommandQueueExt_iface; + LONG refcount; D3D12_COMMAND_QUEUE_DESC desc; @@ -4442,6 +4467,7 @@ enum 
vkd3d_queue_family struct vkd3d_queue_family_info { + struct vkd3d_queue *out_of_band_queue; struct vkd3d_queue **queues; uint32_t queue_count; uint32_t vk_family_index; @@ -4456,6 +4482,19 @@ struct vkd3d_cached_command_allocator uint32_t vk_family_index; }; +struct vkd3d_device_swapchain_info +{ + struct dxgi_vk_swap_chain *low_latency_swapchain; + uint32_t swapchain_count; +}; + +struct vkd3d_device_frame_markers +{ + uint64_t simulation; + uint64_t render; + uint64_t present; +}; + /* ID3D12Device */ typedef ID3D12Device12 d3d12_device_iface; @@ -4468,6 +4507,9 @@ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; /* ID3D12DXVKInteropDevice */ typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface; +/* ID3DLowLatencyDevice */ +typedef ID3DLowLatencyDevice d3d_low_latency_device_iface; + struct d3d12_device_scratch_pool { struct vkd3d_scratch_buffer scratch_buffers[VKD3D_MAX_SCRATCH_BUFFER_COUNT]; @@ -4591,6 +4633,7 @@ struct d3d12_device d3d12_device_iface ID3D12Device_iface; d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface; d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface; + d3d_low_latency_device_iface ID3DLowLatencyDevice_iface; LONG refcount; VkDevice vk_device; @@ -4600,6 +4643,7 @@ struct d3d12_device pthread_mutex_t mutex; pthread_mutex_t global_submission_mutex; + spinlock_t low_latency_swapchain_spinlock; VkPhysicalDeviceMemoryProperties memory_properties; @@ -4662,6 +4706,9 @@ struct d3d12_device #endif uint64_t shader_interface_key; uint32_t device_has_dgc_templates; + + struct vkd3d_device_swapchain_info swapchain_info; + struct vkd3d_device_frame_markers frame_markers; }; HRESULT d3d12_device_create(struct vkd3d_instance *instance, @@ -4885,6 +4932,32 @@ UINT d3d12_determine_shading_rate_image_tile_size(struct d3d12_device *device); bool d3d12_device_supports_required_subgroup_size_for_stage( struct d3d12_device *device, VkShaderStageFlagBits stage); +static inline void d3d12_device_register_swapchain(struct d3d12_device* 
device, struct dxgi_vk_swap_chain* swapchain) +{ + spinlock_acquire(&device->low_latency_swapchain_spinlock); + + if (!device->swapchain_info.low_latency_swapchain && device->swapchain_info.swapchain_count == 0) + device->swapchain_info.low_latency_swapchain = swapchain; + else + device->swapchain_info.low_latency_swapchain = NULL; + + device->swapchain_info.swapchain_count++; + + spinlock_release(&device->low_latency_swapchain_spinlock); +} + +static inline void d3d12_device_remove_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain) +{ + spinlock_acquire(&device->low_latency_swapchain_spinlock); + + if (device->swapchain_info.low_latency_swapchain == swapchain) + device->swapchain_info.low_latency_swapchain = NULL; + + device->swapchain_info.swapchain_count--; + + spinlock_release(&device->low_latency_swapchain_spinlock); +} + /* ID3DBlob */ struct d3d_blob { diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index e3b672a0e1..96f673dda7 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -354,6 +354,13 @@ VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryIndirectCountNV) /* VK_EXT_device_fault */ VK_DEVICE_EXT_PFN(vkGetDeviceFaultInfoEXT) +/* VK_NV_low_latency2 */ +VK_DEVICE_EXT_PFN(vkSetLatencySleepModeNV) +VK_DEVICE_EXT_PFN(vkLatencySleepNV) +VK_DEVICE_EXT_PFN(vkSetLatencyMarkerNV) +VK_DEVICE_EXT_PFN(vkGetLatencyTimingsNV) +VK_DEVICE_EXT_PFN(vkQueueNotifyOutOfBandNV) + #undef VK_INSTANCE_PFN #undef VK_INSTANCE_EXT_PFN #undef VK_DEVICE_PFN