diff --git a/include/meson.build b/include/meson.build index d7398aed70..be784e635d 100644 --- a/include/meson.build +++ b/include/meson.build @@ -13,6 +13,7 @@ vkd3d_idl = [ 'vkd3d_dxgitype.idl', 'vkd3d_swapchain_factory.idl', 'vkd3d_command_list_vkd3d_ext.idl', + 'vkd3d_command_queue_vkd3d_ext.idl', 'vkd3d_device_vkd3d_ext.idl', 'vkd3d_core_interface.idl', ] diff --git a/include/vkd3d_command_queue_vkd3d_ext.idl b/include/vkd3d_command_queue_vkd3d_ext.idl new file mode 100644 index 0000000000..3c69f00a64 --- /dev/null +++ b/include/vkd3d_command_queue_vkd3d_ext.idl @@ -0,0 +1,30 @@ +/* + * * Copyright 2023 NVIDIA Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ +import "vkd3d_d3d12.idl"; +import "vkd3d_vk_includes.h"; + +[ + uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6), + object, + local, + pointer_default(unique) +] +interface ID3D12CommandQueueExt : IUnknown +{ + HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type); +} diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl index 3e615d76a1..4a21ba763e 100644 --- a/include/vkd3d_device_vkd3d_ext.idl +++ b/include/vkd3d_device_vkd3d_ext.idl @@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown HRESULT LockCommandQueue(ID3D12CommandQueue *queue); HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue); } + +[ + uuid(f3112584-41f9-348d-a59b-00b7e1d285d6), + object, + local, + pointer_default(unique) +] +interface ID3DLowLatencyDevice : IUnknown +{ + BOOL SupportsLowLatency(); + HRESULT LatencySleep(); + HRESULT SetLatencySleepMode(BOOL low_latency_mode, BOOL low_latency_boost, UINT32 minimum_interval_us); + HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType); + HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results); +} diff --git a/include/vkd3d_vk_includes.h b/include/vkd3d_vk_includes.h index c43e018935..020596130a 100644 --- a/include/vkd3d_vk_includes.h +++ b/include/vkd3d_vk_includes.h @@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout; typedef enum D3D12_VK_EXTENSION { D3D12_VK_NVX_BINARY_IMPORT = 0x1, - D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2 + D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2, + D3D12_VK_NV_LOW_LATENCY_2 = 0x3 } D3D12_VK_EXTENSION; +typedef enum D3D12_OUT_OF_BAND_CQ_TYPE +{ + OUT_OF_BAND_RENDER = 0, + OUT_OF_BAND_PRESENT = 1 +} D3D12_OUT_OF_BAND_CQ_TYPE; + typedef struct D3D12_CUBIN_DATA_HANDLE { VkCuFunctionNVX vkCuFunction; @@ -61,5 +68,30 @@ typedef struct 
D3D12_UAV_INFO UINT64 gpuVASize; } D3D12_UAV_INFO; +typedef struct D3D12_LATENCY_RESULTS +{ + UINT32 version; + struct D3D12_FRAME_REPORT { + UINT64 frameID; + UINT64 inputSampleTime; + UINT64 simStartTime; + UINT64 simEndTime; + UINT64 renderSubmitStartTime; + UINT64 renderSubmitEndTime; + UINT64 presentStartTime; + UINT64 presentEndTime; + UINT64 driverStartTime; + UINT64 driverEndTime; + UINT64 osRenderQueueStartTime; + UINT64 osRenderQueueEndTime; + UINT64 gpuRenderStartTime; + UINT64 gpuRenderEndTime; + UINT32 gpuActiveRenderTimeUs; + UINT32 gpuFrameTimeUs; + UINT8 rsvd[120]; + } frame_reports[64]; + UINT8 rsvd[32]; +} D3D12_LATENCY_RESULTS; + #endif // __VKD3D_VK_INCLUDES_H diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index a3f7a3adaf..c8a807106e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -219,6 +219,26 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u return hr; } +void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkOutOfBandQueueTypeInfoNV queue_info; + + if (!device->vk_info.NV_low_latency2) + return; + + memset(&queue_info, 0, sizeof(queue_info)); + queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV; + queue_info.pNext = NULL; + queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV; + + VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info)); + + queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV; + + VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info)); +} + static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue, struct vkd3d_fence_worker *worker, const struct vkd3d_vk_device_procs *vk_procs); @@ -16591,12 +16611,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis } /* ID3D12CommandQueue */ +extern ULONG STDMETHODCALLTYPE 
d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface); + static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface) { return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface); } -static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, +HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); @@ -16615,6 +16637,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman return S_OK; } + if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt)) + { + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface); + *object = &command_queue->ID3D12CommandQueueExt_iface; + return S_OK; + } + if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory)) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); @@ -16629,7 +16659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman return E_NOINTERFACE; } -static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) +ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ULONG refcount = InterlockedIncrement(&command_queue->refcount); @@ -16639,7 +16669,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; } -static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) +ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ULONG refcount = 
InterlockedDecrement(&command_queue->refcount); @@ -17132,6 +17162,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm sub.execute.cmd_count = num_command_buffers; sub.execute.command_allocators = allocators; sub.execute.num_command_allocators = command_list_count; + sub.execute.low_latency_frame_id = command_queue->device->frame_markers.render; #ifdef VKD3D_ENABLE_BREADCRUMBS sub.execute.breadcrumb_indices = breadcrumb_indices; sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0; @@ -17295,6 +17326,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc( return desc; } +extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl; + static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = { /* IUnknown methods */ @@ -17807,10 +17840,12 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu const VkSemaphoreSubmitInfo *transition_semaphore, struct d3d12_command_allocator **command_allocators, size_t num_command_allocators, struct vkd3d_queue_timeline_trace_cookie timeline_cookie, - bool debug_capture, bool split_submissions) + uint64_t low_latency_frame_id, bool debug_capture, bool split_submissions) { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue; + VkLatencySubmissionPresentIdNV latency_submit_present_info; + struct dxgi_vk_swap_chain *low_latency_swapchain; VkSemaphoreSubmitInfo signal_semaphore_info; VkSemaphoreSubmitInfo binary_semaphore_info; VkSubmitInfo2 submit_desc[4]; @@ -17895,6 +17930,27 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu num_submits += 2; } + if (command_queue->device->vk_info.NV_low_latency2) + { + spinlock_acquire(&command_queue->device->low_latency_swapchain_spinlock); + if ((low_latency_swapchain = 
command_queue->device->swapchain_info.low_latency_swapchain)) + dxgi_vk_swap_chain_incref(low_latency_swapchain); + spinlock_release(&command_queue->device->low_latency_swapchain_spinlock); + + if (low_latency_swapchain && dxgi_vk_swap_chain_low_latency_enabled(low_latency_swapchain)) + { + latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV; + latency_submit_present_info.pNext = NULL; + latency_submit_present_info.presentID = low_latency_frame_id; + + for (i = 0; i < num_submits; i++) + submit_desc[i].pNext = &latency_submit_present_info; + } + + if (low_latency_swapchain) + dxgi_vk_swap_chain_decref(low_latency_swapchain); + } + #ifdef VKD3D_ENABLE_RENDERDOC /* For each submission we have marked to be captured, we will first need to filter it * based on VKD3D_AUTO_CAPTURE_COUNTS. @@ -18397,7 +18453,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) submission.execute.command_allocators, submission.execute.num_command_allocators, submission.execute.timeline_cookie, - submission.execute.debug_capture, submission.execute.split_submission); + submission.execute.low_latency_frame_id, + submission.execute.debug_capture, + submission.execute.split_submission); /* command_queue_execute takes ownership of the * outstanding_submission_counters and queue_timeline_indices allocations. 
@@ -18460,6 +18518,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, int rc; queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; + queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl; queue->refcount = 1; queue->desc = *desc; @@ -18588,6 +18647,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource memset(&sub, 0, sizeof(sub)); sub.type = VKD3D_SUBMISSION_EXECUTE; + sub.execute.low_latency_frame_id = d3d12_queue->device->frame_markers.render; sub.execute.transition_count = 1; sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions)); sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE; diff --git a/libs/vkd3d/command_queue_vkd3d_ext.c b/libs/vkd3d/command_queue_vkd3d_ext.c new file mode 100644 index 0000000000..03e1201149 --- /dev/null +++ b/libs/vkd3d/command_queue_vkd3d_ext.c @@ -0,0 +1,91 @@ +/* + * * Copyright 2023 NVIDIA Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API + +#include "vkd3d_private.h" + +static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface); +} + +extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface); + +ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + return d3d12_command_queue_AddRef(&command_queue->ID3D12CommandQueue_iface); +} + +extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface); + +static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + return d3d12_command_queue_Release(&command_queue->ID3D12CommandQueue_iface); +} + +extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface, + REFIID iid, void **object); + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface, + REFIID iid, void **out) +{ + struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); + return d3d12_command_queue_QueryInterface(&command_queue->ID3D12CommandQueue_iface, iid, out); +} + +static HRESULT STDMETHODCALLTYPE 
d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type) +{ + struct d3d12_command_queue *command_queue; + int i; + + command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface); + + if (!command_queue->device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (type != OUT_OF_BAND_RENDER && type != OUT_OF_BAND_PRESENT) + return E_INVALIDARG; + + for (i = 0; i < VKD3D_QUEUE_FAMILY_COUNT; i++) + { + if (command_queue->device->queue_families[i]->vk_family_index == command_queue->vkd3d_queue->vk_family_index && + command_queue->device->queue_families[i]->out_of_band_queue) + { + command_queue->vkd3d_queue = command_queue->device->queue_families[i]->out_of_band_queue; + break; + } + } + + return S_OK; +} + +CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl = +{ + /* IUnknown methods */ + d3d12_command_queue_vkd3d_ext_QueryInterface, + d3d12_command_queue_vkd3d_ext_AddRef, + d3d12_command_queue_vkd3d_ext_Release, + + /* ID3D12CommandQueueExt methods */ + d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue +}; + diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 5701b34456..f48a563d9a 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -127,6 +127,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(NV_SHADER_SUBGROUP_PARTITIONED, NV_shader_subgroup_partitioned), VK_EXTENSION(NV_MEMORY_DECOMPRESSION, NV_memory_decompression), VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS_COMPUTE, NV_device_generated_commands_compute), + VK_EXTENSION_VERSION(NV_LOW_LATENCY_2, NV_low_latency2, 2), /* VALVE extensions */ VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type), VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping), @@ -2588,6 +2589,12 @@ struct vkd3d_device_queue_info VkDeviceQueueCreateInfo vk_queue_create_info[VKD3D_QUEUE_FAMILY_COUNT]; }; 
+static bool vkd3d_queue_family_needs_out_of_band_queue(unsigned int vkd3d_queue_family) +{ + return vkd3d_queue_family == VKD3D_QUEUE_FAMILY_GRAPHICS || + vkd3d_queue_family == VKD3D_QUEUE_FAMILY_COMPUTE; +} + static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) { unsigned int i, j; @@ -2612,6 +2619,9 @@ static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) vkd3d_queue_destroy(queue_family->queues[j], device); } + if (queue_family->out_of_band_queue) + vkd3d_queue_destroy(queue_family->out_of_band_queue, device); + vkd3d_free(queue_family->queues); vkd3d_free(queue_family); } @@ -2652,6 +2662,12 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, info->queue_count = queue_info->vk_queue_create_info[k++].queueCount; + /* Unless the queue family only has a single queue to allocate, when NV_low_latency2 + * is enabled one queue is reserved for out of band work */ + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->vk_properties[i].queueCount > 1) + info->queue_count--; + if (!(info->queues = vkd3d_calloc(info->queue_count, sizeof(*info->queues)))) { hr = E_OUTOFMEMORY; @@ -2665,6 +2681,19 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, goto out_destroy_queues; } + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->vk_properties[i].queueCount > 1) + { + /* The low latency out of band queue is always the last queue for the family */ + if (FAILED((hr = vkd3d_queue_create(device, queue_info->family_index[i], + info->queue_count, &queue_info->vk_properties[i], &info->out_of_band_queue)))) + goto out_destroy_queues; + + vkd3d_set_queue_out_of_band(device, info->out_of_band_queue); + } + else + WARN("Could not allocate an out of band queue for queue family %u. 
All out of band work will happen on the in band queue.\n", i); + info->vk_family_index = queue_info->family_index[i]; info->vk_queue_flags = queue_info->vk_properties[i].queueFlags; info->timestamp_bits = queue_info->vk_properties[i].timestampValidBits; @@ -2684,7 +2713,11 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, } #define VKD3D_MAX_QUEUE_COUNT_PER_FAMILY (4u) -static float queue_priorities[] = {1.0f, 1.0f, 1.0f, 1.0f}; + +/* The queue priorities list contains VKD3D_MAX_QUEUE_COUNT_PER_FAMILY + 1 priorities + * because it is possible for low latency to add an additional queue for out of band work + * submission. */ +static float queue_priorities[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; static uint32_t vkd3d_find_queue(unsigned int count, const VkQueueFamilyProperties *properties, VkQueueFlags mask, VkQueueFlags flags) @@ -2700,10 +2733,10 @@ static uint32_t vkd3d_find_queue(unsigned int count, const VkQueueFamilyProperti return VK_QUEUE_FAMILY_IGNORED; } -static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, +static HRESULT vkd3d_select_queues(const struct d3d12_device *device, VkPhysicalDevice physical_device, struct vkd3d_device_queue_info *info) { - const struct vkd3d_vk_instance_procs *vk_procs = &vkd3d_instance->vk_procs; + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; VkQueueFamilyProperties *queue_properties = NULL; VkDeviceQueueCreateInfo *queue_info = NULL; bool duplicate, single_queue; @@ -2766,6 +2799,10 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, if (single_queue) queue_info->queueCount = 1; + + if (device->vk_info.NV_low_latency2 && vkd3d_queue_family_needs_out_of_band_queue(i) && + queue_info->queueCount < info->vk_properties[i].queueCount) + queue_info->queueCount++; } vkd3d_free(queue_properties); @@ -2808,9 +2845,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, 
VK_CALL(vkGetPhysicalDeviceProperties(device->vk_physical_device, &device_properties)); device->api_version = min(device_properties.apiVersion, VKD3D_MAX_API_VERSION); - if (FAILED(hr = vkd3d_select_queues(device->vkd3d_instance, physical_device, &device_queue_info))) - return hr; - TRACE("Using queue family %u for direct command queues.\n", device_queue_info.family_index[VKD3D_QUEUE_FAMILY_GRAPHICS]); TRACE("Using queue family %u for compute command queues.\n", @@ -2857,6 +2891,13 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, return E_OUTOFMEMORY; } + if (FAILED(hr = vkd3d_select_queues(device, physical_device, &device_queue_info))) + { + vkd3d_free(user_extension_supported); + vkd3d_free(extensions); + return hr; + } + /* Create device */ device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device_info.pNext = device->device_info.features2.pNext; @@ -3285,8 +3326,9 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk } /* ID3D12Device */ -extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface); +extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface); extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface); +extern ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(ID3DLowLatencyDevice *iface); HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, REFIID riid, void **object) @@ -3333,6 +3375,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, return S_OK; } + if (IsEqualGUID(riid, &IID_ID3DLowLatencyDevice)) + { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + d3d12_low_latency_device_AddRef(&device->ID3DLowLatencyDevice_iface); + *object = &device->ID3DLowLatencyDevice_iface; + return S_OK; + } + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); *object = NULL; @@ -8349,6 +8399,7 @@ 
static void d3d12_device_replace_vtable(struct d3d12_device *device) extern CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl; extern CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl; +extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl; static void vkd3d_scratch_pool_init(struct d3d12_device *device) { @@ -8417,8 +8468,11 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, goto out_free_mutex; } + spinlock_init(&device->low_latency_swapchain_spinlock); + device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl; device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl; + device->ID3DLowLatencyDevice_iface.lpVtbl = &d3d_low_latency_device_vtbl; if ((rc = rwlock_init(&device->vertex_input_lock))) { diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c index 5bb7eca840..4cd9b5c419 100644 --- a/libs/vkd3d/device_vkd3d_ext.c +++ b/libs/vkd3d/device_vkd3d_ext.c @@ -20,18 +20,18 @@ #include "vkd3d_private.h" -static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(ID3D12DeviceExt *iface) +static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(d3d12_device_vkd3d_ext_iface *iface) { return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceExt_iface); } -ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface) +ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); return d3d12_device_add_ref(device); } -static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(d3d12_device_vkd3d_ext_iface *iface) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); return d3d12_device_release(device); @@ -40,7 +40,7 @@ static ULONG STDMETHODCALLTYPE 
d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *i extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, REFIID riid, void **object); -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12DeviceExt *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(d3d12_device_vkd3d_ext_iface *iface, REFIID iid, void **out) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); @@ -48,7 +48,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12Dev return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12DeviceExt *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(d3d12_device_vkd3d_ext_iface *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device) { struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); TRACE("iface %p, vk_instance %p, vk_physical_device %p, vk_device %p \n", iface, vk_instance, vk_physical_device, vk_device); @@ -61,7 +61,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12D return S_OK; } -static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12DeviceExt *iface, D3D12_VK_EXTENSION extension) +static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_device_vkd3d_ext_iface *iface, D3D12_VK_EXTENSION extension) { const struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface); bool ret_val = false; @@ -75,6 +75,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D case D3D12_VK_NVX_IMAGE_VIEW_HANDLE: ret_val = device->vk_info.NVX_image_view_handle; break; + case D3D12_VK_NV_LOW_LATENCY_2: + ret_val = device->vk_info.NV_low_latency2; + break; default: 
WARN("Invalid extension %x\n", extension); } @@ -82,7 +85,7 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D return ret_val; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(ID3D12DeviceExt *iface, const void *cubin_data, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data, UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle) { VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX }; @@ -129,7 +132,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(ID3D12DeviceExt *iface, D3D12_CUBIN_DATA_HANDLE *handle) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(d3d12_device_vkd3d_ext_iface *iface, D3D12_CUBIN_DATA_HANDLE *handle) { const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device; @@ -149,7 +152,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShade return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle) { VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; @@ -177,7 +180,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE 
uav_handle, +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, UINT32 *cuda_surface_handle) { VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX }; @@ -202,7 +205,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3 extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info; -static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12DeviceExt *iface, D3D12_UAV_INFO *uav_info) +static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(d3d12_device_vkd3d_ext_iface *iface, D3D12_UAV_INFO *uav_info) { if (!uav_info) return E_INVALIDARG; @@ -417,3 +420,138 @@ CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl = d3d12_dxvk_interop_device_LockCommandQueue, d3d12_dxvk_interop_device_UnlockCommandQueue, }; + +static inline struct d3d12_device *d3d12_device_from_ID3DLowLatencyDevice(d3d_low_latency_device_iface *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_device, ID3DLowLatencyDevice_iface); +} + +ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + return d3d12_device_add_ref(device); +} + +static ULONG STDMETHODCALLTYPE d3d12_low_latency_device_Release(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + return d3d12_device_release(device); +} + +extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface, + REFIID riid, void **object); + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_QueryInterface(d3d_low_latency_device_iface *iface, + REFIID iid, void **out) +{ + struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface); + TRACE("iface %p, iid %s, out %p.\n", iface, 
debugstr_guid(iid), out); + return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out); +} + +static BOOL STDMETHODCALLTYPE d3d12_low_latency_device_SupportsLowLatency(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + return device->vk_info.NV_low_latency2; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_LatencySleep(d3d_low_latency_device_iface *iface) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_latency_sleep(device->swapchain_info.low_latency_swapchain); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencySleepMode(d3d_low_latency_device_iface *iface, BOOL low_latency_mode, BOOL low_latency_boost, + UINT32 minimum_interval_us) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_set_latency_sleep_mode(device->swapchain_info.low_latency_swapchain, low_latency_mode, low_latency_boost, minimum_interval_us); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencyMarker(d3d_low_latency_device_iface *iface, UINT64 frameID, UINT32 markerType) +{ + struct d3d12_device *device; + VkLatencyMarkerNV vk_marker; + uint64_t internal_frame_id; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + vk_marker = (VkLatencyMarkerNV)markerType; + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + /* Offset the frameID by one to ensure it will always + * be a valid presentID */ + internal_frame_id = frameID + 1; + + switch (vk_marker) + { + case VK_LATENCY_MARKER_SIMULATION_START_NV: + device->frame_markers.simulation 
= internal_frame_id; + break; + case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: + device->frame_markers.render = internal_frame_id; + break; + case VK_LATENCY_MARKER_PRESENT_START_NV: + device->frame_markers.present = internal_frame_id; + break; + default: + break; + } + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_set_latency_marker(device->swapchain_info.low_latency_swapchain, internal_frame_id, vk_marker); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_GetLatencyInfo(d3d_low_latency_device_iface *iface, D3D12_LATENCY_RESULTS *latency_results) +{ + struct d3d12_device *device; + + device = d3d12_device_from_ID3DLowLatencyDevice(iface); + + if (!device->vk_info.NV_low_latency2) + return E_NOTIMPL; + + if (device->swapchain_info.low_latency_swapchain) + dxgi_vk_swap_chain_get_latency_info(device->swapchain_info.low_latency_swapchain, latency_results); + + return S_OK; +} + +CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl = +{ + /* IUnknown methods */ + d3d12_low_latency_device_QueryInterface, + d3d12_low_latency_device_AddRef, + d3d12_low_latency_device_Release, + + /* ID3DLowLatencyDevice methods */ + d3d12_low_latency_device_SupportsLowLatency, + d3d12_low_latency_device_LatencySleep, + d3d12_low_latency_device_SetLatencySleepMode, + d3d12_low_latency_device_SetLatencyMarker, + d3d12_low_latency_device_GetLatencyInfo +}; diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 04394fd9f9..9c91ffcd27 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -60,6 +60,7 @@ vkd3d_src = [ 'cache.c', 'command.c', 'command_list_vkd3d_ext.c', + 'command_queue_vkd3d_ext.c', 'device.c', 'device_vkd3d_ext.c', 'heap.c', diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c index d0bc526d4a..8ea35fd821 100644 --- a/libs/vkd3d/swapchain.c +++ b/libs/vkd3d/swapchain.c @@ -49,6 +49,13 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_QueryInterface(IDXGI 
return ID3D12CommandQueue_QueryInterface(&chain->queue->ID3D12CommandQueue_iface, riid, object); } +struct low_latency_state +{ + bool mode; + bool boost; + uint32_t minimum_interval_us; +}; + struct dxgi_vk_swap_chain_present_request { uint64_t begin_frame_time_ns; @@ -58,6 +65,9 @@ struct dxgi_vk_swap_chain_present_request DXGI_COLOR_SPACE_TYPE dxgi_color_space_type; DXGI_VK_HDR_METADATA dxgi_hdr_metadata; uint32_t swap_interval; + uint64_t low_latency_frame_id; + struct low_latency_state requested_low_latency_state; + bool low_latency_update_requested; bool modifies_hdr_metadata; }; @@ -73,6 +83,7 @@ struct dxgi_vk_swap_chain struct d3d12_command_queue *queue; LONG refcount; + LONG internal_refcount; DXGI_SWAP_CHAIN_DESC1 desc; vkd3d_native_sync_handle frame_latency_event; @@ -85,6 +96,9 @@ struct dxgi_vk_swap_chain bool frame_latency_internal_is_static; VkSurfaceKHR vk_surface; + struct low_latency_state requested_low_latency_state; + bool low_latency_update_requested; + bool debug_latency; bool swapchain_maintenance1; @@ -146,6 +160,21 @@ struct dxgi_vk_swap_chain VkPresentModeKHR unlocked_present_mode; bool compatible_unlocked_present_mode; bool present_mode_forces_fifo; + + /* Info about the current low latency state of the swapchain */ + uint32_t low_latency_present_mode_count; + VkPresentModeKHR low_latency_present_modes[16]; + + pthread_mutex_t low_latency_swapchain_lock; + pthread_mutex_t low_latency_state_update_lock; + + VkSemaphore low_latency_sem; + uint64_t low_latency_sem_value; + + uint64_t previous_application_frame_id; + bool using_application_frame_id; + + struct low_latency_state low_latency_state; } present; struct dxgi_vk_swap_chain_present_request request, request_ring[DXGI_MAX_SWAP_CHAIN_BUFFERS]; @@ -390,6 +419,13 @@ static void dxgi_vk_swap_chain_cleanup(struct dxgi_vk_swap_chain *chain) for (i = 0; i < ARRAY_SIZE(chain->present.vk_swapchain_fences); i++) VK_CALL(vkDestroyFence(chain->queue->device->vk_device, 
chain->present.vk_swapchain_fences[i], NULL)); + if (chain->queue->device->vk_info.NV_low_latency2) + { + VK_CALL(vkDestroySemaphore(chain->queue->device->vk_device, chain->present.low_latency_sem, NULL)); + pthread_mutex_destroy(&chain->present.low_latency_swapchain_lock); + pthread_mutex_destroy(&chain->present.low_latency_state_update_lock); + } + VK_CALL(vkDestroySwapchainKHR(chain->queue->device->vk_device, chain->present.vk_swapchain, NULL)); for (i = 0; i < ARRAY_SIZE(chain->user.backbuffers); i++) @@ -414,14 +450,18 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_AddRef(IDXGIVkSwapChain *iface { struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); UINT refcount = InterlockedIncrement(&chain->refcount); + TRACE("iface %p, refcount %u\n", iface, refcount); + + if (refcount == 1) + dxgi_vk_swap_chain_incref(chain); + return refcount; } static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *iface) { struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); - struct d3d12_command_queue *queue = chain->queue; UINT refcount; refcount = InterlockedDecrement(&chain->refcount); @@ -429,11 +469,16 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *ifac if (!refcount) { + /* Calling this from the submission thread will result in a deadlock, so + * drain the swapchain queue now. 
*/ dxgi_vk_swap_chain_drain_queue(chain); - dxgi_vk_swap_chain_cleanup(chain); - vkd3d_free(chain); - ID3D12CommandQueue_Release(&queue->ID3D12CommandQueue_iface); + + if (chain->queue->device->vk_info.NV_low_latency2) + d3d12_device_remove_swapchain(chain->queue->device, chain); + + dxgi_vk_swap_chain_decref(chain); } + return refcount; } @@ -873,8 +918,23 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if request->dxgi_hdr_metadata = chain->user.dxgi_hdr_metadata; request->modifies_hdr_metadata = chain->user.modifies_hdr_metadata; request->begin_frame_time_ns = chain->user.begin_frame_time_ns; + request->low_latency_frame_id = chain->queue->device->frame_markers.present; chain->user.modifies_hdr_metadata = false; + if (chain->queue->device->vk_info.NV_low_latency2) + { + pthread_mutex_lock(&chain->present.low_latency_state_update_lock); + request->requested_low_latency_state = chain->requested_low_latency_state; + request->low_latency_update_requested = chain->low_latency_update_requested; + chain->low_latency_update_requested = false; + pthread_mutex_unlock(&chain->present.low_latency_state_update_lock); + } + else + { + memset(&request->requested_low_latency_state, 0, sizeof(request->requested_low_latency_state)); + request->low_latency_update_requested = false; + } + /* Need to process this task in queue thread to deal with wait-before-signal. * All interesting works happens in the callback. */ chain->user.blit_count += 1; @@ -1236,6 +1296,12 @@ static void dxgi_vk_swap_chain_destroy_swapchain_in_present_task(struct dxgi_vk_ if (!chain->present.vk_swapchain) return; + /* If we are going to destroy the swapchain and the device supports VK_NV_low_latency2 + * take the low latency lock. This ensures none of the other NV low latency functions + * will attempt to use the stale swapchain handle. 
*/ + if (chain->queue->device->vk_info.NV_low_latency2) + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + if (chain->swapchain_maintenance1) { dxgi_vk_swap_chain_drain_swapchain_fences(chain); @@ -1268,6 +1334,9 @@ static void dxgi_vk_swap_chain_destroy_swapchain_in_present_task(struct dxgi_vk_ chain->present.present_id_valid = false; chain->present.present_id = 0; chain->present.current_backbuffer_index = UINT32_MAX; + + if (chain->queue->device->vk_info.NV_low_latency2) + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); } static VkColorSpaceKHR convert_color_space(DXGI_COLOR_SPACE_TYPE dxgi_color_space) @@ -1448,10 +1517,29 @@ static bool dxgi_vk_swap_chain_find_compatible_unlocked_present_mode( return true; } +static void dxgi_vk_swap_chain_set_low_latency_state(struct dxgi_vk_swap_chain *chain, struct low_latency_state *low_latency_state) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencySleepModeInfoNV swapchain_latency_sleep_mode_info; + + memset(&swapchain_latency_sleep_mode_info, 0, sizeof(swapchain_latency_sleep_mode_info)); + swapchain_latency_sleep_mode_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV; + swapchain_latency_sleep_mode_info.pNext = NULL; + + swapchain_latency_sleep_mode_info.lowLatencyMode = low_latency_state->mode; + swapchain_latency_sleep_mode_info.lowLatencyBoost = low_latency_state->boost; + swapchain_latency_sleep_mode_info.minimumIntervalUs = low_latency_state->minimum_interval_us; + + VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &swapchain_latency_sleep_mode_info)); + + chain->present.low_latency_state = *low_latency_state; +} + static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk_swap_chain *chain) { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkPhysicalDevice vk_physical_device = chain->queue->device->vk_physical_device; + 
VkSwapchainLatencyCreateInfoNV swapchain_latency_create_info; VkSwapchainPresentModesCreateInfoEXT present_modes_info; VkDevice vk_device = chain->queue->device->vk_device; VkCommandPoolCreateInfo command_pool_create_info; @@ -1573,6 +1661,15 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk swapchain_create_info.imageExtent.height = max(swapchain_create_info.imageExtent.height, surface_caps.minImageExtent.height); swapchain_create_info.imageExtent.height = min(swapchain_create_info.imageExtent.height, surface_caps.maxImageExtent.height); + if (chain->queue->device->vk_info.NV_low_latency2) + { + memset(&swapchain_latency_create_info, 0, sizeof(swapchain_latency_create_info)); + swapchain_latency_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV; + swapchain_latency_create_info.pNext = NULL; + swapchain_latency_create_info.latencyModeEnable = true; + swapchain_create_info.pNext = &swapchain_latency_create_info; + } + vr = VK_CALL(vkCreateSwapchainKHR(vk_device, &swapchain_create_info, NULL, &chain->present.vk_swapchain)); if (vr < 0) { @@ -1586,6 +1683,19 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk INFO("Got %u swapchain images.\n", chain->present.backbuffer_count); + /* If low latency is supported restore the current low latency state now */ + if (chain->queue->device->vk_info.NV_low_latency2) + { + struct low_latency_state* low_latency_state = chain->request.low_latency_update_requested ? + &chain->request.requested_low_latency_state : &chain->present.low_latency_state; + + dxgi_vk_swap_chain_set_low_latency_state(chain, low_latency_state); + + /* If low latency is enabled assume the application will start driving the frame id again. 
*/ + chain->present.using_application_frame_id = low_latency_state->mode; + chain->present.previous_application_frame_id = 0; + } + memset(&view_info, 0, sizeof(view_info)); view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; view_info.format = swapchain_create_info.imageFormat; @@ -1626,6 +1736,9 @@ static bool dxgi_vk_swap_chain_request_needs_swapchain_recreation( return request->dxgi_color_space_type != last_request->dxgi_color_space_type || request->dxgi_format != last_request->dxgi_format || request->target_min_image_count != last_request->target_min_image_count || + (chain->present.low_latency_state.mode && + (request->low_latency_frame_id != chain->present.previous_application_frame_id) && + !chain->present.using_application_frame_id) || ((!!request->swap_interval) != (!!last_request->swap_interval) && !chain->present.compatible_unlocked_present_mode); } @@ -2019,6 +2132,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkSwapchainPresentFenceInfoEXT present_fence_info; VkSwapchainPresentModeInfoEXT present_mode_info; + uint32_t max_present_iterations; VkPresentModeKHR present_mode; VkPresentInfoKHR present_info; VkPresentIdKHR present_id; @@ -2035,6 +2149,12 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (!chain->present.vk_swapchain) return; + /* If low latency is enabled we should only try to present once to avoid having to + * increment the present id for each failed present. Make sure to do this after checking + * if the swapchain needs to be recreated so the low latency state is up to date. */ + max_present_iterations = chain->present.low_latency_state.mode ? 
+ 0 : 3; + vr = dxgi_vk_swap_chain_try_acquire_next_image(chain); VKD3D_DEVICE_REPORT_FAULT_AND_BREADCRUMB_IF(chain->queue->device, vr == VK_ERROR_DEVICE_LOST); @@ -2046,7 +2166,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (vr == VK_ERROR_OUT_OF_DATE_KHR) { - if (retry_counter < 3) + if (retry_counter < max_present_iterations) dxgi_vk_swap_chain_present_iteration(chain, retry_counter + 1); } else if (vr == VK_ERROR_SURFACE_LOST_KHR) @@ -2081,11 +2201,28 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai * Non-FIFO swapchains will pump their frame latency handles through the fallback path of blit command being done. * Especially on Xwayland, the present ID is updated when images actually hit on-screen due to MAILBOX behavior. * This would unnecessarily stall our progress. */ - if (chain->wait_thread.active && !chain->present.present_id_valid && swapchain_is_fifo) + if (chain->wait_thread.active && !chain->present.present_id_valid && + (swapchain_is_fifo || chain->present.low_latency_state.mode)) { - /* If we recreate swapchain, we still want to maintain a monotonically increasing counter here for - * profiling purposes. */ - chain->present.present_id = chain->present.complete_count + 1; + if (chain->present.low_latency_state.mode && + chain->request.low_latency_frame_id > chain->present.previous_application_frame_id) + { + chain->present.present_id = chain->request.low_latency_frame_id; + chain->present.previous_application_frame_id = chain->request.low_latency_frame_id; + } + else + { + /* If we recreate swapchain, we still want to maintain a monotonically increasing counter here for + * profiling purposes. */ + chain->present.present_id = chain->present.low_latency_state.mode ? + chain->present.present_id + 1 : chain->present.complete_count + 1; + + /* It is possible for an application to stop providing low latency frame ids. 
If that happens we are + * now responsible for ensuring the present id is always incrementing. If the application starts to + * provide them again, we will have to recreate the swapchain. */ + chain->present.using_application_frame_id = false; + } + present_id.sType = VK_STRUCTURE_TYPE_PRESENT_ID_KHR; present_id.pNext = NULL; present_id.swapchainCount = 1; @@ -2150,7 +2287,7 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (vr == VK_ERROR_OUT_OF_DATE_KHR) { - if (retry_counter < 3) + if (retry_counter < max_present_iterations) dxgi_vk_swap_chain_present_iteration(chain, retry_counter + 1); } else if (vr == VK_ERROR_SURFACE_LOST_KHR) @@ -2201,6 +2338,7 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) { const struct dxgi_vk_swap_chain_present_request *next_request; struct dxgi_vk_swap_chain *chain = chain_; + bool require_low_latency_state_update; uint32_t next_present_count; uint32_t present_count; uint32_t i; @@ -2214,6 +2352,23 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) if (chain->request.modifies_hdr_metadata) dxgi_vk_swap_chain_set_hdr_metadata(chain); + require_low_latency_state_update = chain->request.low_latency_update_requested && + memcmp(&chain->present.low_latency_state, &chain->request.requested_low_latency_state, + sizeof(chain->present.low_latency_state)); + + /* If the low latency state is already set to what the application is requesting, it is safe to skip + * this request. */ + if (require_low_latency_state_update) + { + /* When the low latency mode isn't changing the low latency state can be updated immediately. Otherwise + * the swapchain will have to be recreated to reset the present id. For this case the low latency state + * update will happen after the swapchain has been recreated. 
*/ + if (chain->present.low_latency_state.mode == chain->request.requested_low_latency_state.mode) + dxgi_vk_swap_chain_set_low_latency_state(chain, &chain->request.requested_low_latency_state); + else + chain->present.force_swapchain_recreation = true; + } + + /* If no QueuePresentKHRs successfully commits a present ID, we'll fallback to a normal queue signal. */ + chain->present.present_id_valid = false; @@ -2222,8 +2377,10 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_) * TODO: Propose VK_EXT_present_interval. */ present_count = max(1u, chain->request.swap_interval); - /* If we hit the legacy way of synchronizing with swapchain, blitting multiple times would be horrible. */ - if (!chain->wait_thread.active) + /* If we hit the legacy way of synchronizing with swapchain, blitting multiple times would be horrible. + * Also if low latency mode is enabled only do a single present iteration to avoid falling off the application + * provided frame id path. */ + if (!chain->wait_thread.active || chain->present.low_latency_state.mode) present_count = 1; for (i = 0; i < present_count; i++) @@ -2374,6 +2531,80 @@ static HRESULT dxgi_vk_swap_chain_init_waiter_thread(struct dxgi_vk_swap_chain * return S_OK; } +static HRESULT dxgi_vk_swap_chain_init_low_latency(struct dxgi_vk_swap_chain* chain) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkPhysicalDevice vk_physical_device = chain->queue->device->vk_physical_device; + + VkLatencySurfaceCapabilitiesNV latency_surface_caps; + VkSemaphoreTypeCreateInfoKHR semaphore_type_info; + VkPhysicalDeviceSurfaceInfo2KHR surface_info; + VkSurfaceCapabilities2KHR surface_caps; + VkSemaphoreCreateInfo semaphore_info; + VkResult vr; + + chain->present.low_latency_present_mode_count = 0; + + chain->present.low_latency_sem = VK_NULL_HANDLE; + chain->present.low_latency_sem_value = 0; + + chain->present.previous_application_frame_id = 0; + chain->present.using_application_frame_id = false; + 
chain->present.low_latency_state.mode = false; + chain->present.low_latency_state.boost = false; + chain->present.low_latency_state.minimum_interval_us = 0; + + if (chain->queue->device->vk_info.NV_low_latency2) + { + memset(&surface_info, 0, sizeof(surface_info)); + surface_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR; + surface_info.pNext = NULL; + surface_info.surface = chain->vk_surface; + + memset(&latency_surface_caps, 0, sizeof(latency_surface_caps)); + latency_surface_caps.sType = VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV; + latency_surface_caps.presentModeCount = ARRAY_SIZE(chain->present.low_latency_present_modes); + latency_surface_caps.pPresentModes = chain->present.low_latency_present_modes; + + memset(&surface_caps, 0, sizeof(surface_caps)); + surface_caps.sType = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR; + surface_caps.pNext = &latency_surface_caps; + + if ((vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilities2KHR(vk_physical_device, &surface_info, + &surface_caps))) < 0) + { + ERR("Failed to query latency surface capabilities count, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + chain->present.low_latency_present_mode_count = latency_surface_caps.presentModeCount; + + memset(&semaphore_type_info, 0, sizeof(semaphore_type_info)); + semaphore_type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; + semaphore_type_info.pNext = NULL; + semaphore_type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; + semaphore_type_info.initialValue = 0; + + memset(&semaphore_info, 0, sizeof(semaphore_info)); + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = &semaphore_type_info; + semaphore_info.flags = 0; + + if ((vr = VK_CALL(vkCreateSemaphore(chain->queue->device->vk_device, &semaphore_info, + NULL, &chain->present.low_latency_sem))) < 0) + { + ERR("Failed to create semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + 
pthread_mutex_init(&chain->present.low_latency_swapchain_lock, NULL); + pthread_mutex_init(&chain->present.low_latency_state_update_lock, NULL); + } + + return S_OK; +} + static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVkSurfaceFactory *pFactory, const DXGI_SWAP_CHAIN_DESC1 *pDesc, struct d3d12_command_queue *queue) { @@ -2381,6 +2612,7 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk chain->IDXGIVkSwapChain_iface.lpVtbl = &dxgi_vk_swap_chain_vtbl; chain->refcount = 1; + chain->internal_refcount = 1; chain->queue = queue; chain->desc = *pDesc; @@ -2402,6 +2634,9 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk if (FAILED(hr = dxgi_vk_swap_chain_init_waiter_thread(chain))) goto err; + if (FAILED(hr = dxgi_vk_swap_chain_init_low_latency(chain))) + goto err; + ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface); return S_OK; @@ -2429,6 +2664,9 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_CreateSwapChain(IDXG return hr; } + if (chain->queue->device->vk_info.NV_low_latency2) + d3d12_device_register_swapchain(chain->queue->device, chain); + *ppSwapchain = &chain->IDXGIVkSwapChain_iface; return S_OK; } @@ -2444,6 +2682,181 @@ static CONST_VTBL struct IDXGIVkSwapChainFactoryVtbl dxgi_vk_swap_chain_factory_ dxgi_vk_swap_chain_factory_CreateSwapChain, }; +bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain) +{ + return chain->present.low_latency_state.mode; +} + +void dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencySleepInfoNV latency_sleep_info; + VkSemaphoreWaitInfo sem_wait_info; + bool should_sleep = false; + + /* Increment the low latency sem value before the wait */ + chain->present.low_latency_sem_value++; + + memset(&latency_sleep_info, 0, sizeof(latency_sleep_info)); + latency_sleep_info.sType = 
VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV; + latency_sleep_info.pNext = NULL; + latency_sleep_info.signalSemaphore = chain->present.low_latency_sem; + latency_sleep_info.value = chain->present.low_latency_sem_value; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + { + should_sleep = true; + VK_CALL(vkLatencySleepNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_sleep_info)); + } + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); + + if (should_sleep) + { + memset(&sem_wait_info, 0, sizeof(sem_wait_info)); + sem_wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; + sem_wait_info.pNext = NULL; + sem_wait_info.flags = 0; + sem_wait_info.semaphoreCount = 1; + sem_wait_info.pSemaphores = &chain->present.low_latency_sem; + sem_wait_info.pValues = &chain->present.low_latency_sem_value; + + VK_CALL(vkWaitSemaphores(chain->queue->device->vk_device, &sem_wait_info, UINT64_MAX)); + } +} + +void dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain, bool low_latency_mode, + bool low_latency_boost, uint32_t minimum_interval_us) +{ + pthread_mutex_lock(&chain->present.low_latency_state_update_lock); + + chain->requested_low_latency_state.mode = low_latency_mode; + chain->requested_low_latency_state.boost = low_latency_boost; + chain->requested_low_latency_state.minimum_interval_us = minimum_interval_us; + + /* The actual call to vkSetLatencySleepModeNV will happen + * when the application calls Present and the requested low + * latency state is passed to the present task. 
*/ + chain->low_latency_update_requested = true; + + pthread_mutex_unlock(&chain->present.low_latency_state_update_lock); +} + +void dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain, uint64_t frameID, VkLatencyMarkerNV marker) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkSetLatencyMarkerInfoNV latency_marker_info; + + memset(&latency_marker_info, 0, sizeof(latency_marker_info)); + latency_marker_info.sType = VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV; + latency_marker_info.pNext = NULL; + latency_marker_info.presentID = frameID; + latency_marker_info.marker = marker; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + VK_CALL(vkSetLatencyMarkerNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_marker_info)); + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); +} + +void dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3D12_LATENCY_RESULTS *latency_results) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkLatencyTimingsFrameReportNV* frame_reports; + VkGetLatencyMarkerInfoNV marker_info; + uint32_t i; + + pthread_mutex_lock(&chain->present.low_latency_swapchain_lock); + + if (chain->present.vk_swapchain) + { + memset(&marker_info, 0, sizeof(marker_info)); + marker_info.sType = VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV; + + VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &marker_info)); + + if (marker_info.timingCount >= 64) + { + marker_info.timingCount = 64; + frame_reports = vkd3d_calloc(marker_info.timingCount, sizeof(VkLatencyTimingsFrameReportNV)); + for (i = 0; i < marker_info.timingCount; i++) + frame_reports[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV; + + marker_info.pTimings = frame_reports; + + VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, 
chain->present.vk_swapchain, &marker_info)); + + for (i = 0; i < marker_info.timingCount; i++) + { + latency_results->frame_reports[i].frameID = frame_reports[i].presentID - 1; + latency_results->frame_reports[i].inputSampleTime = frame_reports[i].inputSampleTimeUs; + latency_results->frame_reports[i].simStartTime = frame_reports[i].simStartTimeUs; + latency_results->frame_reports[i].simEndTime = frame_reports[i].simEndTimeUs; + latency_results->frame_reports[i].renderSubmitStartTime = frame_reports[i].renderSubmitStartTimeUs; + latency_results->frame_reports[i].renderSubmitEndTime = frame_reports[i].renderSubmitEndTimeUs; + latency_results->frame_reports[i].presentStartTime = frame_reports[i].presentStartTimeUs; + latency_results->frame_reports[i].presentEndTime = frame_reports[i].presentEndTimeUs; + latency_results->frame_reports[i].driverStartTime = frame_reports[i].driverStartTimeUs; + latency_results->frame_reports[i].driverEndTime = frame_reports[i].driverEndTimeUs; + latency_results->frame_reports[i].osRenderQueueStartTime = frame_reports[i].osRenderQueueStartTimeUs; + latency_results->frame_reports[i].osRenderQueueEndTime = frame_reports[i].osRenderQueueEndTimeUs; + latency_results->frame_reports[i].gpuRenderStartTime = frame_reports[i].gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuRenderEndTime = frame_reports[i].gpuRenderEndTimeUs; + latency_results->frame_reports[i].gpuActiveRenderTimeUs = + frame_reports[i].gpuRenderEndTimeUs - frame_reports[i].gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuFrameTimeUs = 0; + + if (i) + { + latency_results->frame_reports[i].gpuFrameTimeUs = + frame_reports[i].gpuRenderEndTimeUs - frame_reports[i - 1].gpuRenderEndTimeUs; + } + } + + vkd3d_free(frame_reports); + } + else + { + /* If there are less than 64 frame reports, zero out the frame report + * buffer returned to the app. 
*/ + memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports)); + } + } + + pthread_mutex_unlock(&chain->present.low_latency_swapchain_lock); +} + +ULONG dxgi_vk_swap_chain_incref(struct dxgi_vk_swap_chain *chain) +{ + ULONG refcount = InterlockedIncrement(&chain->internal_refcount); + + TRACE("%p increasing refcount to %u.\n", chain, refcount); + + return refcount; +} + +ULONG dxgi_vk_swap_chain_decref(struct dxgi_vk_swap_chain *chain) +{ + ULONG refcount = InterlockedDecrement(&chain->internal_refcount); + + TRACE("%p decreasing refcount to %u.\n", chain, refcount); + + if (!refcount) + { + struct d3d12_command_queue *queue = chain->queue; + + dxgi_vk_swap_chain_cleanup(chain); + vkd3d_free(chain); + ID3D12CommandQueue_Release(&queue->ID3D12CommandQueue_iface); + } + + return refcount; +} + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain) { chain->IDXGIVkSwapChainFactory_iface.lpVtbl = &dxgi_vk_swap_chain_factory_vtbl; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 935f3dc8dc..dcba390549 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -37,6 +37,7 @@ #include "vkd3d_platform.h" #include "vkd3d_swapchain_factory.h" #include "vkd3d_command_list_vkd3d_ext.h" +#include "vkd3d_command_queue_vkd3d_ext.h" #include "vkd3d_device_vkd3d_ext.h" #include "vkd3d_string.h" #include "vkd3d_file_utils.h" @@ -172,6 +173,7 @@ struct vkd3d_vulkan_info bool NV_shader_subgroup_partitioned; bool NV_memory_decompression; bool NV_device_generated_commands_compute; + bool NV_low_latency2; /* VALVE extensions */ bool VALVE_mutable_descriptor_type; bool VALVE_descriptor_set_host_mapping; @@ -3029,6 +3031,7 @@ struct vkd3d_queue VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, uint32_t queue_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue 
**queue); +void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue); void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, @@ -3085,6 +3088,7 @@ struct d3d12_command_queue_submission_execute struct d3d12_command_allocator **command_allocators; UINT cmd_count; UINT num_command_allocators; + uint64_t low_latency_frame_id; struct vkd3d_initial_transition *transitions; size_t transition_count; @@ -3142,12 +3146,33 @@ struct dxgi_vk_swap_chain_factory struct d3d12_command_queue *queue; }; +struct dxgi_vk_swap_chain; + +bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain *chain); +void dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain *chain); +void dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain *chain, + bool low_latency_mode, bool low_latency_boost, uint32_t minimum_interval_us); +void dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain *chain, + uint64_t frameID, VkLatencyMarkerNV marker); +void dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain *chain, + D3D12_LATENCY_RESULTS *latency_results); + +ULONG dxgi_vk_swap_chain_incref(struct dxgi_vk_swap_chain *chain); +ULONG dxgi_vk_swap_chain_decref(struct dxgi_vk_swap_chain *chain); + HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain); +/* ID3D12CommandQueueExt */ +typedef ID3D12CommandQueueExt d3d12_command_queue_vkd3d_ext_iface; + /* ID3D12CommandQueue */ +typedef ID3D12CommandQueue d3d12_command_queue_iface; + struct d3d12_command_queue { - ID3D12CommandQueue ID3D12CommandQueue_iface; + d3d12_command_queue_iface ID3D12CommandQueue_iface; + d3d12_command_queue_vkd3d_ext_iface ID3D12CommandQueueExt_iface; + LONG refcount; D3D12_COMMAND_QUEUE_DESC desc; @@ -4442,6 +4467,7 @@ enum 
vkd3d_queue_family struct vkd3d_queue_family_info { + struct vkd3d_queue *out_of_band_queue; struct vkd3d_queue **queues; uint32_t queue_count; uint32_t vk_family_index; @@ -4456,6 +4482,19 @@ struct vkd3d_cached_command_allocator uint32_t vk_family_index; }; +struct vkd3d_device_swapchain_info +{ + struct dxgi_vk_swap_chain *low_latency_swapchain; + uint32_t swapchain_count; +}; + +struct vkd3d_device_frame_markers +{ + uint64_t simulation; + uint64_t render; + uint64_t present; +}; + /* ID3D12Device */ typedef ID3D12Device12 d3d12_device_iface; @@ -4468,6 +4507,9 @@ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface; /* ID3D12DXVKInteropDevice */ typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface; +/* ID3DLowLatencyDevice */ +typedef ID3DLowLatencyDevice d3d_low_latency_device_iface; + struct d3d12_device_scratch_pool { struct vkd3d_scratch_buffer scratch_buffers[VKD3D_MAX_SCRATCH_BUFFER_COUNT]; @@ -4591,6 +4633,7 @@ struct d3d12_device d3d12_device_iface ID3D12Device_iface; d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface; d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface; + d3d_low_latency_device_iface ID3DLowLatencyDevice_iface; LONG refcount; VkDevice vk_device; @@ -4600,6 +4643,7 @@ struct d3d12_device pthread_mutex_t mutex; pthread_mutex_t global_submission_mutex; + spinlock_t low_latency_swapchain_spinlock; VkPhysicalDeviceMemoryProperties memory_properties; @@ -4662,6 +4706,9 @@ struct d3d12_device #endif uint64_t shader_interface_key; uint32_t device_has_dgc_templates; + + struct vkd3d_device_swapchain_info swapchain_info; + struct vkd3d_device_frame_markers frame_markers; }; HRESULT d3d12_device_create(struct vkd3d_instance *instance, @@ -4885,6 +4932,32 @@ UINT d3d12_determine_shading_rate_image_tile_size(struct d3d12_device *device); bool d3d12_device_supports_required_subgroup_size_for_stage( struct d3d12_device *device, VkShaderStageFlagBits stage); +static inline void d3d12_device_register_swapchain(struct d3d12_device* 
device, struct dxgi_vk_swap_chain* swapchain) +{ + spinlock_acquire(&device->low_latency_swapchain_spinlock); + + if (!device->swapchain_info.low_latency_swapchain && device->swapchain_info.swapchain_count == 0) + device->swapchain_info.low_latency_swapchain = swapchain; + else + device->swapchain_info.low_latency_swapchain = NULL; + + device->swapchain_info.swapchain_count++; + + spinlock_release(&device->low_latency_swapchain_spinlock); +} + +static inline void d3d12_device_remove_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain) +{ + spinlock_acquire(&device->low_latency_swapchain_spinlock); + + if (device->swapchain_info.low_latency_swapchain == swapchain) + device->swapchain_info.low_latency_swapchain = NULL; + + device->swapchain_info.swapchain_count--; + + spinlock_release(&device->low_latency_swapchain_spinlock); +} + /* ID3DBlob */ struct d3d_blob { diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index e3b672a0e1..96f673dda7 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -354,6 +354,13 @@ VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryIndirectCountNV) /* VK_EXT_device_fault */ VK_DEVICE_EXT_PFN(vkGetDeviceFaultInfoEXT) +/* VK_NV_low_latency2 */ +VK_DEVICE_EXT_PFN(vkSetLatencySleepModeNV) +VK_DEVICE_EXT_PFN(vkLatencySleepNV) +VK_DEVICE_EXT_PFN(vkSetLatencyMarkerNV) +VK_DEVICE_EXT_PFN(vkGetLatencyTimingsNV) +VK_DEVICE_EXT_PFN(vkQueueNotifyOutOfBandNV) + #undef VK_INSTANCE_PFN #undef VK_INSTANCE_EXT_PFN #undef VK_DEVICE_PFN