Skip to content

Commit

Permalink
vkd3d: Add support for VK_NV_low_latency2
Browse files Browse the repository at this point in the history
This commit adds support for the VK_NV_low_latency2 extension and
implements the ID3DLowLatencyDevice and ID3D12CommandQueueExt
interfaces.
  • Loading branch information
esullivan-nvidia authored and ejsullivan committed Feb 21, 2024
1 parent f462223 commit 495c249
Show file tree
Hide file tree
Showing 12 changed files with 1,011 additions and 37 deletions.
1 change: 1 addition & 0 deletions include/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ vkd3d_idl = [
'vkd3d_dxgitype.idl',
'vkd3d_swapchain_factory.idl',
'vkd3d_command_list_vkd3d_ext.idl',
'vkd3d_command_queue_vkd3d_ext.idl',
'vkd3d_device_vkd3d_ext.idl',
'vkd3d_core_interface.idl',
]
Expand Down
30 changes: 30 additions & 0 deletions include/vkd3d_command_queue_vkd3d_ext.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* * Copyright 2023 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
import "vkd3d_d3d12.idl";
import "vkd3d_vk_includes.h";

/* Extension interface for command queues, derived from IUnknown. */
[
uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6),
object,
local,
pointer_default(unique)
]
interface ID3D12CommandQueueExt : IUnknown
{
/* Notifies the queue that it is to be treated as out-of-band.
 * `type` is one of the D3D12_OUT_OF_BAND_CQ_TYPE values. */
HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type);
}
15 changes: 15 additions & 0 deletions include/vkd3d_device_vkd3d_ext.idl
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown
HRESULT LockCommandQueue(ID3D12CommandQueue *queue);
HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue);
}

/* Low-latency control interface, derived from IUnknown.
 * NOTE(review): the method contracts are not visible in this file; the
 * comments below only restate what the signatures show. */
[
uuid(f3112584-41f9-348d-a59b-00b7e1d285d6),
object,
local,
pointer_default(unique)
]
interface ID3DLowLatencyDevice : IUnknown
{
/* Returns a BOOL; presumably reports whether low latency is available - confirm. */
BOOL SupportsLowLatency();
/* Takes no arguments and returns an HRESULT. */
HRESULT LatencySleep();
/* Takes two BOOL toggles and an interval.
 * NOTE(review): the `_us` suffix suggests microseconds - confirm. */
HRESULT SetLatencySleepMode(BOOL low_latency_mode, BOOL low_latency_boost, UINT32 minimum_interval_us);
/* Takes a 64-bit frame identifier and a 32-bit marker type.
 * NOTE(review): the valid markerType values are not defined in this file. */
HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType);
/* Takes a pointer to a D3D12_LATENCY_RESULTS structure; presumably filled in by the call - confirm. */
HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results);
}
34 changes: 33 additions & 1 deletion include/vkd3d_vk_includes.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout;
/* Identifiers for the Vulkan extensions named by the D3D12_VK_* constants.
 * The values are consecutive integers (0x1, 0x2, 0x3), not distinct bit flags. */
typedef enum D3D12_VK_EXTENSION
{
D3D12_VK_NVX_BINARY_IMPORT = 0x1,
D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2
D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2,
D3D12_VK_NV_LOW_LATENCY_2 = 0x3
} D3D12_VK_EXTENSION;

/* Out-of-band command queue kinds, passed to
 * ID3D12CommandQueueExt::NotifyOutOfBandCommandQueue(). */
typedef enum D3D12_OUT_OF_BAND_CQ_TYPE
{
OUT_OF_BAND_RENDER = 0,
OUT_OF_BAND_PRESENT = 1
} D3D12_OUT_OF_BAND_CQ_TYPE;

typedef struct D3D12_CUBIN_DATA_HANDLE
{
VkCuFunctionNVX vkCuFunction;
Expand All @@ -61,5 +68,30 @@ typedef struct D3D12_UAV_INFO
UINT64 gpuVASize;
} D3D12_UAV_INFO;

/* Latency report container used with ID3DLowLatencyDevice::GetLatencyInfo().
 * Holds a version field, a fixed array of 64 per-frame reports and trailing
 * padding bytes. */
typedef struct D3D12_LATENCY_RESULTS
{
UINT32 version;
/* One entry per reported frame. The *Time members are 64-bit values.
 * NOTE(review): their units and time base are not stated in this file. */
struct D3D12_FRAME_REPORT {
UINT64 frameID;
UINT64 inputSampleTime;
UINT64 simStartTime;
UINT64 simEndTime;
UINT64 renderSubmitStartTime;
UINT64 renderSubmitEndTime;
UINT64 presentStartTime;
UINT64 presentEndTime;
UINT64 driverStartTime;
UINT64 driverEndTime;
UINT64 osRenderQueueStartTime;
UINT64 osRenderQueueEndTime;
UINT64 gpuRenderStartTime;
UINT64 gpuRenderEndTime;
/* NOTE(review): the `Us` suffix suggests microseconds - confirm. */
UINT32 gpuActiveRenderTimeUs;
UINT32 gpuFrameTimeUs;
/* Presumably reserved padding ("rsvd") - confirm. */
UINT8 rsvd[120];
} frame_reports[64];
/* Presumably reserved padding ("rsvd") - confirm. */
UINT8 rsvd[32];
} D3D12_LATENCY_RESULTS;

#endif // __VKD3D_VK_INCLUDES_H

70 changes: 65 additions & 5 deletions libs/vkd3d/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,26 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u
return hr;
}

/* Flags `queue` as an out-of-band queue with the Vulkan driver.
 *
 * Does nothing when the device lacks NV_low_latency2. Otherwise the queue is
 * notified twice through vkQueueNotifyOutOfBandNV: first with the RENDER
 * queue type, then with the PRESENT queue type. */
void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue)
{
    /* VK_CALL resolves entry points through a local named vk_procs. */
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkOutOfBandQueueTypeInfoNV notify_info;

    if (device->vk_info.NV_low_latency2)
    {
        memset(&notify_info, 0, sizeof(notify_info));
        notify_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV;
        notify_info.pNext = NULL;

        /* The same info struct is reused; only the queue type changes. */
        notify_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV;
        VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &notify_info));

        notify_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV;
        VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &notify_info));
    }
}

static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue,
struct vkd3d_fence_worker *worker,
const struct vkd3d_vk_device_procs *vk_procs);
Expand Down Expand Up @@ -16591,12 +16611,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis
}

/* ID3D12CommandQueue */
extern ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface);

/* Recovers the owning d3d12_command_queue from a pointer to its embedded
 * ID3D12CommandQueue_iface member. */
static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface)
{
    struct d3d12_command_queue *owner;

    owner = CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface);
    return owner;
}

static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
REFIID riid, void **object)
{
TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
Expand All @@ -16615,6 +16637,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return S_OK;
}

if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt))
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface);
*object = &command_queue->ID3D12CommandQueueExt_iface;
return S_OK;
}

if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory))
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
Expand All @@ -16629,7 +16659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return E_NOINTERFACE;
}

static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedIncrement(&command_queue->refcount);
Expand All @@ -16639,7 +16669,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if
return refcount;
}

static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedDecrement(&command_queue->refcount);
Expand Down Expand Up @@ -17132,6 +17162,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
sub.execute.cmd_count = num_command_buffers;
sub.execute.command_allocators = allocators;
sub.execute.num_command_allocators = command_list_count;
sub.execute.low_latency_frame_id = command_queue->device->frame_markers.render;
#ifdef VKD3D_ENABLE_BREADCRUMBS
sub.execute.breadcrumb_indices = breadcrumb_indices;
sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0;
Expand Down Expand Up @@ -17295,6 +17326,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(
return desc;
}

extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl;

static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl =
{
/* IUnknown methods */
Expand Down Expand Up @@ -17807,10 +17840,12 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
const VkSemaphoreSubmitInfo *transition_semaphore,
struct d3d12_command_allocator **command_allocators, size_t num_command_allocators,
struct vkd3d_queue_timeline_trace_cookie timeline_cookie,
bool debug_capture, bool split_submissions)
uint64_t low_latency_frame_id, bool debug_capture, bool split_submissions)
{
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue;
VkLatencySubmissionPresentIdNV latency_submit_present_info;
struct dxgi_vk_swap_chain *low_latency_swapchain;
VkSemaphoreSubmitInfo signal_semaphore_info;
VkSemaphoreSubmitInfo binary_semaphore_info;
VkSubmitInfo2 submit_desc[4];
Expand Down Expand Up @@ -17895,6 +17930,27 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
num_submits += 2;
}

if (command_queue->device->vk_info.NV_low_latency2)
{
spinlock_acquire(&command_queue->device->low_latency_swapchain_spinlock);
if ((low_latency_swapchain = command_queue->device->swapchain_info.low_latency_swapchain))
dxgi_vk_swap_chain_incref(low_latency_swapchain);
spinlock_release(&command_queue->device->low_latency_swapchain_spinlock);

if (low_latency_swapchain && dxgi_vk_swap_chain_low_latency_enabled(low_latency_swapchain))
{
latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV;
latency_submit_present_info.pNext = NULL;
latency_submit_present_info.presentID = low_latency_frame_id;

for (i = 0; i < num_submits; i++)
submit_desc[i].pNext = &latency_submit_present_info;
}

if (low_latency_swapchain)
dxgi_vk_swap_chain_decref(low_latency_swapchain);
}

#ifdef VKD3D_ENABLE_RENDERDOC
/* For each submission we have marked to be captured, we will first need to filter it
* based on VKD3D_AUTO_CAPTURE_COUNTS.
Expand Down Expand Up @@ -18397,7 +18453,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
submission.execute.command_allocators,
submission.execute.num_command_allocators,
submission.execute.timeline_cookie,
submission.execute.debug_capture, submission.execute.split_submission);
submission.execute.low_latency_frame_id,
submission.execute.debug_capture,
submission.execute.split_submission);

/* command_queue_execute takes ownership of the
* outstanding_submission_counters and queue_timeline_indices allocations.
Expand Down Expand Up @@ -18460,6 +18518,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
int rc;

queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl;
queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl;
queue->refcount = 1;

queue->desc = *desc;
Expand Down Expand Up @@ -18588,6 +18647,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource

memset(&sub, 0, sizeof(sub));
sub.type = VKD3D_SUBMISSION_EXECUTE;
sub.execute.low_latency_frame_id = d3d12_queue->device->frame_markers.render;
sub.execute.transition_count = 1;
sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions));
sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE;
Expand Down
91 changes: 91 additions & 0 deletions libs/vkd3d/command_queue_vkd3d_ext.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* * Copyright 2023 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/

#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API

#include "vkd3d_private.h"

/* Recovers the owning d3d12_command_queue from a pointer to its embedded
 * ID3D12CommandQueueExt_iface member. */
static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    struct d3d12_command_queue *owner;

    owner = CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface);
    return owner;
}

extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface);

/* IUnknown::AddRef for the extension interface.
 * Forwards to d3d12_command_queue_AddRef() on the queue's
 * ID3D12CommandQueue interface and returns its result. */
ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    return d3d12_command_queue_AddRef(
            &d3d12_command_queue_from_ID3D12CommandQueueExt(iface)->ID3D12CommandQueue_iface);
}

extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface);

/* IUnknown::Release for the extension interface.
 * Forwards to d3d12_command_queue_Release() on the queue's
 * ID3D12CommandQueue interface and returns its result. */
static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    return d3d12_command_queue_Release(
            &d3d12_command_queue_from_ID3D12CommandQueueExt(iface)->ID3D12CommandQueue_iface);
}

extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface,
REFIID iid, void **object);

/* IUnknown::QueryInterface for the extension interface.
 * Traces the call, then delegates the lookup to
 * d3d12_command_queue_QueryInterface() on the queue's ID3D12CommandQueue
 * interface and returns its result. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface,
        REFIID iid, void **out)
{
    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    return d3d12_command_queue_QueryInterface(
            &d3d12_command_queue_from_ID3D12CommandQueueExt(iface)->ID3D12CommandQueue_iface,
            iid, out);
}

/* ID3D12CommandQueueExt::NotifyOutOfBandCommandQueue.
 *
 * Redirects the command queue to the out-of-band Vulkan queue of its queue
 * family, if that family has one.
 *
 * Returns E_NOTIMPL when the device lacks NV_low_latency2, E_INVALIDARG when
 * `type` is neither OUT_OF_BAND_RENDER nor OUT_OF_BAND_PRESENT, and S_OK
 * otherwise, including when no matching out-of-band queue is found. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type)
{
    struct d3d12_command_queue *queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
    int family;

    if (!queue->device->vk_info.NV_low_latency2)
        return E_NOTIMPL;

    /* `type` is only validated; it does not affect which queue is selected. */
    switch (type)
    {
        case OUT_OF_BAND_RENDER:
        case OUT_OF_BAND_PRESENT:
            break;

        default:
            return E_INVALIDARG;
    }

    for (family = 0; family < VKD3D_QUEUE_FAMILY_COUNT; family++)
    {
        /* Only a family with the same Vulkan family index as the current queue qualifies. */
        if (queue->device->queue_families[family]->vk_family_index != queue->vkd3d_queue->vk_family_index)
            continue;

        if (!queue->device->queue_families[family]->out_of_band_queue)
            continue;

        /* First match wins. */
        queue->vkd3d_queue = queue->device->queue_families[family]->out_of_band_queue;
        break;
    }

    return S_OK;
}

/* Method table for the ID3D12CommandQueueExt interface.
 * Entries are positional: the three IUnknown methods come first, followed by
 * the single extension method. Defined without `static`. */
CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_command_queue_vkd3d_ext_QueryInterface,
d3d12_command_queue_vkd3d_ext_AddRef,
d3d12_command_queue_vkd3d_ext_Release,

/* ID3D12CommandQueueExt methods */
d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue
};

Loading

0 comments on commit 495c249

Please sign in to comment.