Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NV low latency support #1739

Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ vkd3d_idl = [
'vkd3d_dxgitype.idl',
'vkd3d_swapchain_factory.idl',
'vkd3d_command_list_vkd3d_ext.idl',
'vkd3d_command_queue_vkd3d_ext.idl',
'vkd3d_device_vkd3d_ext.idl',
'vkd3d_core_interface.idl',
]
Expand Down
30 changes: 30 additions & 0 deletions include/vkd3d_command_queue_vkd3d_ext.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* * Copyright 2023 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
import "vkd3d_d3d12.idl";
import "vkd3d_vk_includes.h";

/*
 * vkd3d extension interface for D3D12 command queues.
 *
 * The command queue object hands this interface out from QueryInterface()
 * when asked for IID_ID3D12CommandQueueExt.
 */
[
uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6),
object,
local,
pointer_default(unique)
]
interface ID3D12CommandQueueExt : IUnknown
{
/*
 * Requests that this command queue be treated as an out-of-band queue of
 * the given type (OUT_OF_BAND_RENDER or OUT_OF_BAND_PRESENT).
 * The implementation in libs/vkd3d/command_queue_vkd3d_ext.c returns
 * E_NOTIMPL when NV_low_latency2 is unavailable and E_INVALIDARG for any
 * other type value.
 */
HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type);
}
15 changes: 15 additions & 0 deletions include/vkd3d_device_vkd3d_ext.idl
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown
HRESULT LockCommandQueue(ID3D12CommandQueue *queue);
HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue);
}

/*
 * Device-level low latency control interface.
 *
 * NOTE(review): the implementations of these methods are not part of this
 * file; the per-method remarks below are derived from the signatures only
 * and should be confirmed against the implementation.
 */
[
    uuid(f3112584-41f9-348d-a59b-00b7e1d285d6),
    object,
    local,
    pointer_default(unique)
]
interface ID3DLowLatencyDevice : IUnknown
{
    /* Reports whether low latency functionality is available. */
    BOOL SupportsLowLatency();

    /* Presumably blocks the caller according to the current sleep mode. */
    HRESULT LatencySleep();

    /* minimum_interval_us: presumably a minimum frame interval in microseconds. */
    HRESULT SetLatencySleepMode(
            BOOL low_latency_mode,
            BOOL low_latency_boost,
            UINT32 minimum_interval_us);

    /* Associates a marker of kind markerType with frame frameID. */
    HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType);

    /* Fills the caller-provided D3D12_LATENCY_RESULTS structure. */
    HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results);
}
34 changes: 33 additions & 1 deletion include/vkd3d_vk_includes.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout;
/*
 * Identifiers for the Vulkan extensions that can be queried through the
 * vkd3d extension interfaces.
 *
 * The numeric values are kept exactly as published, since callers built
 * against this header depend on them.
 *
 * NOTE(review): 0x3 equals 0x1 | 0x2, so these values cannot be combined as
 * bit flags; presumably they are plain identifiers - confirm against callers.
 */
typedef enum D3D12_VK_EXTENSION
{
    D3D12_VK_NVX_BINARY_IMPORT = 0x1,
    D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2,
    D3D12_VK_NV_LOW_LATENCY_2 = 0x3
} D3D12_VK_EXTENSION;

/*
 * Kind of out-of-band command queue, passed to
 * ID3D12CommandQueueExt::NotifyOutOfBandCommandQueue().
 *
 * The values are spelled out explicitly because they are part of the
 * interface contract with callers.
 */
typedef enum D3D12_OUT_OF_BAND_CQ_TYPE
{
    OUT_OF_BAND_RENDER  = 0,
    OUT_OF_BAND_PRESENT = 1
} D3D12_OUT_OF_BAND_CQ_TYPE;

typedef struct D3D12_CUBIN_DATA_HANDLE
{
VkCuFunctionNVX vkCuFunction;
Expand All @@ -61,5 +68,30 @@ typedef struct D3D12_UAV_INFO
UINT64 gpuVASize;
} D3D12_UAV_INFO;

typedef struct D3D12_LATENCY_RESULTS
HansKristian-Work marked this conversation as resolved.
Show resolved Hide resolved
{
UINT32 version;
struct D3D12_FRAME_REPORT {
UINT64 frameID;
UINT64 inputSampleTime;
UINT64 simStartTime;
UINT64 simEndTime;
UINT64 renderSubmitStartTime;
UINT64 renderSubmitEndTime;
UINT64 presentStartTime;
UINT64 presentEndTime;
UINT64 driverStartTime;
UINT64 driverEndTime;
UINT64 osRenderQueueStartTime;
UINT64 osRenderQueueEndTime;
UINT64 gpuRenderStartTime;
UINT64 gpuRenderEndTime;
UINT32 gpuActiveRenderTimeUs;
UINT32 gpuFrameTimeUs;
UINT8 rsvd[120];
} frame_reports[64];
UINT8 rsvd[32];
} D3D12_LATENCY_RESULTS;

#endif // __VKD3D_VK_INCLUDES_H

70 changes: 65 additions & 5 deletions libs/vkd3d/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,26 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u
return hr;
}

void vkd3d_set_queue_out_of_band(struct d3d12_device *device, struct vkd3d_queue *queue)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkOutOfBandQueueTypeInfoNV queue_info;

if (!device->vk_info.NV_low_latency2)
return;

memset(&queue_info, 0, sizeof(queue_info));
queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV;
queue_info.pNext = NULL;
queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV;

VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info));
HansKristian-Work marked this conversation as resolved.
Show resolved Hide resolved

queue_info.queueType = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV;

VK_CALL(vkQueueNotifyOutOfBandNV(queue->vk_queue, &queue_info));
}

static void vkd3d_queue_flush_waiters(struct vkd3d_queue *vkd3d_queue,
struct vkd3d_fence_worker *worker,
const struct vkd3d_vk_device_procs *vk_procs);
Expand Down Expand Up @@ -16591,12 +16611,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis
}

/* ID3D12CommandQueue */
extern ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface);

/* Maps an ID3D12CommandQueue interface pointer back to the d3d12_command_queue that embeds it. */
static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface)
{
    struct d3d12_command_queue *queue;

    queue = CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface);
    return queue;
}

static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
REFIID riid, void **object)
{
TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
Expand All @@ -16615,6 +16637,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return S_OK;
}

if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt))
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface);
*object = &command_queue->ID3D12CommandQueueExt_iface;
return S_OK;
}

if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory))
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
Expand All @@ -16629,7 +16659,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return E_NOINTERFACE;
}

static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedIncrement(&command_queue->refcount);
Expand All @@ -16639,7 +16669,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if
return refcount;
}

static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedDecrement(&command_queue->refcount);
Expand Down Expand Up @@ -17132,6 +17162,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
sub.execute.cmd_count = num_command_buffers;
sub.execute.command_allocators = allocators;
sub.execute.num_command_allocators = command_list_count;
sub.execute.low_latency_frame_id = command_queue->device->frame_markers.render;
#ifdef VKD3D_ENABLE_BREADCRUMBS
sub.execute.breadcrumb_indices = breadcrumb_indices;
sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0;
Expand Down Expand Up @@ -17295,6 +17326,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(
return desc;
}

extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl;

static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl =
{
/* IUnknown methods */
Expand Down Expand Up @@ -17807,10 +17840,12 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
const VkSemaphoreSubmitInfo *transition_semaphore,
struct d3d12_command_allocator **command_allocators, size_t num_command_allocators,
struct vkd3d_queue_timeline_trace_cookie timeline_cookie,
bool debug_capture, bool split_submissions)
uint64_t low_latency_frame_id, bool debug_capture, bool split_submissions)
{
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue;
VkLatencySubmissionPresentIdNV latency_submit_present_info;
struct dxgi_vk_swap_chain *low_latency_swapchain;
VkSemaphoreSubmitInfo signal_semaphore_info;
VkSemaphoreSubmitInfo binary_semaphore_info;
VkSubmitInfo2 submit_desc[4];
Expand Down Expand Up @@ -17895,6 +17930,27 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
num_submits += 2;
}

if (command_queue->device->vk_info.NV_low_latency2)
{
spinlock_acquire(&command_queue->device->low_latency_swapchain_spinlock);
if ((low_latency_swapchain = command_queue->device->swapchain_info.low_latency_swapchain))
dxgi_vk_swap_chain_incref(low_latency_swapchain);
spinlock_release(&command_queue->device->low_latency_swapchain_spinlock);

if (low_latency_swapchain && dxgi_vk_swap_chain_low_latency_enabled(low_latency_swapchain))
{
latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV;
latency_submit_present_info.pNext = NULL;
latency_submit_present_info.presentID = low_latency_frame_id;

for (i = 0; i < num_submits; i++)
submit_desc[i].pNext = &latency_submit_present_info;
}

if (low_latency_swapchain)
dxgi_vk_swap_chain_decref(low_latency_swapchain);
}

#ifdef VKD3D_ENABLE_RENDERDOC
/* For each submission we have marked to be captured, we will first need to filter it
* based on VKD3D_AUTO_CAPTURE_COUNTS.
Expand Down Expand Up @@ -18397,7 +18453,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
submission.execute.command_allocators,
submission.execute.num_command_allocators,
submission.execute.timeline_cookie,
submission.execute.debug_capture, submission.execute.split_submission);
submission.execute.low_latency_frame_id,
submission.execute.debug_capture,
submission.execute.split_submission);

/* command_queue_execute takes ownership of the
* outstanding_submission_counters and queue_timeline_indices allocations.
Expand Down Expand Up @@ -18460,6 +18518,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
int rc;

queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl;
queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl;
queue->refcount = 1;

queue->desc = *desc;
Expand Down Expand Up @@ -18588,6 +18647,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource

memset(&sub, 0, sizeof(sub));
sub.type = VKD3D_SUBMISSION_EXECUTE;
sub.execute.low_latency_frame_id = d3d12_queue->device->frame_markers.render;
sub.execute.transition_count = 1;
sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions));
sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE;
Expand Down
91 changes: 91 additions & 0 deletions libs/vkd3d/command_queue_vkd3d_ext.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* * Copyright 2023 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/

#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API

#include "vkd3d_private.h"

/* Maps an ID3D12CommandQueueExt interface pointer back to the d3d12_command_queue that embeds it. */
static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    struct d3d12_command_queue *queue;

    queue = CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface);
    return queue;
}

extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface);
HansKristian-Work marked this conversation as resolved.
Show resolved Hide resolved

/*
 * IUnknown::AddRef for the extension interface.
 *
 * Forwards to the AddRef of the queue's ID3D12CommandQueue interface and
 * returns whatever that call returns.
 */
ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    struct d3d12_command_queue *queue;
    ULONG refcount;

    queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
    refcount = d3d12_command_queue_AddRef(&queue->ID3D12CommandQueue_iface);
    return refcount;
}

extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface);

/*
 * IUnknown::Release for the extension interface.
 *
 * Forwards to the Release of the queue's ID3D12CommandQueue interface and
 * returns whatever that call returns.
 */
static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface)
{
    struct d3d12_command_queue *queue;
    ULONG refcount;

    queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
    refcount = d3d12_command_queue_Release(&queue->ID3D12CommandQueue_iface);
    return refcount;
}

extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface,
REFIID iid, void **object);

/*
 * IUnknown::QueryInterface for the extension interface.
 *
 * Traces the request, then delegates the lookup to the QueryInterface of the
 * queue's ID3D12CommandQueue interface and returns its result unchanged.
 */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface,
        REFIID iid, void **out)
{
    struct d3d12_command_queue *queue;

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
    return d3d12_command_queue_QueryInterface(&queue->ID3D12CommandQueue_iface, iid, out);
}

/*
 * ID3D12CommandQueueExt::NotifyOutOfBandCommandQueue.
 *
 * Redirects this command queue to the out-of-band vkd3d queue of the first
 * device queue family that has the same Vulkan family index as the queue
 * currently in use and that provides an out-of-band queue. If no family
 * matches, the queue is left untouched.
 *
 * Returns:
 *   E_NOTIMPL     - the device does not report NV_low_latency2.
 *   E_INVALIDARG  - type is neither OUT_OF_BAND_RENDER nor OUT_OF_BAND_PRESENT.
 *   S_OK          - otherwise, whether or not a queue was switched.
 *
 * NOTE(review): `type` is only validated here; it does not influence which
 * queue is selected.
 */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type)
{
    struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
    int family;

    if (!command_queue->device->vk_info.NV_low_latency2)
        return E_NOTIMPL;

    switch (type)
    {
        case OUT_OF_BAND_RENDER:
        case OUT_OF_BAND_PRESENT:
            break;

        default:
            return E_INVALIDARG;
    }

    for (family = 0; family < VKD3D_QUEUE_FAMILY_COUNT; family++)
    {
        /* Only families backed by the same Vulkan queue family qualify. */
        if (command_queue->device->queue_families[family]->vk_family_index != command_queue->vkd3d_queue->vk_family_index)
            continue;

        /* ... and only if they actually carry an out-of-band queue. */
        if (!command_queue->device->queue_families[family]->out_of_band_queue)
            continue;

        command_queue->vkd3d_queue = command_queue->device->queue_families[family]->out_of_band_queue;
        break;
    }

    return S_OK;
}

/*
 * Method table for the ID3D12CommandQueueExt interface of a command queue.
 * The initializer is positional, so the entries must stay in the order of
 * the ID3D12CommandQueueExtVtbl members. Non-static: command.c declares it
 * extern and installs it in d3d12_command_queue_init().
 */
CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_command_queue_vkd3d_ext_QueryInterface,
d3d12_command_queue_vkd3d_ext_AddRef,
d3d12_command_queue_vkd3d_ext_Release,

/* ID3D12CommandQueueExt methods */
d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue
};

Loading
Loading