diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 508c856b811..6c0f802a19b 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -55,6 +55,7 @@ namespace dxvk { , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false ) , m_csThread ( dxvkDevice, dxvkDevice->createContext(DxvkContextType::Primary) ) , m_csChunk ( AllocCsChunk() ) + , m_submissionFence (new sync::Fence()) , m_d3d9Interop ( this ) , m_d3d9On12 ( this ) , m_d3d8Bridge ( this ) { @@ -1003,7 +1004,7 @@ namespace dxvk { if (dstTexInfo->IsAutomaticMip() && mipLevels != dstTexInfo->Desc()->MipLevels) MarkTextureMipsDirty(dstTexInfo); - FlushImplicit(false); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); return D3D_OK; } @@ -1450,7 +1451,9 @@ namespace dxvk { return D3D_OK; // Do a strong flush if the first render target is changed. - FlushImplicit(RenderTargetIndex == 0 ? TRUE : FALSE); + ConsiderFlush(RenderTargetIndex == 0 + ? GpuFlushType::ImplicitStrongHint + : GpuFlushType::ImplicitWeakHint); m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); m_state.renderTargets[RenderTargetIndex] = rt; @@ -1529,7 +1532,7 @@ namespace dxvk { if (m_state.depthStencil == ds) return D3D_OK; - FlushImplicit(FALSE); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); if (ds != nullptr && m_depthBiasRepresentation.depthBiasRepresentation != VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT) { @@ -1588,7 +1591,7 @@ namespace dxvk { if (unlikely(!m_flags.test(D3D9DeviceFlag::InScene))) return D3DERR_INVALIDCALL; - FlushImplicit(true); + ConsiderFlush(GpuFlushType::ImplicitStrongHint); m_flags.clr(D3D9DeviceFlag::InScene); @@ -4377,7 +4380,7 @@ namespace dxvk { // We don't have to wait, but misbehaving games may // still try to spin on `Map` until the resource is // idle, so we should flush pending commands - FlushImplicit(FALSE); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); return false; } else { @@ -4890,7 +4893,7 @@ 
namespace dxvk { slice.slice); } UnmapTextures(); - FlushImplicit(false); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); } void D3D9DeviceEx::EmitGenerateMips( @@ -5049,7 +5052,7 @@ namespace dxvk { TrackBufferMappingBufferSequenceNumber(pResource); UnmapTextures(); - FlushImplicit(false); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); return D3D_OK; } @@ -5080,25 +5083,15 @@ namespace dxvk { void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); - m_csIsBusy = true; } - void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) { - // Flush only if the GPU is about to go idle, in - // order to keep the number of submissions low. - uint32_t pending = m_dxvkDevice->pendingSubmissions(); + void D3D9DeviceEx::ConsiderFlush(GpuFlushType FlushType) { + uint64_t chunkId = GetCurrentSequenceNumber(); + uint64_t submissionId = m_submissionFence->value(); - if (StrongHint || pending <= MaxPendingSubmits) { - auto now = dxvk::high_resolution_clock::now(); - - uint32_t delay = MinFlushIntervalUs - + IncFlushIntervalUs * pending; - - // Prevent flushing too often in short intervals. 
- if (now - m_lastFlush >= std::chrono::microseconds(delay)) - Flush(); - } + if (m_flushTracker.considerFlush(FlushType, chunkId, submissionId)) + Flush(); } @@ -5458,28 +5451,31 @@ namespace dxvk { m_initializer->Flush(); m_converter->Flush(); - if (m_csIsBusy || !m_csChunk->empty()) { - EmitStagingBufferMarker(); + EmitStagingBufferMarker(); - // Add commands to flush the threaded - // context, then flush the command list - EmitCs([](DxvkContext* ctx) { - ctx->flushCommandList(nullptr); - }); + // Add commands to flush the threaded + // context, then flush the command list + uint64_t submissionId = ++m_submissionId; - FlushCsChunk(); + EmitCs([ + cSubmissionFence = m_submissionFence, + cSubmissionId = submissionId + ] (DxvkContext* ctx) { + ctx->signal(cSubmissionFence, cSubmissionId); + ctx->flushCommandList(nullptr); + }); - // Reset flush timer used for implicit flushes - m_lastFlush = dxvk::high_resolution_clock::now(); - m_csIsBusy = false; - } + FlushCsChunk(); + + m_flushSeqNum = m_csSeqNum; + m_flushTracker.notifyFlush(m_flushSeqNum, submissionId); } void D3D9DeviceEx::EndFrame() { D3D9DeviceLock lock = LockDevice(); - EmitCs([] (DxvkContext* ctx) { + EmitCs<false>([] (DxvkContext* ctx) { ctx->endFrame(); }); } @@ -6789,9 +6785,9 @@ namespace dxvk { if (unlikely(pQuery->IsEvent())) { pQuery->IsStalling() ? 
Flush() - : FlushImplicit(TRUE); + : ConsiderFlush(GpuFlushType::ImplicitStrongHint); } else if (pQuery->IsStalling()) { - FlushImplicit(FALSE); + ConsiderFlush(GpuFlushType::ImplicitWeakHint); } } diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 540e674c1fd..0851152d84f 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -38,6 +38,7 @@ #include #include +#include "../util/util_flush.h" #include "../util/util_lru.h" namespace dxvk { @@ -927,7 +928,7 @@ namespace dxvk { void SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); void SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); - void FlushImplicit(BOOL StrongHint); + void ConsiderFlush(GpuFlushType FlushType); bool ChangeReportedMemory(int64_t delta) { if (IsExtended()) @@ -995,12 +996,15 @@ namespace dxvk { return DxvkCsChunkRef(chunk, &m_csChunkPool); } - template<typename Cmd> + template<bool AllowFlush = true, typename Cmd> void EmitCs(Cmd&& command) { if (unlikely(!m_csChunk->push(command))) { EmitCsChunk(std::move(m_csChunk)); - m_csChunk = AllocCsChunk(); + if constexpr (AllowFlush) + ConsiderFlush(GpuFlushType::ImplicitWeakHint); + m_csChunk->push(command); } } @@ -1343,12 +1347,14 @@ namespace dxvk { D3D9ViewportInfo m_viewportInfo; DxvkCsChunkPool m_csChunkPool; - dxvk::high_resolution_clock::time_point m_lastFlush - = dxvk::high_resolution_clock::now(); DxvkCsThread m_csThread; DxvkCsChunkRef m_csChunk; uint64_t m_csSeqNum = 0ull; - bool m_csIsBusy = false; + + Rc<sync::Fence> m_submissionFence; + uint64_t m_submissionId = 0ull; + uint64_t m_flushSeqNum = 0ull; + GpuFlushTracker m_flushTracker; std::atomic m_availableMemory = { 0 }; std::atomic m_samplerCount = { 0 }; diff --git a/src/d3d9/d3d9_query.cpp b/src/d3d9/d3d9_query.cpp index 31624a087fe..5eebdb0d611 100644 --- a/src/d3d9/d3d9_query.cpp +++ b/src/d3d9/d3d9_query.cpp @@ -160,7 +160,7 @@ namespace dxvk { // they didn't call end, do some flushy stuff... 
if (flush && hr == S_FALSE && m_state != D3D9_VK_QUERY_BEGUN) { this->NotifyStall(); - m_parent->FlushImplicit(FALSE); + m_parent->ConsiderFlush(GpuFlushType::ImplicitSynchronization); } return hr; diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index d01c1f9836b..a24ee311bf5 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -498,17 +498,6 @@ namespace dxvk { m_submissionQueue.unlockDeviceQueue(); } - /** - * \brief Number of pending submissions - * - * A return value of 0 indicates - * that the GPU is currently idle. - * \returns Pending submission count - */ - uint32_t pendingSubmissions() const { - return m_submissionQueue.pendingSubmissions(); - } - /** * \brief Increments a given stat counter * diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp index 12673414cb8..7273a37d608 100644 --- a/src/dxvk/dxvk_queue.cpp +++ b/src/dxvk/dxvk_queue.cpp @@ -37,7 +37,6 @@ namespace dxvk { entry.status = status; entry.submit = std::move(submitInfo); - m_pending += 1; m_submitQueue.push(std::move(entry)); m_appendCond.notify_all(); } @@ -215,10 +214,6 @@ namespace dxvk { entry.submit.cmdList->notifyObjects(); lock.lock(); - - if (entry.submit.cmdList != nullptr) - m_pending -= 1; - m_finishQueue.pop(); m_finishCond.notify_all(); lock.unlock(); diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h index 8bedcaa1bb8..38d91f5dd09 100644 --- a/src/dxvk/dxvk_queue.h +++ b/src/dxvk/dxvk_queue.h @@ -72,17 +72,6 @@ namespace dxvk { ~DxvkSubmissionQueue(); - /** - * \brief Number of pending submissions - * - * A return value of 0 indicates - * that the GPU is currently idle. - * \returns Pending submission count - */ - uint32_t pendingSubmissions() const { - return m_pending.load(); - } - /** * \brief Retrieves estimated GPU idle time * @@ -193,7 +182,6 @@ namespace dxvk { std::atomic<VkResult> m_lastError = { VK_SUCCESS }; std::atomic<bool> m_stopped = { false }; - std::atomic<uint32_t> m_pending = { 0u }; std::atomic<uint64_t> m_gpuIdle = { 0ull }; dxvk::mutex m_mutex;