diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index e8ef3012648..fd5a7ea4bb8 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -6617,12 +6617,21 @@ namespace dxvk { } } - UpdateStateConstants( - &m_state, - StartRegister, - pConstantData, - Count, - m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled); + if constexpr (ProgramType == DxsoProgramType::VertexShader) { + UpdateStateConstants( + m_state.vsConsts, + StartRegister, + pConstantData, + Count, + m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled); + } else { + UpdateStateConstants( + m_state.psConsts, + StartRegister, + pConstantData, + Count, + m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled); + } return D3D_OK; } diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 63e9fd476fe..06f329fab58 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -899,7 +899,7 @@ namespace dxvk { return &m_d3d9Options; } - Direct3DState9* GetRawState() { + D3D9DeviceState* GetRawState() { return &m_state; } @@ -950,6 +950,10 @@ namespace dxvk { void TouchMappedTexture(D3D9CommonTexture* pTexture); void RemoveMappedTexture(D3D9CommonTexture* pTexture); + bool CanSWVP() { + return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING); + } + private: DxvkCsChunkRef AllocCsChunk() { @@ -976,10 +980,6 @@ namespace dxvk { } } - bool CanSWVP() { - return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING); - } - void DetermineConstantLayouts(bool canSWVP); D3D9BufferSlice AllocUPBuffer(VkDeviceSize size); @@ -1315,7 +1315,7 @@ namespace dxvk { std::atomic m_availableMemory = { 0 }; std::atomic m_samplerCount = { 0 }; - Direct3DState9 m_state; + D3D9DeviceState m_state; #ifdef D3D9_ALLOW_UNMAPPING lru_list m_mappedTextures; diff --git a/src/d3d9/d3d9_state.cpp b/src/d3d9/d3d9_state.cpp index 2c32bef45d9..147c2ac0b24 100644 --- a/src/d3d9/d3d9_state.cpp 
+++ b/src/d3d9/d3d9_state.cpp @@ -4,7 +4,7 @@ namespace dxvk { - D3D9CapturableState::D3D9CapturableState() { + D3D9DeviceState::D3D9DeviceState() { for (uint32_t i = 0; i < streamFreq.size(); i++) streamFreq[i] = 1; @@ -12,7 +12,7 @@ namespace dxvk { enabledLightIndices[i] = UINT32_MAX; } - D3D9CapturableState::~D3D9CapturableState() { + D3D9DeviceState::~D3D9DeviceState() { for (uint32_t i = 0; i < textures.size(); i++) TextureChangePrivate(textures[i], nullptr); } diff --git a/src/d3d9/d3d9_state.h b/src/d3d9/d3d9_state.h index a3dd0a570be..ab27803edea 100644 --- a/src/d3d9/d3d9_state.h +++ b/src/d3d9/d3d9_state.h @@ -181,10 +181,10 @@ namespace dxvk { 0.0f // Phi }; - struct D3D9CapturableState { - D3D9CapturableState(); + struct D3D9DeviceState { + D3D9DeviceState(); - ~D3D9CapturableState(); + ~D3D9DeviceState(); Com vertexDecl; Com indices; @@ -227,6 +227,9 @@ namespace dxvk { std::vector> lights; std::array enabledLightIndices; + std::array, caps::MaxSimultaneousRenderTargets> renderTargets; + Com depthStencil; + bool IsLightEnabled(DWORD Index) { const auto& indices = enabledLightIndices; return std::find(indices.begin(), indices.end(), Index) != indices.end(); @@ -234,61 +237,49 @@ namespace dxvk { }; template < + typename ConstantsT, DxsoProgramType ProgramType, D3D9ConstantType ConstantType, typename T> HRESULT UpdateStateConstants( - D3D9CapturableState* pState, + ConstantsT& Set, UINT StartRegister, const T* pConstantData, UINT Count, bool FloatEmu) { - auto UpdateHelper = [&] (auto& set) { - if constexpr (ConstantType == D3D9ConstantType::Float) { - - if (!FloatEmu) { - size_t size = Count * sizeof(Vector4); - - std::memcpy(set.fConsts[StartRegister].data, pConstantData, size); - } - else { - for (UINT i = 0; i < Count; i++) - set.fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4)); - } - } - else if constexpr (ConstantType == D3D9ConstantType::Int) { - size_t size = Count * sizeof(Vector4i); + if constexpr (ConstantType == 
D3D9ConstantType::Float) { + + if (!FloatEmu) { + size_t size = Count * sizeof(Vector4); - std::memcpy(set.iConsts[StartRegister].data, pConstantData, size); + std::memcpy(Set.fConsts[StartRegister].data, pConstantData, size); } else { - for (uint32_t i = 0; i < Count; i++) { - const uint32_t constantIdx = StartRegister + i; - const uint32_t arrayIdx = constantIdx / 32; - const uint32_t bitIdx = constantIdx % 32; - - const uint32_t bit = 1u << bitIdx; - - set.bConsts[arrayIdx] &= ~bit; - if (pConstantData[i]) - set.bConsts[arrayIdx] |= bit; - } + for (UINT i = 0; i < Count; i++) + Set.fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4)); } + } + else if constexpr (ConstantType == D3D9ConstantType::Int) { + size_t size = Count * sizeof(Vector4i); - return D3D_OK; - }; - - return ProgramType == DxsoProgramTypes::VertexShader - ? UpdateHelper(pState->vsConsts) - : UpdateHelper(pState->psConsts); - } + std::memcpy(Set.iConsts[StartRegister].data, pConstantData, size); + } + else { + for (uint32_t i = 0; i < Count; i++) { + const uint32_t constantIdx = StartRegister + i; + const uint32_t arrayIdx = constantIdx / 32; + const uint32_t bitIdx = constantIdx % 32; - struct Direct3DState9 : public D3D9CapturableState { + const uint32_t bit = 1u << bitIdx; - std::array, caps::MaxSimultaneousRenderTargets> renderTargets; - Com depthStencil; + Set.bConsts[arrayIdx] &= ~bit; + if (pConstantData[i]) + Set.bConsts[arrayIdx] |= bit; + } + } - }; + return D3D_OK; + } struct D3D9InputAssemblyState { diff --git a/src/d3d9/d3d9_stateblock.cpp b/src/d3d9/d3d9_stateblock.cpp index aec47712734..522689d0b32 100644 --- a/src/d3d9/d3d9_stateblock.cpp +++ b/src/d3d9/d3d9_stateblock.cpp @@ -11,6 +11,21 @@ namespace dxvk { + D3D9CapturedState::D3D9CapturedState() { + for (uint32_t i = 0; i < streamFreq.size(); i++) + streamFreq[i] = 1; + + for (uint32_t i = 0; i < enabledLightIndices.size(); i++) + enabledLightIndices[i] = UINT32_MAX; + } + + D3D9CapturedState::~D3D9CapturedState() { 
+ if (unlikely(textures != nullptr)) { + for (uint32_t i = 0; i < textures->size(); i++) + TextureChangePrivate((*textures)[i], nullptr); + } + } + D3D9StateBlock::D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type) : D3D9StateBlockBase(pDevice) , m_deviceState (pDevice->GetRawState()) { @@ -78,7 +93,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { - m_state.renderStates[State] = Value; + if (unlikely(!m_state.renderStates)) + m_state.renderStates = std::make_unique(); + + (*m_state.renderStates)[State] = Value; m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); m_captures.renderStates.set(State, true); @@ -90,7 +108,10 @@ namespace dxvk { DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { - m_state.samplerStates[StateSampler][Type] = Value; + if (unlikely(!m_state.samplerStates)) + m_state.samplerStates = std::make_unique(); + + (*m_state.samplerStates)[StateSampler][Type] = Value; m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); m_captures.samplers.set(StateSampler, true); @@ -104,10 +125,13 @@ namespace dxvk { D3D9VertexBuffer* pStreamData, UINT OffsetInBytes, UINT Stride) { - m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData; + if (unlikely(!m_state.vertexBuffers)) + m_state.vertexBuffers = std::make_unique(); - m_state.vertexBuffers[StreamNumber].offset = OffsetInBytes; - m_state.vertexBuffers[StreamNumber].stride = Stride; + D3D9VBO& vbo = (*m_state.vertexBuffers)[StreamNumber]; + vbo.vertexBuffer = pStreamData; + vbo.offset = OffsetInBytes; + vbo.stride = Stride; m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); m_captures.vertexBuffers.set(StreamNumber, true); @@ -125,7 +149,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { - TextureChangePrivate(m_state.textures[StateSampler], pTexture); + if (unlikely(!m_state.textures)) + m_state.textures = std::make_unique(); + + 
TextureChangePrivate((*m_state.textures)[StateSampler], pTexture); m_captures.flags.set(D3D9CapturedStateFlag::Textures); m_captures.textures.set(StateSampler, true); @@ -150,7 +177,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetMaterial(const D3DMATERIAL9* pMaterial) { - m_state.material = *pMaterial; + if (!m_state.material) + m_state.material = std::make_unique(); + + *m_state.material = *pMaterial; m_captures.flags.set(D3D9CapturedStateFlag::Material); return D3D_OK; @@ -198,7 +228,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { - m_state.transforms[idx] = ConvertMatrix(pMatrix); + if (unlikely(!m_state.transforms)) + m_state.transforms = std::make_unique(); + + (*m_state.transforms)[idx] = ConvertMatrix(pMatrix); m_captures.flags.set(D3D9CapturedStateFlag::Transforms); m_captures.transforms.set(idx, true); @@ -210,7 +243,10 @@ namespace dxvk { DWORD Stage, D3D9TextureStageStateTypes Type, DWORD Value) { - m_state.textureStages[Stage][Type] = Value; + if (unlikely(!m_state.textureStages)) + m_state.textureStages = std::make_unique(); + + (*m_state.textureStages)[Stage][Type] = Value; m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); m_captures.textureStages.set(Stage, true); @@ -220,7 +256,11 @@ namespace dxvk { HRESULT D3D9StateBlock::MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { - m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix); + if (unlikely(!m_state.transforms)) // transforms is allocated on first use and is dereferenced just below, so it must exist here + m_state.transforms = std::make_unique<D3D9CapturedState::TransformsArray>(); + + Matrix4& transform = (*m_state.transforms)[idx]; + transform = transform * ConvertMatrix(pMatrix); m_captures.flags.set(D3D9CapturedStateFlag::Transforms); m_captures.transforms.set(idx, true); @@ -333,15 +373,30 @@ namespace dxvk { HRESULT D3D9StateBlock::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.vsConsts.bConsts[idx] &= ~mask; - m_state.vsConsts.bConsts[idx] |= bits & mask; + if 
(unlikely(m_parent->CanSWVP())) { + if (unlikely(!m_state.vsConstsSW)) + m_state.vsConstsSW = std::make_unique(); + + m_state.vsConstsSW->bConsts[idx] &= ~mask; + m_state.vsConstsSW->bConsts[idx] |= bits & mask; + } else { + if (unlikely(!m_state.vsConsts)) + m_state.vsConsts = std::make_unique(); + + m_state.vsConsts->bConsts[idx] &= ~mask; + m_state.vsConsts->bConsts[idx] |= bits & mask; + } + return D3D_OK; } HRESULT D3D9StateBlock::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.psConsts.bConsts[idx] &= ~mask; - m_state.psConsts.bConsts[idx] |= bits & mask; + if (unlikely(!m_state.psConsts)) + m_state.psConsts = std::make_unique(); + + m_state.psConsts->bConsts[idx] &= ~mask; + m_state.psConsts->bConsts[idx] |= bits & mask; return D3D_OK; } diff --git a/src/d3d9/d3d9_stateblock.h b/src/d3d9/d3d9_stateblock.h index a098b54fa45..126d5263949 100644 --- a/src/d3d9/d3d9_stateblock.h +++ b/src/d3d9/d3d9_stateblock.h @@ -66,6 +66,60 @@ namespace dxvk { bit::bitvector lightEnabledChanges; }; + struct D3D9CapturedState { + typedef typename std::array RenderStatesArray; + typedef typename std::array, SamplerCount> SamplerStatesArray; + typedef typename std::array VertexBuffersArray; + typedef typename std::array TexturesArray; + typedef typename std::array, caps::TextureStageCount> TextureStagesArray; + typedef typename std::array TransformsArray; + + D3D9CapturedState(); + + ~D3D9CapturedState(); + + Com vertexDecl; + Com indices; + + std::unique_ptr renderStates = nullptr; + + std::unique_ptr samplerStates = nullptr; + + std::unique_ptr vertexBuffers = nullptr; + + std::unique_ptr textures = nullptr; + + Com vertexShader; + Com pixelShader; + + D3DVIEWPORT9 viewport = {}; + RECT scissorRect = {}; + + std::array< + D3D9ClipPlane, + caps::MaxClipPlanes> clipPlanes = {}; + + std::unique_ptr textureStages = nullptr; + + std::unique_ptr vsConsts = nullptr; + std::unique_ptr vsConstsSW = nullptr; + std::unique_ptr psConsts = nullptr; + + 
std::array streamFreq = {}; + + std::unique_ptr transforms = nullptr; + + std::unique_ptr material = nullptr; + + std::vector> lights; + std::array enabledLightIndices; + + bool IsLightEnabled(DWORD Index) { + const auto& indices = enabledLightIndices; + return std::find(indices.begin(), indices.end(), Index) != indices.end(); + } + }; + enum class D3D9StateBlockType :uint32_t { None, VertexState, @@ -194,7 +248,10 @@ namespace dxvk { for (uint32_t rs : bit::BitMask(m_captures.renderStates.dword(i))) { uint32_t idx = i * 32 + rs; - dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); + if constexpr (std::is_same_v) + dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); + else + dst->SetRenderState(D3DRENDERSTATETYPE(idx), (*src->renderStates)[idx]); } } } @@ -202,27 +259,44 @@ namespace dxvk { if (m_captures.flags.test(D3D9CapturedStateFlag::SamplerStates)) { for (uint32_t samplerIdx : bit::BitMask(m_captures.samplers.dword(0))) { for (uint32_t stateIdx : bit::BitMask(m_captures.samplerStates[samplerIdx].dword(0))) - dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); + if constexpr (std::is_same_v) + dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); + else + dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), (*src->samplerStates)[samplerIdx][stateIdx]); } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { for (uint32_t idx : bit::BitMask(m_captures.vertexBuffers.dword(0))) { - const auto& vbo = src->vertexBuffers[idx]; + const D3D9VBO* vbo; + if constexpr (std::is_same_v) + vbo = &src->vertexBuffers[idx]; + else + vbo = &(*src->vertexBuffers)[idx]; + dst->SetStreamSource( idx, - vbo.vertexBuffer.ptr(), - vbo.offset, - vbo.stride); + vbo->vertexBuffer.ptr(), + vbo->offset, + vbo->stride); } } - if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) - 
dst->SetMaterial(&src->material); + if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) { + + if constexpr (std::is_same_v) + dst->SetMaterial(&src->material); + else + dst->SetMaterial(src->material.get()); + } if (m_captures.flags.test(D3D9CapturedStateFlag::Textures)) { - for (uint32_t idx : bit::BitMask(m_captures.textures.dword(0))) - dst->SetStateTexture(idx, src->textures[idx]); + for (uint32_t idx : bit::BitMask(m_captures.textures.dword(0))) { + if constexpr (std::is_same_v) + dst->SetStateTexture(idx, src->textures[idx]); + else + dst->SetStateTexture(idx, (*src->textures)[idx]); + } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexShader)) @@ -236,15 +310,22 @@ namespace dxvk { for (uint32_t trans : bit::BitMask(m_captures.transforms.dword(i))) { uint32_t idx = i * 32 + trans; - dst->SetStateTransform(idx, reinterpret_cast(&src->transforms[idx])); + if constexpr (std::is_same_v) + dst->SetStateTransform(idx, reinterpret_cast(&src->transforms[idx])); + else + dst->SetStateTransform(idx, reinterpret_cast(&(*src->transforms)[idx])); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::TextureStages)) { for (uint32_t stageIdx : bit::BitMask(m_captures.textureStages.dword(0))) { - for (uint32_t stateIdx : bit::BitMask(m_captures.textureStageStates[stageIdx].dword(0))) - dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), src->textureStages[stageIdx][stateIdx]); + for (uint32_t stateIdx : bit::BitMask(m_captures.textureStageStates[stageIdx].dword(0))) { + if constexpr (std::is_same_v) + dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), src->textureStages[stageIdx][stateIdx]); + else + dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), (*src->textureStages)[stageIdx][stateIdx]); + } } } @@ -260,34 +341,95 @@ namespace dxvk { } if (m_captures.flags.test(D3D9CapturedStateFlag::VsConstants)) { - for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); 
i++) { - for (uint32_t consts : bit::BitMask(m_captures.vsConsts.fConsts.dword(i))) { - uint32_t idx = i * 32 + consts; + if (unlikely(m_parent->CanSWVP())) { + if (unlikely(!m_state.vsConstsSW)) + m_state.vsConstsSW = std::make_unique(); + + for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.fConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + if constexpr (std::is_same_v) { + dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); + } else { + dst->SetVertexShaderConstantF(idx, (float*)&src->vsConstsSW->fConsts[idx], 1); + } + } + } - dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); + for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.iConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + if constexpr (std::is_same_v) { + dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); + } else { + dst->SetVertexShaderConstantI(idx, (int*)&src->vsConstsSW->iConsts[idx], 1); + } + } } - } - for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.dwordCount(); i++) { - for (uint32_t consts : bit::BitMask(m_captures.vsConsts.iConsts.dword(i))) { - uint32_t idx = i * 32 + consts; + if (m_captures.vsConsts.bConsts.any()) { + for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) + if constexpr (std::is_same_v) { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); + } else { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConstsSW->bConsts[i]); + } + } + } else { + if (unlikely(!m_state.vsConsts)) + m_state.vsConsts = std::make_unique(); + + for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.fConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + if constexpr (std::is_same_v) 
{ + dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); + } else { + dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts->fConsts[idx], 1); + } + } + } - dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); + for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.dwordCount(); i++) { + for (uint32_t consts : bit::BitMask(m_captures.vsConsts.iConsts.dword(i))) { + uint32_t idx = i * 32 + consts; + + if constexpr (std::is_same_v) { + dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); + } else { + dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts->iConsts[idx], 1); + } + } } - } - if (m_captures.vsConsts.bConsts.any()) { - for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) - dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); + if (m_captures.vsConsts.bConsts.any()) { + for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) + if constexpr (std::is_same_v) { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); + } else { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts->bConsts[i]); + } + } } } if (m_captures.flags.test(D3D9CapturedStateFlag::PsConstants)) { + if (unlikely(!m_state.psConsts)) { + m_state.psConsts = std::make_unique(); + } + for (uint32_t i = 0; i < m_captures.psConsts.fConsts.dwordCount(); i++) { for (uint32_t consts : bit::BitMask(m_captures.psConsts.fConsts.dword(i))) { uint32_t idx = i * 32 + consts; - dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts.fConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts.fConsts[idx], 1); + } else { + dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts->fConsts[idx], 1); + } } } @@ -295,13 +437,21 @@ namespace dxvk { for (uint32_t consts : bit::BitMask(m_captures.psConsts.iConsts.dword(i))) { uint32_t idx = i * 32 
+ consts; - dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts.iConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts.iConsts[idx], 1); + } else { + dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts->iConsts[idx], 1); + } } } if (m_captures.psConsts.bConsts.any()) { for (uint32_t i = 0; i < m_captures.psConsts.bConsts.dwordCount(); i++) - dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts.bConsts[i]); + if constexpr (std::is_same_v) { + dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts.bConsts[i]); + } else { + dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts->bConsts[i]); + } } } @@ -354,15 +504,51 @@ namespace dxvk { setCaptures.bConsts.set(reg, true); } - UpdateStateConstants< - ProgramType, - ConstantType, - T>( - &m_state, - StartRegister, - pConstantData, - Count, - false); + if constexpr (ProgramType == DxsoProgramTypes::VertexShader) { + if (m_parent->CanSWVP()) { + if (unlikely(!m_state.vsConstsSW)) + m_state.vsConstsSW = std::make_unique(); + + UpdateStateConstants< + D3D9ShaderConstantsVSSoftware, + ProgramType, + ConstantType, + T>( + (*m_state.vsConstsSW), + StartRegister, + pConstantData, + Count, + false); + } else { + if (unlikely(!m_state.vsConsts)) + m_state.vsConsts = std::make_unique(); + + UpdateStateConstants< + D3D9ShaderConstantsVSHardware, + ProgramType, + ConstantType, + T>( + (*m_state.vsConsts), + StartRegister, + pConstantData, + Count, + false); + } + } else { + if (unlikely(!m_state.psConsts)) + m_state.psConsts = std::make_unique(); + + UpdateStateConstants< + D3D9ShaderConstantsPS, + ProgramType, + ConstantType, + T>( + (*m_state.psConsts), + StartRegister, + pConstantData, + Count, + false); + } return D3D_OK; }; @@ -391,10 +577,10 @@ namespace dxvk { void CaptureType(D3D9StateBlockType State); - D3D9CapturableState m_state; + D3D9CapturedState m_state; D3D9StateCaptures 
m_captures; - D3D9CapturableState* m_deviceState; + D3D9DeviceState* m_deviceState; bool m_applying = false;