diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index e8ef3012648..5292f8d95e1 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -5145,11 +5145,11 @@ namespace dxvk { void D3D9DeviceEx::UploadConstants() { if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) { if (CanSWVP()) - return UploadSoftwareConstantSet(m_state.vsConsts, m_vsLayout); + return UploadSoftwareConstantSet(m_state.consts.vs, m_vsLayout); else - return UploadConstantSet(m_state.vsConsts, m_vsLayout, m_state.vertexShader); + return UploadConstantSet(m_state.consts.vs, m_vsLayout, m_state.vertexShader); } else { - return UploadConstantSet (m_state.psConsts, m_psLayout, m_state.pixelShader); + return UploadConstantSet (m_state.consts.ps, m_psLayout, m_state.pixelShader); } } @@ -6257,7 +6257,7 @@ namespace dxvk { if (likely(!CanSWVP())) { UpdateVertexBoolSpec( - m_state.vsConsts.bConsts[0] & + m_state.consts.vs.bConsts[0] & m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask); } else UpdateVertexBoolSpec(0); @@ -6285,7 +6285,7 @@ namespace dxvk { UpdatePixelShaderSamplerSpec(m_textureTypes, programInfo.minorVersion() >= 4 ? 0u : projected, fetch4); // For implicit samplers... 
UpdatePixelBoolSpec( - m_state.psConsts.bConsts[0] & + m_state.consts.ps.bConsts[0] & m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask); } else { @@ -6526,16 +6526,16 @@ namespace dxvk { void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.vsConsts.bConsts[idx] &= ~mask; - m_state.vsConsts.bConsts[idx] |= bits & mask; + m_state.consts.vs.bConsts[idx] &= ~mask; + m_state.consts.vs.bConsts[idx] |= bits & mask; m_consts[DxsoProgramTypes::VertexShader].dirty = true; } void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.psConsts.bConsts[idx] &= ~mask; - m_state.psConsts.bConsts[idx] |= bits & mask; + m_state.consts.ps.bConsts[idx] &= ~mask; + m_state.consts.ps.bConsts[idx] |= bits & mask; m_consts[DxsoProgramTypes::PixelShader].dirty = true; } @@ -6618,7 +6618,7 @@ namespace dxvk { } UpdateStateConstants( - &m_state, + &m_state.consts, StartRegister, pConstantData, Count, diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 63e9fd476fe..b0151c497c0 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -899,7 +899,7 @@ namespace dxvk { return &m_d3d9Options; } - Direct3DState9* GetRawState() { + D3D9DeviceState* GetRawState() { return &m_state; } @@ -1121,8 +1121,8 @@ namespace dxvk { }; return ProgramType == DxsoProgramTypes::VertexShader - ? GetHelper(m_state.vsConsts) - : GetHelper(m_state.psConsts); + ? 
GetHelper(m_state.consts.vs) + : GetHelper(m_state.consts.ps); } void UpdateFixedFunctionVS(); @@ -1315,7 +1315,7 @@ namespace dxvk { std::atomic m_availableMemory = { 0 }; std::atomic m_samplerCount = { 0 }; - Direct3DState9 m_state; + D3D9DeviceState m_state; #ifdef D3D9_ALLOW_UNMAPPING lru_list m_mappedTextures; diff --git a/src/d3d9/d3d9_state.cpp b/src/d3d9/d3d9_state.cpp index 2c32bef45d9..147c2ac0b24 100644 --- a/src/d3d9/d3d9_state.cpp +++ b/src/d3d9/d3d9_state.cpp @@ -4,7 +4,7 @@ namespace dxvk { - D3D9CapturableState::D3D9CapturableState() { + D3D9DeviceState::D3D9DeviceState() { for (uint32_t i = 0; i < streamFreq.size(); i++) streamFreq[i] = 1; @@ -12,7 +12,7 @@ namespace dxvk { enabledLightIndices[i] = UINT32_MAX; } - D3D9CapturableState::~D3D9CapturableState() { + D3D9DeviceState::~D3D9DeviceState() { for (uint32_t i = 0; i < textures.size(); i++) TextureChangePrivate(textures[i], nullptr); } diff --git a/src/d3d9/d3d9_state.h b/src/d3d9/d3d9_state.h index a3dd0a570be..2b2efb4bab2 100644 --- a/src/d3d9/d3d9_state.h +++ b/src/d3d9/d3d9_state.h @@ -181,10 +181,15 @@ namespace dxvk { 0.0f // Phi }; - struct D3D9CapturableState { - D3D9CapturableState(); + struct D3D9Constants { + D3D9ShaderConstantsVSSoftware vs; + D3D9ShaderConstantsPS ps; + }; + + struct D3D9DeviceState { + D3D9DeviceState(); - ~D3D9CapturableState(); + ~D3D9DeviceState(); Com vertexDecl; Com indices; @@ -215,8 +220,7 @@ namespace dxvk { std::array, caps::TextureStageCount> textureStages = {}; - D3D9ShaderConstantsVSSoftware vsConsts; - D3D9ShaderConstantsPS psConsts; + D3D9Constants consts = {}; std::array streamFreq = {}; @@ -227,6 +231,9 @@ namespace dxvk { std::vector> lights; std::array enabledLightIndices; + std::array, caps::MaxSimultaneousRenderTargets> renderTargets; + Com depthStencil; + bool IsLightEnabled(DWORD Index) { const auto& indices = enabledLightIndices; return std::find(indices.begin(), indices.end(), Index) != indices.end(); @@ -238,7 +245,7 @@ namespace dxvk { 
D3D9ConstantType ConstantType, typename T> HRESULT UpdateStateConstants( - D3D9CapturableState* pState, + D3D9Constants* pConsts, UINT StartRegister, const T* pConstantData, UINT Count, @@ -279,17 +286,10 @@ namespace dxvk { }; return ProgramType == DxsoProgramTypes::VertexShader - ? UpdateHelper(pState->vsConsts) - : UpdateHelper(pState->psConsts); + ? UpdateHelper(pConsts->vs) + : UpdateHelper(pConsts->ps); } - struct Direct3DState9 : public D3D9CapturableState { - - std::array, caps::MaxSimultaneousRenderTargets> renderTargets; - Com depthStencil; - - }; - struct D3D9InputAssemblyState { D3DPRIMITIVETYPE primitiveType = D3DPRIMITIVETYPE(0); diff --git a/src/d3d9/d3d9_stateblock.cpp b/src/d3d9/d3d9_stateblock.cpp index aec47712734..68a9a679e6f 100644 --- a/src/d3d9/d3d9_stateblock.cpp +++ b/src/d3d9/d3d9_stateblock.cpp @@ -11,6 +11,21 @@ namespace dxvk { + D3D9CapturedState::D3D9CapturedState() { + for (uint32_t i = 0; i < streamFreq.size(); i++) + streamFreq[i] = 1; + + for (uint32_t i = 0; i < enabledLightIndices.size(); i++) + enabledLightIndices[i] = UINT32_MAX; + } + + D3D9CapturedState::~D3D9CapturedState() { + if (unlikely(textures != nullptr)) { + for (uint32_t i = 0; i < textures->size(); i++) + TextureChangePrivate((*textures)[i], nullptr); + } + } + D3D9StateBlock::D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type) : D3D9StateBlockBase(pDevice) , m_deviceState (pDevice->GetRawState()) { @@ -78,7 +93,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { - m_state.renderStates[State] = Value; + if (unlikely(!m_state.renderStates)) + m_state.renderStates = std::make_unique(); + + (*m_state.renderStates)[State] = Value; m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); m_captures.renderStates.set(State, true); @@ -90,7 +108,10 @@ namespace dxvk { DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { - m_state.samplerStates[StateSampler][Type] = Value; + if 
(unlikely(!m_state.samplerStates)) + m_state.samplerStates = std::make_unique(); + + (*m_state.samplerStates)[StateSampler][Type] = Value; m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); m_captures.samplers.set(StateSampler, true); @@ -104,10 +125,13 @@ namespace dxvk { D3D9VertexBuffer* pStreamData, UINT OffsetInBytes, UINT Stride) { - m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData; + if (unlikely(!m_state.vertexBuffers)) + m_state.vertexBuffers = std::make_unique(); - m_state.vertexBuffers[StreamNumber].offset = OffsetInBytes; - m_state.vertexBuffers[StreamNumber].stride = Stride; + D3D9VBO& vbo = (*m_state.vertexBuffers)[StreamNumber]; + vbo.vertexBuffer = pStreamData; + vbo.offset = OffsetInBytes; + vbo.stride = Stride; m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); m_captures.vertexBuffers.set(StreamNumber, true); @@ -125,7 +149,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { - TextureChangePrivate(m_state.textures[StateSampler], pTexture); + if (unlikely(!m_state.textures)) + m_state.textures = std::make_unique(); + + TextureChangePrivate((*m_state.textures)[StateSampler], pTexture); m_captures.flags.set(D3D9CapturedStateFlag::Textures); m_captures.textures.set(StateSampler, true); @@ -150,7 +177,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetMaterial(const D3DMATERIAL9* pMaterial) { - m_state.material = *pMaterial; + if (!m_state.material) + m_state.material = std::make_unique(); + + *m_state.material = *pMaterial; m_captures.flags.set(D3D9CapturedStateFlag::Material); return D3D_OK; @@ -198,7 +228,10 @@ namespace dxvk { HRESULT D3D9StateBlock::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { - m_state.transforms[idx] = ConvertMatrix(pMatrix); + if (unlikely(!m_state.transforms)) + m_state.transforms = std::make_unique(); + + (*m_state.transforms)[idx] = ConvertMatrix(pMatrix); m_captures.flags.set(D3D9CapturedStateFlag::Transforms); 
m_captures.transforms.set(idx, true); @@ -210,7 +243,10 @@ namespace dxvk { DWORD Stage, D3D9TextureStageStateTypes Type, DWORD Value) { - m_state.textureStages[Stage][Type] = Value; + if (unlikely(!m_state.textureStages)) + m_state.textureStages = std::make_unique(); + + (*m_state.textureStages)[Stage][Type] = Value; m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); m_captures.textureStages.set(Stage, true); @@ -220,7 +256,11 @@ namespace dxvk { HRESULT D3D9StateBlock::MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { - m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix); + if (unlikely(!m_state.transforms)) + m_state.transforms = std::make_unique<D3D9CapturedState::TransformsArray>(); + + Matrix4& transform = (*m_state.transforms)[idx]; + transform = transform * ConvertMatrix(pMatrix); m_captures.flags.set(D3D9CapturedStateFlag::Transforms); m_captures.transforms.set(idx, true); @@ -323,6 +363,9 @@ namespace dxvk { UINT StartRegister, const BOOL* pConstantData, UINT BoolCount) { + if (unlikely(!m_state.consts)) + m_state.consts = std::make_unique(); + return SetShaderConstants< DxsoProgramTypes::PixelShader, D3D9ConstantType::Bool>( @@ -333,15 +376,21 @@ namespace dxvk { HRESULT D3D9StateBlock::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.vsConsts.bConsts[idx] &= ~mask; - m_state.vsConsts.bConsts[idx] |= bits & mask; + if (unlikely(!m_state.consts)) + m_state.consts = std::make_unique(); + + m_state.consts->vs.bConsts[idx] &= ~mask; + m_state.consts->vs.bConsts[idx] |= bits & mask; return D3D_OK; } HRESULT D3D9StateBlock::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { - m_state.psConsts.bConsts[idx] &= ~mask; - m_state.psConsts.bConsts[idx] |= bits & mask; + if (unlikely(!m_state.consts)) + m_state.consts = std::make_unique(); + + m_state.consts->ps.bConsts[idx] &= ~mask; + m_state.consts->ps.bConsts[idx] |= bits & mask; return D3D_OK; } diff --git a/src/d3d9/d3d9_stateblock.h
b/src/d3d9/d3d9_stateblock.h index a098b54fa45..1ba13adba57 100644 --- a/src/d3d9/d3d9_stateblock.h +++ b/src/d3d9/d3d9_stateblock.h @@ -66,6 +66,58 @@ namespace dxvk { bit::bitvector lightEnabledChanges; }; + struct D3D9CapturedState { + typedef typename std::array RenderStatesArray; + typedef typename std::array, SamplerCount> SamplerStatesArray; + typedef typename std::array VertexBuffersArray; + typedef typename std::array TexturesArray; + typedef typename std::array, caps::TextureStageCount> TextureStagesArray; + typedef typename std::array TransformsArray; + + D3D9CapturedState(); + + ~D3D9CapturedState(); + + Com vertexDecl; + Com indices; + + std::unique_ptr renderStates = nullptr; + + std::unique_ptr samplerStates = nullptr; + + std::unique_ptr vertexBuffers = nullptr; + + std::unique_ptr textures = nullptr; + + Com vertexShader; + Com pixelShader; + + D3DVIEWPORT9 viewport = {}; + RECT scissorRect = {}; + + std::array< + D3D9ClipPlane, + caps::MaxClipPlanes> clipPlanes = {}; + + std::unique_ptr textureStages = nullptr; + + std::unique_ptr consts = nullptr; + + std::array streamFreq = {}; + + std::unique_ptr transforms = nullptr; + + std::unique_ptr material = nullptr; + + std::vector> lights; + std::array enabledLightIndices; + + bool IsLightEnabled(DWORD Index) { + const auto& indices = enabledLightIndices; + return std::find(indices.begin(), indices.end(), Index) != indices.end(); + } + }; + enum class D3D9StateBlockType :uint32_t { None, VertexState, @@ -194,7 +246,10 @@ namespace dxvk { for (uint32_t rs : bit::BitMask(m_captures.renderStates.dword(i))) { uint32_t idx = i * 32 + rs; - dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); + if constexpr (std::is_same_v) + dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); + else + dst->SetRenderState(D3DRENDERSTATETYPE(idx), (*src->renderStates)[idx]); } } } @@ -202,27 +257,44 @@ namespace dxvk { if (m_captures.flags.test(D3D9CapturedStateFlag::SamplerStates)) { for 
(uint32_t samplerIdx : bit::BitMask(m_captures.samplers.dword(0))) { for (uint32_t stateIdx : bit::BitMask(m_captures.samplerStates[samplerIdx].dword(0))) - dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); + if constexpr (std::is_same_v) + dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); + else + dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), (*src->samplerStates)[samplerIdx][stateIdx]); } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { for (uint32_t idx : bit::BitMask(m_captures.vertexBuffers.dword(0))) { - const auto& vbo = src->vertexBuffers[idx]; + const D3D9VBO* vbo; + if constexpr (std::is_same_v) + vbo = &src->vertexBuffers[idx]; + else + vbo = &(*src->vertexBuffers)[idx]; + dst->SetStreamSource( idx, - vbo.vertexBuffer.ptr(), - vbo.offset, - vbo.stride); + vbo->vertexBuffer.ptr(), + vbo->offset, + vbo->stride); } } - if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) - dst->SetMaterial(&src->material); + if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) { + + if constexpr (std::is_same_v) + dst->SetMaterial(&src->material); + else + dst->SetMaterial(src->material.get()); + } if (m_captures.flags.test(D3D9CapturedStateFlag::Textures)) { - for (uint32_t idx : bit::BitMask(m_captures.textures.dword(0))) - dst->SetStateTexture(idx, src->textures[idx]); + for (uint32_t idx : bit::BitMask(m_captures.textures.dword(0))) { + if constexpr (std::is_same_v) + dst->SetStateTexture(idx, src->textures[idx]); + else + dst->SetStateTexture(idx, (*src->textures)[idx]); + } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexShader)) @@ -236,15 +308,22 @@ namespace dxvk { for (uint32_t trans : bit::BitMask(m_captures.transforms.dword(i))) { uint32_t idx = i * 32 + trans; - dst->SetStateTransform(idx, reinterpret_cast(&src->transforms[idx])); + if constexpr (std::is_same_v) + 
dst->SetStateTransform(idx, reinterpret_cast(&src->transforms[idx])); + else + dst->SetStateTransform(idx, reinterpret_cast(&(*src->transforms)[idx])); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::TextureStages)) { for (uint32_t stageIdx : bit::BitMask(m_captures.textureStages.dword(0))) { - for (uint32_t stateIdx : bit::BitMask(m_captures.textureStageStates[stageIdx].dword(0))) - dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), src->textureStages[stageIdx][stateIdx]); + for (uint32_t stateIdx : bit::BitMask(m_captures.textureStageStates[stageIdx].dword(0))) { + if constexpr (std::is_same_v) + dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), src->textureStages[stageIdx][stateIdx]); + else + dst->SetStateTextureStageState(stageIdx, D3D9TextureStageStateTypes(stateIdx), (*src->textureStages)[stageIdx][stateIdx]); + } } } @@ -260,11 +339,19 @@ namespace dxvk { } if (m_captures.flags.test(D3D9CapturedStateFlag::VsConstants)) { + if (unlikely(!m_state.consts)) { + m_state.consts = std::make_unique(); + } + for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); i++) { for (uint32_t consts : bit::BitMask(m_captures.vsConsts.fConsts.dword(i))) { uint32_t idx = i * 32 + consts; - dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetVertexShaderConstantF(idx, (float*)&src->consts.vs.fConsts[idx], 1); + } else { + dst->SetVertexShaderConstantF(idx, (float*)&src->consts->vs.fConsts[idx], 1); + } } } @@ -272,22 +359,38 @@ namespace dxvk { for (uint32_t consts : bit::BitMask(m_captures.vsConsts.iConsts.dword(i))) { uint32_t idx = i * 32 + consts; - dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetVertexShaderConstantI(idx, (int*)&src->consts.vs.iConsts[idx], 1); + } else { + dst->SetVertexShaderConstantI(idx, (int*)&src->consts->vs.iConsts[idx], 1); + } } } if 
(m_captures.vsConsts.bConsts.any()) { for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) - dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); + if constexpr (std::is_same_v) { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->consts.vs.bConsts[i]); + } else { + dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->consts->vs.bConsts[i]); + } } } if (m_captures.flags.test(D3D9CapturedStateFlag::PsConstants)) { + if (unlikely(!m_state.consts)) { + m_state.consts = std::make_unique(); + } + for (uint32_t i = 0; i < m_captures.psConsts.fConsts.dwordCount(); i++) { for (uint32_t consts : bit::BitMask(m_captures.psConsts.fConsts.dword(i))) { uint32_t idx = i * 32 + consts; - dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts.fConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetPixelShaderConstantF(idx, (float*)&src->consts.ps.fConsts[idx], 1); + } else { + dst->SetPixelShaderConstantF(idx, (float*)&src->consts->ps.fConsts[idx], 1); + } } } @@ -295,13 +398,21 @@ namespace dxvk { for (uint32_t consts : bit::BitMask(m_captures.psConsts.iConsts.dword(i))) { uint32_t idx = i * 32 + consts; - dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts.iConsts[idx], 1); + if constexpr (std::is_same_v) { + dst->SetPixelShaderConstantI(idx, (int*)&src->consts.ps.iConsts[idx], 1); + } else { + dst->SetPixelShaderConstantI(idx, (int*)&src->consts->ps.iConsts[idx], 1); + } } } if (m_captures.psConsts.bConsts.any()) { for (uint32_t i = 0; i < m_captures.psConsts.bConsts.dwordCount(); i++) - dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts.bConsts[i]); + if constexpr (std::is_same_v) { + dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->consts.ps.bConsts[i]); + } else { + dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->consts->ps.bConsts[i]); + } } } @@ -358,7 +469,7 @@ namespace dxvk { 
ProgramType, ConstantType, T>( - &m_state, + m_state.consts.get(), StartRegister, pConstantData, Count, @@ -391,10 +502,10 @@ namespace dxvk { void CaptureType(D3D9StateBlockType State); - D3D9CapturableState m_state; + D3D9CapturedState m_state; D3D9StateCaptures m_captures; - D3D9CapturableState* m_deviceState; + D3D9DeviceState* m_deviceState; bool m_applying = false;