Skip to content

Commit

Permalink
[d3d9] Reduce size of state blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
K0bin committed Feb 6, 2023
1 parent 2263dca commit c3de03d
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 110 deletions.
21 changes: 15 additions & 6 deletions src/d3d9/d3d9_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6617,12 +6617,21 @@ namespace dxvk {
}
}

UpdateStateConstants<ProgramType, ConstantType, T>(
&m_state,
StartRegister,
pConstantData,
Count,
m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled);
if constexpr (ProgramType == DxsoProgramType::VertexShader) {
UpdateStateConstants<D3D9ShaderConstantsVSSoftware, ProgramType, ConstantType, T>(
m_state.vsConsts,
StartRegister,
pConstantData,
Count,
m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled);
} else {
UpdateStateConstants<D3D9ShaderConstantsPS, ProgramType, ConstantType, T>(
m_state.psConsts,
StartRegister,
pConstantData,
Count,
m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled);
}

return D3D_OK;
}
Expand Down
12 changes: 6 additions & 6 deletions src/d3d9/d3d9_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ namespace dxvk {
return &m_d3d9Options;
}

Direct3DState9* GetRawState() {
// Returns the address of the device's m_state member (the live device state).
D3D9DeviceState* GetRawState() {
return &m_state;
}

Expand Down Expand Up @@ -950,6 +950,10 @@ namespace dxvk {
void TouchMappedTexture(D3D9CommonTexture* pTexture);
void RemoveMappedTexture(D3D9CommonTexture* pTexture);

// Reports whether the behavior flags include mixed or software vertex processing.
bool CanSWVP() {
  const auto swvpMask = D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING;
  return (m_behaviorFlags & swvpMask) != 0;
}

private:

DxvkCsChunkRef AllocCsChunk() {
Expand All @@ -976,10 +980,6 @@ namespace dxvk {
}
}

bool CanSWVP() {
return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING);
}

void DetermineConstantLayouts(bool canSWVP);

D3D9BufferSlice AllocUPBuffer(VkDeviceSize size);
Expand Down Expand Up @@ -1315,7 +1315,7 @@ namespace dxvk {
std::atomic<int64_t> m_availableMemory = { 0 };
std::atomic<int32_t> m_samplerCount = { 0 };

Direct3DState9 m_state;
D3D9DeviceState m_state;

#ifdef D3D9_ALLOW_UNMAPPING
lru_list<D3D9CommonTexture*> m_mappedTextures;
Expand Down
4 changes: 2 additions & 2 deletions src/d3d9/d3d9_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

namespace dxvk {

D3D9CapturableState::D3D9CapturableState() {
// Initializes the members whose default is not zero.
D3D9DeviceState::D3D9DeviceState() {
  // Every stream frequency entry starts at 1.
  for (auto& freq : streamFreq)
    freq = 1;

  // Every enabled-light slot starts at UINT32_MAX
  // (presumably the "no light in this slot" marker -- confirm against the light enable path).
  for (auto& lightIndex : enabledLightIndices)
    lightIndex = UINT32_MAX;
}

D3D9CapturableState::~D3D9CapturableState() {
// Resets every texture slot to nullptr through TextureChangePrivate.
D3D9DeviceState::~D3D9DeviceState() {
  for (auto& texture : textures)
    TextureChangePrivate(texture, nullptr);
}
Expand Down
75 changes: 33 additions & 42 deletions src/d3d9/d3d9_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,10 @@ namespace dxvk {
0.0f // Phi
};

struct D3D9CapturableState {
D3D9CapturableState();
struct D3D9DeviceState {
D3D9DeviceState();

~D3D9CapturableState();
~D3D9DeviceState();

Com<D3D9VertexDecl, false> vertexDecl;
Com<D3D9IndexBuffer, false> indices;
Expand Down Expand Up @@ -227,68 +227,59 @@ namespace dxvk {
std::vector<std::optional<D3DLIGHT9>> lights;
std::array<DWORD, caps::MaxEnabledLights> enabledLightIndices;

std::array<Com<D3D9Surface, false>, caps::MaxSimultaneousRenderTargets> renderTargets;
Com<D3D9Surface, false> depthStencil;

// Returns true when Index appears anywhere in enabledLightIndices.
bool IsLightEnabled(DWORD Index) {
  for (const DWORD enabledIndex : enabledLightIndices) {
    if (enabledIndex == Index)
      return true;
  }

  return false;
}
};

template <
typename ConstantsT,
DxsoProgramType ProgramType,
D3D9ConstantType ConstantType,
typename T>
HRESULT UpdateStateConstants(
D3D9CapturableState* pState,
ConstantsT& Set,
UINT StartRegister,
const T* pConstantData,
UINT Count,
bool FloatEmu) {
auto UpdateHelper = [&] (auto& set) {
if constexpr (ConstantType == D3D9ConstantType::Float) {

if (!FloatEmu) {
size_t size = Count * sizeof(Vector4);

std::memcpy(set.fConsts[StartRegister].data, pConstantData, size);
}
else {
for (UINT i = 0; i < Count; i++)
set.fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
}
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
size_t size = Count * sizeof(Vector4i);
if constexpr (ConstantType == D3D9ConstantType::Float) {

if (!FloatEmu) {
size_t size = Count * sizeof(Vector4);

std::memcpy(set.iConsts[StartRegister].data, pConstantData, size);
std::memcpy(Set.fConsts[StartRegister].data, pConstantData, size);
}
else {
for (uint32_t i = 0; i < Count; i++) {
const uint32_t constantIdx = StartRegister + i;
const uint32_t arrayIdx = constantIdx / 32;
const uint32_t bitIdx = constantIdx % 32;

const uint32_t bit = 1u << bitIdx;

set.bConsts[arrayIdx] &= ~bit;
if (pConstantData[i])
set.bConsts[arrayIdx] |= bit;
}
for (UINT i = 0; i < Count; i++)
Set.fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
}
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
size_t size = Count * sizeof(Vector4i);

return D3D_OK;
};

return ProgramType == DxsoProgramTypes::VertexShader
? UpdateHelper(pState->vsConsts)
: UpdateHelper(pState->psConsts);
}
std::memcpy(Set.iConsts[StartRegister].data, pConstantData, size);
}
else {
for (uint32_t i = 0; i < Count; i++) {
const uint32_t constantIdx = StartRegister + i;
const uint32_t arrayIdx = constantIdx / 32;
const uint32_t bitIdx = constantIdx % 32;

struct Direct3DState9 : public D3D9CapturableState {
const uint32_t bit = 1u << bitIdx;

std::array<Com<D3D9Surface, false>, caps::MaxSimultaneousRenderTargets> renderTargets;
Com<D3D9Surface, false> depthStencil;
Set.bConsts[arrayIdx] &= ~bit;
if (pConstantData[i])
Set.bConsts[arrayIdx] |= bit;
}
}

};
return D3D_OK;
}


struct D3D9InputAssemblyState {
Expand Down
83 changes: 69 additions & 14 deletions src/d3d9/d3d9_stateblock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,21 @@

namespace dxvk {

// Initializes the members whose default is not zero.
D3D9CapturedState::D3D9CapturedState() {
  // Every stream frequency entry starts at 1.
  for (auto& freq : streamFreq)
    freq = 1;

  // Every enabled-light slot starts at UINT32_MAX
  // (presumably the "no light in this slot" marker -- confirm against the light enable path).
  for (auto& lightIndex : enabledLightIndices)
    lightIndex = UINT32_MAX;
}

// Resets every captured texture slot to nullptr, but only if the texture
// array was ever allocated.
D3D9CapturedState::~D3D9CapturedState() {
  if (unlikely(textures != nullptr)) {
    for (auto& texture : *textures)
      TextureChangePrivate(texture, nullptr);
  }
}

D3D9StateBlock::D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type)
: D3D9StateBlockBase(pDevice)
, m_deviceState (pDevice->GetRawState()) {
Expand Down Expand Up @@ -78,7 +93,10 @@ namespace dxvk {


HRESULT D3D9StateBlock::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) {
m_state.renderStates[State] = Value;
if (unlikely(!m_state.renderStates))
m_state.renderStates = std::make_unique<D3D9CapturedState::RenderStatesArray>();

(*m_state.renderStates)[State] = Value;

m_captures.flags.set(D3D9CapturedStateFlag::RenderStates);
m_captures.renderStates.set(State, true);
Expand All @@ -90,7 +108,10 @@ namespace dxvk {
DWORD StateSampler,
D3DSAMPLERSTATETYPE Type,
DWORD Value) {
m_state.samplerStates[StateSampler][Type] = Value;
if (unlikely(!m_state.samplerStates))
m_state.samplerStates = std::make_unique<D3D9CapturedState::SamplerStatesArray>();

(*m_state.samplerStates)[StateSampler][Type] = Value;

m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);
m_captures.samplers.set(StateSampler, true);
Expand All @@ -104,10 +125,13 @@ namespace dxvk {
D3D9VertexBuffer* pStreamData,
UINT OffsetInBytes,
UINT Stride) {
m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData;
if (unlikely(!m_state.vertexBuffers))
m_state.vertexBuffers = std::make_unique<D3D9CapturedState::VertexBuffersArray>();

m_state.vertexBuffers[StreamNumber].offset = OffsetInBytes;
m_state.vertexBuffers[StreamNumber].stride = Stride;
D3D9VBO& vbo = (*m_state.vertexBuffers)[StreamNumber];
vbo.vertexBuffer = pStreamData;
vbo.offset = OffsetInBytes;
vbo.stride = Stride;

m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers);
m_captures.vertexBuffers.set(StreamNumber, true);
Expand All @@ -125,7 +149,10 @@ namespace dxvk {


HRESULT D3D9StateBlock::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) {
TextureChangePrivate(m_state.textures[StateSampler], pTexture);
if (unlikely(!m_state.textures))
m_state.textures = std::make_unique<D3D9CapturedState::TexturesArray>();

TextureChangePrivate((*m_state.textures)[StateSampler], pTexture);

m_captures.flags.set(D3D9CapturedStateFlag::Textures);
m_captures.textures.set(StateSampler, true);
Expand All @@ -150,7 +177,10 @@ namespace dxvk {


HRESULT D3D9StateBlock::SetMaterial(const D3DMATERIAL9* pMaterial) {
m_state.material = *pMaterial;
// Allocate the material storage the first time a material is recorded.
// The guard carries the same unlikely() hint as the other lazily
// allocated state in this file.
if (unlikely(!m_state.material))
  m_state.material = std::make_unique<D3DMATERIAL9>();

*m_state.material = *pMaterial;

m_captures.flags.set(D3D9CapturedStateFlag::Material);
return D3D_OK;
Expand Down Expand Up @@ -198,7 +228,10 @@ namespace dxvk {


HRESULT D3D9StateBlock::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) {
m_state.transforms[idx] = ConvertMatrix(pMatrix);
if (unlikely(!m_state.transforms))
m_state.transforms = std::make_unique<D3D9CapturedState::TransformsArray>();

(*m_state.transforms)[idx] = ConvertMatrix(pMatrix);

m_captures.flags.set(D3D9CapturedStateFlag::Transforms);
m_captures.transforms.set(idx, true);
Expand All @@ -210,7 +243,10 @@ namespace dxvk {
DWORD Stage,
D3D9TextureStageStateTypes Type,
DWORD Value) {
m_state.textureStages[Stage][Type] = Value;
if (unlikely(!m_state.textureStages))
m_state.textureStages = std::make_unique<D3D9CapturedState::TextureStagesArray>();

(*m_state.textureStages)[Stage][Type] = Value;

m_captures.flags.set(D3D9CapturedStateFlag::TextureStages);
m_captures.textureStages.set(Stage, true);
Expand All @@ -220,7 +256,11 @@ namespace dxvk {


HRESULT D3D9StateBlock::MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) {
m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix);
// Allocate the transform array on first use. The guard has to test and
// allocate `transforms`, the member dereferenced just below; checking
// `textureStages` here would leave `transforms` null when no transform
// has been recorded yet and allocate an unrelated array instead.
if (unlikely(!m_state.transforms))
  m_state.transforms = std::make_unique<D3D9CapturedState::TransformsArray>();

// Post-multiply the stored transform by the supplied matrix.
Matrix4& transform = (*m_state.transforms)[idx];
transform = transform * ConvertMatrix(pMatrix);

m_captures.flags.set(D3D9CapturedStateFlag::Transforms);
m_captures.transforms.set(idx, true);
Expand Down Expand Up @@ -333,15 +373,30 @@ namespace dxvk {


HRESULT D3D9StateBlock::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
m_state.vsConsts.bConsts[idx] &= ~mask;
m_state.vsConsts.bConsts[idx] |= bits & mask;
// Choose the vertex constant set based on m_parent->CanSWVP(), allocate it
// on first use, then replace the bits selected by `mask` with those of `bits`.
if (unlikely(m_parent->CanSWVP())) {
  if (unlikely(!m_state.vsConstsSW))
    m_state.vsConstsSW = std::make_unique<D3D9ShaderConstantsVSSoftware>();

  auto& dword = m_state.vsConstsSW->bConsts[idx];
  dword = (dword & ~mask) | (bits & mask);
} else {
  if (unlikely(!m_state.vsConsts))
    m_state.vsConsts = std::make_unique<D3D9ShaderConstantsVSHardware>();

  auto& dword = m_state.vsConsts->bConsts[idx];
  dword = (dword & ~mask) | (bits & mask);
}

return D3D_OK;
}


HRESULT D3D9StateBlock::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
m_state.psConsts.bConsts[idx] &= ~mask;
m_state.psConsts.bConsts[idx] |= bits & mask;
if (unlikely(!m_state.psConsts))
m_state.psConsts = std::make_unique<D3D9ShaderConstantsPS>();

m_state.psConsts->bConsts[idx] &= ~mask;
m_state.psConsts->bConsts[idx] |= bits & mask;
return D3D_OK;
}

Expand Down
Loading

0 comments on commit c3de03d

Please sign in to comment.