Skip to content

Commit

Permalink
rpcsx-gpu: fix linear tiler offset
Browse files Browse the repository at this point in the history
implement depth clear
implement shader resource merge
fix smrd offset
fix discard export
  • Loading branch information
DHrpcs3 committed Oct 5, 2024
1 parent 7bea1e3 commit fb64f8b
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 88 deletions.
32 changes: 18 additions & 14 deletions rpcsx-gpu/Cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ void Cache::ShaderResources::loadResources(
bufferMemoryTable.map(*pointerBase,
*pointerBase + *pointerOffset + pointer.size,
Access::Read);
resourceSlotToAddress.push_back(
{slotOffset + pointer.resourceSlot, *pointerBase});
resourceSlotToAddress.emplace_back(slotOffset + pointer.resourceSlot,
*pointerBase);
}

for (auto &bufferRes : res.buffers) {
Expand All @@ -124,10 +124,16 @@ void Cache::ShaderResources::loadResources(
std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 3, &*word3,
sizeof(std::uint32_t));

bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(),
bufferRes.access);
resourceSlotToAddress.push_back(
{slotOffset + bufferRes.resourceSlot, buffer.address()});
if (auto it = bufferMemoryTable.queryArea(buffer.address());
it != bufferMemoryTable.end() &&
it.beginAddress() == buffer.address() && it.size() == buffer.size()) {
it.get() |= bufferRes.access;
} else {
bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(),
bufferRes.access);
}
resourceSlotToAddress.emplace_back(slotOffset + bufferRes.resourceSlot,
buffer.address());
}

for (auto &texture : res.textures) {
Expand Down Expand Up @@ -569,7 +575,6 @@ struct CachedImage : Cache::Entry {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
regions.push_back({
.bufferOffset = regionInfo.linearOffset,
.bufferRowLength = regionInfo.linearPitch,
.imageSubresource =
{
.aspectMask = toAspect(kind),
Expand All @@ -579,9 +584,9 @@ struct CachedImage : Cache::Entry {
},
.imageExtent =
{
.width = regionInfo.linearWidth,
.height = regionInfo.linearHeight,
.depth = regionInfo.linearDepth,
.width = std::max(image.getWidth() >> mipLevel, 1u),
.height = std::max(image.getHeight() >> mipLevel, 1u),
.depth = std::max(image.getDepth() >> mipLevel, 1u),
},
});
}
Expand Down Expand Up @@ -1183,7 +1188,6 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {

regions.push_back({
.bufferOffset = info.linearOffset,
.bufferRowLength = info.linearPitch,
.imageSubresource =
{
.aspectMask = toAspect(key.kind),
Expand All @@ -1193,9 +1197,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
},
.imageExtent =
{
.width = info.linearWidth,
.height = info.linearHeight,
.depth = info.linearDepth,
.width = std::max(key.extent.width >> mipLevel, 1u),
.height = std::max(key.extent.height >> mipLevel, 1u),
.depth = std::max(key.extent.depth >> mipLevel, 1u),
},
});
}
Expand Down
11 changes: 4 additions & 7 deletions rpcsx-gpu/Cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@
#include "Pipe.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "rx/die.hpp"
#include "shader/Access.hpp"
#include "shader/Evaluator.hpp"
#include "shader/GcnConverter.hpp"
#include <algorithm>
#include <memory>
#include <mutex>
#include <ostream>
#include <print>
#include <rx/ConcurrentBitPool.hpp>
#include <rx/MemoryTable.hpp>
Expand All @@ -27,7 +24,7 @@ struct ShaderKey {
shader::gcn::Environment env;
};

enum class ImageKind { Color, Depth, Stencil };
enum class ImageKind : std::uint8_t { Color, Depth, Stencil };

struct ImageKey {
std::uint64_t readAddress;
Expand Down Expand Up @@ -265,7 +262,7 @@ struct Cache {
}
~Tag() { release(); }

void swap(Tag &other) {
void swap(Tag &other) noexcept {
std::swap(static_cast<TagData &>(*this), static_cast<TagData &>(other));
}

Expand Down Expand Up @@ -364,7 +361,7 @@ struct Cache {
std::span<const VkViewport> viewPorts);
void release();

void swap(GraphicsTag &other) {
void swap(GraphicsTag &other) noexcept {
Tag::swap(other);
std::swap(mAcquiredGraphicsDescriptorSet,
other.mAcquiredGraphicsDescriptorSet);
Expand Down Expand Up @@ -396,7 +393,7 @@ struct Cache {

void release();

void swap(ComputeTag &other) {
void swap(ComputeTag &other) noexcept {
Tag::swap(other);
std::swap(mAcquiredComputeDescriptorSet,
other.mAcquiredComputeDescriptorSet);
Expand Down
175 changes: 114 additions & 61 deletions rpcsx-gpu/Renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
std::uint32_t vertexCount, std::uint32_t firstInstance,
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
std::uint32_t indexCount) {
if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
return;
}

if (pipe.context.cbColorControl.mode == gnm::CbMode::Disable) {
return;
}
Expand All @@ -153,10 +149,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
return;
}

if (pipe.context.cbTargetMask.raw == 0) {
return;
}

auto cacheTag = pipe.device->getGraphicsTag(vmId, pipe.scheduler);
auto targetMask = pipe.context.cbTargetMask.raw;

Expand Down Expand Up @@ -200,50 +192,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
// FIXME
stencilAccess = Access::None;

if (depthAccess != Access::None) {
auto viewPortScissor = pipe.context.paScScreenScissor;
auto viewPortRect = gnm::toVkRect2D(viewPortScissor);

auto imageView = cacheTag.getImageView(
{
.readAddress = static_cast<std::uint64_t>(pipe.context.dbZReadBase)
<< 8,
.writeAddress =
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
.type = gnm::TextureType::Dim2D,
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
.extent =
{
.width = viewPortRect.extent.width,
.height = viewPortRect.extent.height,
.depth = 1,
},
.pitch = viewPortRect.extent.width,
.mipCount = 1,
.arrayLayerCount = 1,
.kind = ImageKind::Depth,
},
depthAccess);

depthAttachment = {
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = imageView.handle,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
};

if ((depthAccess & Access::Read) == Access::None) {
depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear;
depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
}

if ((depthAccess & Access::Write) == Access::None) {
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE;
}
}

for (auto &cbColor : pipe.context.cbColor) {
if (targetMask == 0) {
break;
Expand Down Expand Up @@ -304,6 +252,26 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
access |= Access::Write;
}

if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
if (cbColor.info.fastClear) {
auto image = cacheTag.getImage(renderTargetInfo, access);
VkClearColorValue clearValue = {
.uint32 =
{
cbColor.clearWord0,
cbColor.clearWord1,
cbColor.clearWord2,
},
};

vkCmdClearColorImage(cacheTag.getScheduler().getCommandBuffer(),
image.handle, VK_IMAGE_LAYOUT_GENERAL, &clearValue,
1, &image.subresource);
}

continue;
}

auto imageView = cacheTag.getImageView(renderTargetInfo, access);

colorAttachments[renderTargets] = {
Expand Down Expand Up @@ -359,8 +327,94 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
}

if (renderTargets == 0) {
if ((depthAccess & Access::Write) != Access::None) {
auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor);

auto image = cacheTag.getImage(
{
.readAddress =
static_cast<std::uint64_t>(pipe.context.dbZReadBase) << 8,
.writeAddress =
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
.type = gnm::TextureType::Dim2D,
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
.extent =
{
.width = screenRect.extent.width,
.height = screenRect.extent.height,
.depth = 1,
},
.pitch = screenRect.extent.width,
.mipCount = 1,
.arrayLayerCount = 1,
.kind = ImageKind::Depth,
},
Access::Write);

VkClearDepthStencilValue depthStencil = {
.depth = pipe.context.dbDepthClear,
};

vkCmdClearDepthStencilImage(cacheTag.getScheduler().getCommandBuffer(),
image.handle, VK_IMAGE_LAYOUT_GENERAL,
&depthStencil, 1, &image.subresource);
pipe.scheduler.submit();
pipe.scheduler.wait();
}

return;
}

if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
pipe.scheduler.submit();
pipe.scheduler.wait();
return;
}

if (depthAccess != Access::None) {
auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor);

auto imageView = cacheTag.getImageView(
{
.readAddress = static_cast<std::uint64_t>(pipe.context.dbZReadBase)
<< 8,
.writeAddress =
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
.type = gnm::TextureType::Dim2D,
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
.extent =
{
.width = screenRect.extent.width,
.height = screenRect.extent.height,
.depth = 1,
},
.pitch = screenRect.extent.width,
.mipCount = 1,
.arrayLayerCount = 1,
.kind = ImageKind::Depth,
},
depthAccess);

depthAttachment = {
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = imageView.handle,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
};

if ((depthAccess & Access::Read) == Access::None) {
depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear;
depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
}

if ((depthAccess & Access::Write) == Access::None) {
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE;
}
}

if (indiciesAddress == 0) {
indexCount = vertexCount;
}
Expand Down Expand Up @@ -483,11 +537,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
VkCullModeFlags cullMode = VK_CULL_MODE_NONE;

if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) {
if (pipe.context.paSuScModeCntl.cullBack) {
cullMode |= VK_CULL_MODE_BACK_BIT;
}
if (pipe.context.paSuScModeCntl.cullFront) {
cullMode |= VK_CULL_MODE_FRONT_BIT;
if (pipe.context.paSuScModeCntl.cullBack) {
cullMode |= VK_CULL_MODE_BACK_BIT;
}
if (pipe.context.paSuScModeCntl.cullFront) {
cullMode |= VK_CULL_MODE_FRONT_BIT;
}
}

Expand Down Expand Up @@ -521,12 +575,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
}

void amdgpu::dispatch(Cache &cache, Scheduler &sched,
Registers::ComputeConfig &computeConfig,
std::uint32_t groupCountX, std::uint32_t groupCountY,
std::uint32_t groupCountZ) {
Registers::ComputeConfig &pgm, std::uint32_t groupCountX,
std::uint32_t groupCountY, std::uint32_t groupCountZ) {
auto tag = cache.createComputeTag(sched);
auto descriptorSet = tag.getDescriptorSet();
auto shader = tag.getShader(computeConfig);
auto shader = tag.getShader(pgm);
auto pipelineLayout = tag.getComputePipelineLayout();
tag.buildDescriptors(descriptorSet);

Expand Down
2 changes: 1 addition & 1 deletion rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ static SurfaceInfo computeTextureLinearInfo(
}

surfaceOffset += arraySliceCount * surfaceSize;
surfaceOffset += arraySliceCount * linearSize;
linearOffset += arraySliceCount * linearSize;
}

result.totalTiledSize = surfaceOffset;
Expand Down
2 changes: 1 addition & 1 deletion rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -2101,7 +2101,7 @@ void tbuffer_store_format_xyzw(u32vec4 vdata, uint32_t vOFFSET, uint32_t vINDEX,
#define S_LOAD_DWORD(dest, memoryLocationHint, sbase, offset, N) \
int32_t _offset = 0; \
uint64_t deviceAreaSize = 0; \
uint64_t deviceAddress = findMemoryAddress(sbase + offset, SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \
uint64_t deviceAddress = findMemoryAddress((sbase & ~uint64_t(3)) + (offset & ~3), SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \
if (deviceAddress == kInvalidAddress || deviceAreaSize < SIZEOF(uint32_t) * N) { \
for (int i = 0; i < (N); ++i) { \
dest[i] = 0; \
Expand Down
Loading

0 comments on commit fb64f8b

Please sign in to comment.