Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle block transfers from RAM to depth buffers. #18478

Merged
merged 3 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 34 additions & 15 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer
vfb->usageFlags = FB_USAGE_RENDER_COLOR;

u32 colorByteSize = vfb->BufferByteSize(RASTER_COLOR);
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufRangeEnd_) {
framebufRangeEnd_ = params.fb_address + colorByteSize;
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufColorRangeEnd_) {
framebufColorRangeEnd_ = params.fb_address + colorByteSize;
}

// This is where we actually create the framebuffer. The true is "force".
Expand Down Expand Up @@ -1904,6 +1904,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,

dst &= 0x3FFFFFFF;
src &= 0x3FFFFFFF;

if (Memory::IsVRAMAddress(dst))
dst &= 0x041FFFFF;
if (Memory::IsVRAMAddress(src))
Expand All @@ -1917,7 +1918,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::MEMSET);

// TODO: In the future we should probably check both channels. Currently depth is only on request.
RasterChannel channel = flags & GPUCopyFlag::DEPTH_REQUESTED ? RASTER_DEPTH : RASTER_COLOR;
RasterChannel channel = (flags & GPUCopyFlag::DEPTH_REQUESTED) ? RASTER_DEPTH : RASTER_COLOR;

TinySet<CopyCandidate, 4> srcCandidates;
TinySet<CopyCandidate, 4> dstCandidates;
Expand Down Expand Up @@ -2172,14 +2173,14 @@ bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_p
for (auto vfb : vfbs_) {
BlockTransferRect candidate{ vfb, RASTER_COLOR };

// Check for easily detected depth copies for logging purposes.
// Depth copies are not that useful though because you manually need to account for swizzle, so
// not sure if games will use them. Actually we do have a case, Iron Man in issue #16530.
if (vfb->z_address == basePtr && vfb->z_stride == stride_pixels && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
// Two cases so far of games depending on depth copies: Iron Man in issue #16530 (buffer->buffer)
// and also #17878 where a game does ram->buffer to an auto-swizzling (|0x600000) address,
// to initialize Z with a pre-rendered depth buffer.
if (vfb->z_address == basePtr && vfb->BufferByteStride(RASTER_DEPTH) == byteStride && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
WARN_LOG_N_TIMES(z_xfer, 5, G3D, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
candidate.channel = RASTER_DEPTH;
candidate.x_bytes = x_pixels * 2;
candidate.w_bytes = w_pixels * 2;
candidate.x_bytes = x_pixels * bpp;
candidate.w_bytes = w_pixels * bpp;
candidate.y = y;
candidate.h = h;
candidates.push_back(candidate);
Expand Down Expand Up @@ -2347,8 +2348,8 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
vfbs_.push_back(vfb);

u32 byteSize = vfb->BufferByteSize(channel);
if (fbAddress + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = fbAddress + byteSize;
if (fbAddress + byteSize > framebufColorRangeEnd_) {
framebufColorRangeEnd_ = fbAddress + byteSize;
}

return vfb;
Expand Down Expand Up @@ -2510,8 +2511,10 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
return false;
}

// Skip checking if there's no framebuffers in that area.
if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
// Skip checking if there are no framebuffers in that area. Make a special exception for obvious transfers to a depth buffer (see issue #17878).
bool dstDepthSwizzle = Memory::IsVRAMAddress(dstBasePtr) && ((dstBasePtr & 0x600000) == 0x600000);

if (!dstDepthSwizzle && !MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
return false;
}

Expand All @@ -2529,6 +2532,10 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
}
}

if (!srcBuffer && dstBuffer && dstRect.channel == RASTER_DEPTH) {
dstBuffer = true;
}

if (srcBuffer && !dstBuffer) {
// In here, we can't read from dstRect.
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
Expand Down Expand Up @@ -2635,7 +2642,19 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
return true;

} else if (dstBuffer) {
// Here we should just draw the pixels into the buffer. Copy first.
// Handle depth uploads directly here, and don't bother copying the data. This is compat-flag-gated for now;
// it may be generalized when the compat flag is removed.
if (dstRect.channel == RASTER_DEPTH) {
WARN_LOG_ONCE(btud, G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d %s)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp, RasterChannelToString(dstRect.channel));
FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
DrawPixels(dstRect.vfb, dstX, dstY, srcBase, dstRect.vfb->Format(dstRect.channel), srcStride * bpp / 2, (int)(dstRect.w_bytes / 2), dstRect.h, dstRect.channel, "BlockTransferCopy_DrawPixelsDepth");
RebindFramebuffer("RebindFramebuffer - UploadDepth");
return true;
}

// Here we should just draw the pixels into the buffer. Return false to copy the memory first.
// NotifyBlockTransferAfter will take care of the rest.
return false;
} else if (srcBuffer) {
WARN_LOG_N_TIMES(btd, 10, G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
Expand Down Expand Up @@ -2680,7 +2699,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
}
}

if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
if (MayIntersectFramebufferColor(srcBasePtr) || MayIntersectFramebufferColor(dstBasePtr)) {
// TODO: Figure out how we can avoid repeating the search here.

BlockTransferRect dstRect{};
Expand Down
7 changes: 4 additions & 3 deletions GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,14 @@ class FramebufferManagerCommon {
return useBufferedRendering_;
}

bool MayIntersectFramebuffer(u32 start) const {
// TODO: Maybe just include the last depth buffer address in this, too.
bool MayIntersectFramebufferColor(u32 start) const {
// Clear the cache/kernel bits.
start &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(start))
start &= 0x041FFFFF;
// Most games only have two framebuffers at the start.
if (start >= framebufRangeEnd_ || start < PSP_GetVidMemBase()) {
if (start >= framebufColorRangeEnd_ || start < PSP_GetVidMemBase()) {
return false;
}
return true;
Expand Down Expand Up @@ -572,7 +573,7 @@ class FramebufferManagerCommon {
Draw::Framebuffer *currentFramebufferCopy_ = nullptr;

// The range of PSP memory that may contain FBOs. So we can skip iterating.
u32 framebufRangeEnd_ = 0;
u32 framebufColorRangeEnd_ = 0;

bool useBufferedRendering_ = false;
bool postShaderIsUpscalingFilter_ = false;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/StencilCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
using namespace Draw;

addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
if (!MayIntersectFramebufferColor(addr)) {
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1853,7 +1853,7 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {

bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (framebufferManager_->MayIntersectFramebuffer(src) || framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(src) || framebufferManager_->MayIntersectFramebufferColor(dest)) {
if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) {
// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.
// Since they're identical we don't need to copy.
Expand Down Expand Up @@ -1881,7 +1881,7 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags

bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) {
// This may indicate a memset, usually to 0, of a framebuffer.
if (framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
Memory::Memset(dest, v, size, "GPUMemset");
if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason)) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
Expand Down Expand Up @@ -1920,7 +1920,7 @@ void GPUCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int frameWid
}

bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags) {
if (framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
framebufferManager_->PerformWriteStencilFromMemory(dest, size, flags);
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ void GPUCommonHW::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
else
textureCache_->InvalidateAll(type);

if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebuffer(addr)) {
if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebufferColor(addr)) {
// Vempire invalidates (with writeback) after drawing, but before blitting.
// TODO: Investigate whether we can get this to work some other way.
if (type == GPU_INVALIDATE_SAFE) {
Expand Down
3 changes: 3 additions & 0 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,9 @@ ULES01070 = true
ULES01071 = true
ULUS10347 = true

# Hayate no Gotoku!! Nightmare Paradise - see issue #17878
ULJM05416 = true

[DaxterRotatedAnalogStick]
# Daxter (see issue #17015)
UCUS98618 = true
Expand Down