diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 900e2d116014..0332e4d3460e 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -423,3 +423,20 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv } return *this; } + +// Emits one statement declaring a float2/vec2 named szVariable that holds the size of texture texName (mip level 0 on GLSL). +ShaderWriter &ShaderWriter::GetTextureSize(const char *szVariable, const char *texName) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + F(" float2 %s; %s.GetDimensions(%s.x, %s.y);\n", szVariable, texName, szVariable, szVariable); + break; + case HLSL_D3D9: + F(" float2 %s; %s.GetDimensions(%s.x, %s.y);\n", szVariable, texName, szVariable, szVariable); + break; + default: + // textureSize() returns an ivec2 and GLSL ES has no implicit int->float conversion, so convert explicitly. NOTE(review): GetDimensions() in the HLSL_D3D9 case above is a Shader Model 4+ texture-object method; confirm this helper is never reached on D3D9. + F("vec2 %s = vec2(textureSize(%s, 0));\n", szVariable, texName); + break; + } + return *this; +} diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index f07d4159f568..1a1896c75bf6 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -83,7 +83,8 @@ class ShaderWriter { void ConstFloat(const char *name, float value); - ShaderWriter &SampleTexture2D(const char *sampName, const char *uv); + ShaderWriter &SampleTexture2D(const char *texName, const char *uv); + ShaderWriter &GetTextureSize(const char *szVariable, const char *texName); // Simple shaders with no special tricks. 
void BeginVSMain(Slice inputs, Slice uniforms, Slice varyings); diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index c56cf702f8ce..f17c1fd6a1f4 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -97,6 +97,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack); CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); + CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index c293fdfa17af..29345aee42ca 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -87,6 +87,7 @@ struct CompatFlags { bool ZZT3SelectHack; bool AllowLargeFBTextureOffsets; bool AtracLoopHack; + bool DeswizzleDepth; }; class IniFile; diff --git a/Core/CoreParameter.h b/Core/CoreParameter.h index 7e9ea60b2e46..9ef01c4d9bac 100644 --- a/Core/CoreParameter.h +++ b/Core/CoreParameter.h @@ -66,7 +66,7 @@ struct CoreParameter { bool headLess; // Try to avoid messageboxes etc // Internal PSP rendering resolution and scale factor. 
- int renderScaleFactor; + int renderScaleFactor = 1; int renderWidth; int renderHeight; diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 70214fc3d5ca..185eecdbc4b0 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -147,10 +147,10 @@ Draw::SamplerState *DepalShaderCache::GetSampler() { return nearestSampler_; } -DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) { +DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { using namespace Draw; - u32 id = GenerateShaderID(clutMode, pixelFormat); + u32 id = GenerateShaderID(clutMode, textureFormat, bufferFormat); auto shader = cache_.find(id); if (shader != cache_.end()) { @@ -171,7 +171,8 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF config.startPos = gstate.getClutIndexStartPos(); config.shift = gstate.getClutIndexShift(); config.mask = gstate.getClutIndexMask(); - config.pixelFormat = pixelFormat; + config.bufferFormat = bufferFormat; + config.textureFormat = textureFormat; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/DepalettizeCommon.h b/GPU/Common/DepalettizeCommon.h index c394f47eb7c3..fbac86300df7 100644 --- a/GPU/Common/DepalettizeCommon.h +++ b/GPU/Common/DepalettizeCommon.h @@ -49,7 +49,7 @@ class DepalShaderCache { ~DepalShaderCache(); // This also uploads the palette and binds the correct texture. 
- DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat); + DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::SamplerState *GetSampler(); @@ -63,8 +63,8 @@ class DepalShaderCache { void DeviceRestore(Draw::DrawContext *draw); private: - static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) { - return (clutMode & 0xFFFFFF) | (pixelFormat << 24); + static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat) { + return (clutMode & 0xFFFFFF) | (pixelFormat << 24) | (texFormat << 28); } static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) { diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 9874e91d6e39..af2bf13437eb 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -49,7 +49,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); @@ -71,7 +71,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); int shiftedMask = mask << shift; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n"); if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n"); @@ -102,6 +102,17 @@ void 
GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con case GE_FORMAT_DEPTH16: // Remap depth buffer. writer.C(" float depth = (color.x - z_offset) * z_scale;\n"); + + if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) { + // Convert depth to 565, without going through a CLUT. The int bit fields are cast with float() because GLSL ES has no implicit int->float conversion. + writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float r = float(idepth & 31) / 31.0;\n"); + writer.C(" float g = float((idepth >> 5) & 63) / 63.0;\n"); + writer.C(" float b = float((idepth >> 11) & 31) / 31.0;\n"); + writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n"); + return; + } + writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n"); break; default: @@ -135,16 +146,18 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); } + writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); + float index_multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. bool formatOK = true; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if ((mask & (mask + 1)) == 0) { // If the value has all bits contiguous (bitmask check above), we can mod by it + 1. @@ -222,6 +235,19 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c case GE_FORMAT_DEPTH16: { // TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway. + // Not on D3D9 though, so this path is still relevant. + + if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) { + // Convert depth to 565, without going through a CLUT. 
+ writer.C(" float depth = (index.x - z_offset) * z_scale;\n"); + writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float r = mod(idepth, 32.0) / 31.0;\n"); // No 'f' suffix in generated literals: GLSL ES 1.00 does not accept it. + writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0;\n"); + writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0;\n"); + writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n"); + return; + } + if (shift < 16) { index_multiplier = 1.0f / (float)(1 << shift); truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)"); @@ -249,7 +275,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c // index_multiplier -= 0.01f / texturePixels; if (!formatOK) { - ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.pixelFormat), shift, mask, config.startPos); + ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos); } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. 
@@ -258,7 +284,6 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c char offset[128] = ""; sprintf(offset, " + %f", texel_offset); - writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 5ce5ef88a84c..74bb38d19606 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -29,7 +29,8 @@ struct DepalConfig { int shift; u32 startPos; GEPaletteFormat clutFormat; - GEBufferFormat pixelFormat; + GETextureFormat textureFormat; + GEBufferFormat bufferFormat; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index a6b4abb68c01..663ff2cc905f 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -40,6 +40,23 @@ static const SamplerDef samplers[1] = { { "tex" }, }; +static const UniformDef uniforms[2] = { + { "vec2", "texSize", 0 }, + { "float", "scaleFactor", 1}, +}; + +struct Draw2DUB { + float texSizeX; + float texSizeY; + float scaleFactor; +}; + +const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), { + { "texSize", -1, 0, UniformType::FLOAT2, 0 }, + { "scaleFactor", -1, 1, UniformType::FLOAT1, 0 }, +} }; + + RasterChannel GenerateDraw2DFs(ShaderWriter &writer) { writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_NONE); @@ -70,6 +87,27 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); + + return 
RASTER_DEPTH; +} + +RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { + writer.DeclareSamplers(samplers); + writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH); + writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); + // Unlike when just copying a depth buffer, here we're generating new depth values so we'll + // have to apply the scaling. + DepthScaleFactors factors = GetDepthScaleFactors(); + writer.C(" vec2 tsize = texSize;\n"); + writer.C(" vec2 coord = v_texcoord * tsize;\n"); + writer.F(" float strip = 4.0 * scaleFactor;\n"); + writer.C(" float in_strip = mod(coord.y, strip);\n"); + writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n"); + writer.C(" coord /= tsize;\n"); + writer.C(" vec3 rgb = ").SampleTexture2D("tex", "coord").C(".xyz;\n"); + writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); + writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); + writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); return RASTER_DEPTH; } @@ -159,7 +197,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener { draw2DVs_, fs }, inputLayout, depthStencil, - blend, rasterNoCull, nullptr, + blend, rasterNoCull, &draw2DUBDesc, }; Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); @@ -174,7 +212,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener return pipeline; } -void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader) { +void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader, float texW, float texH) { using namespace Draw; Ensure2DResources(); @@ -196,8 +234,8 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipelineDepth_) { 
draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DDepthFs); - linearFilter = false; } + linearFilter = false; draw_->BindPipeline(draw2DPipelineDepth_); break; @@ -208,15 +246,37 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipeline565ToDepth_) { draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs); - linearFilter = false; } + linearFilter = false; draw_->BindPipeline(draw2DPipeline565ToDepth_); break; + + case DRAW2D_565_TO_DEPTH_DESWIZZLE: + if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { + // Can't do it + return; + } + if (!draw2DPipeline565ToDepthDeswizzle_) { + draw2DPipeline565ToDepthDeswizzle_ = Create2DPipeline(&GenerateDraw2D565ToDepthDeswizzleFs); + } + linearFilter = false; + draw_->BindPipeline(draw2DPipeline565ToDepthDeswizzle_); + break; } + Draw2DUB ub; + ub.texSizeX = tex ? tex->Width() : texW; + ub.texSizeY = tex ? tex->Height() : texH; + ub.scaleFactor = (float)renderScaleFactor_; + draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); + if (tex) { draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex); } draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? 
&draw2DSamplerLinear_ : &draw2DSamplerNearest_); draw_->DrawUP(verts, vertexCount); + + draw_->InvalidateCachedState(); + + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_VERTEXSHADER_STATE); } diff --git a/GPU/Common/Draw2D.h b/GPU/Common/Draw2D.h index fee2d4cef393..e1ed615ba86a 100644 --- a/GPU/Common/Draw2D.h +++ b/GPU/Common/Draw2D.h @@ -14,6 +14,7 @@ enum Draw2DShader { DRAW2D_COPY_COLOR, DRAW2D_COPY_DEPTH, DRAW2D_565_TO_DEPTH, + DRAW2D_565_TO_DEPTH_DESWIZZLE, }; inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { @@ -22,6 +23,7 @@ inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { return RASTER_DEPTH; case DRAW2D_COPY_COLOR: case DRAW2D_565_TO_DEPTH: + case DRAW2D_565_TO_DEPTH_DESWIZZLE: default: return RASTER_COLOR; } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index a1164c686ffd..bfdc23815f15 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -551,20 +551,33 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra // For now, let's just do the last thing, if there are multiple. // for (auto &source : sources) { - if (sources.size()) { + if (!sources.empty()) { + draw_->InvalidateCachedState(); + auto &source = sources.back(); if (source.channel == RASTER_DEPTH) { // Good old depth->depth copy. BlitFramebufferDepth(source.vfb, dest); gpuStats.numDepthCopies++; dest->last_frame_depth_updated = gpuStats.numFlips; - } else if (source.channel == RASTER_COLOR) { + } else if (source.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { VirtualFramebuffer *src = source.vfb; - // Copying color to depth. 
if (src->drawnFormat != GE_FORMAT_565) { WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address); } - BlitUsingRaster(src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, dest->fbo, 0.0f, 0.0f, dest->renderWidth, dest->renderHeight, false, DRAW2D_565_TO_DEPTH, "565_to_depth"); + + // Really hate to do this, but tracking the depth swizzle state across multiple + // copies is not easy. + Draw2DShader shader = DRAW2D_565_TO_DEPTH; + if (PSP_CoreParameter().compat.flags().DeswizzleDepth) { + shader = DRAW2D_565_TO_DEPTH_DESWIZZLE; + } + + // Copying color to depth. + BlitUsingRaster( + src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, + dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, + false, shader, "565_to_depth"); } } @@ -1941,7 +1954,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u bool z_stencil = reason == TempFBO::STENCIL; char name[128]; - snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : ""); + snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? 
"_depth" : ""); Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name }); if (!fbo) { return nullptr; @@ -2343,6 +2356,7 @@ void FramebufferManagerCommon::DeviceLost() { DoRelease(draw2DPipelineColor_); DoRelease(draw2DPipelineDepth_); DoRelease(draw2DPipeline565ToDepth_); + DoRelease(draw2DPipeline565ToDepthDeswizzle_); draw_ = nullptr; } @@ -2545,7 +2559,7 @@ void FramebufferManagerCommon::BlitUsingRaster( Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &vp); draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height()); - DrawStrip2D(nullptr, vtx, 4, linearFilter, shader); + DrawStrip2D(nullptr, vtx, 4, linearFilter, shader, src->Width(), src->Height()); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 447554f376f8..f63b319c3676 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -375,7 +375,7 @@ class FramebufferManagerCommon { Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags); - void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel); + void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel, float texW = 0.0f, float texH = 0.0f); void Ensure2DResources(); Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &)); @@ -511,6 +511,7 @@ class FramebufferManagerCommon { Draw::Pipeline *draw2DPipelineColor_ = nullptr; Draw::Pipeline *draw2DPipelineDepth_ 
= nullptr; Draw::Pipeline *draw2DPipeline565ToDepth_ = nullptr; + Draw::Pipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr; Draw::SamplerState *draw2DSamplerLinear_ = nullptr; Draw::SamplerState *draw2DSamplerNearest_ = nullptr; Draw::ShaderModule *draw2DVs_ = nullptr; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index af642a7df1dc..e4a8c9cd330f 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -892,6 +892,7 @@ bool TextureCacheCommon::MatchFramebuffer( const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR; + const u32 w = 1 << ((entry.dim >> 0) & 0xf); const u32 h = 1 << ((entry.dim >> 8) & 0xf); // 512 on a 272 framebuffer is sane, so let's be lenient. @@ -927,6 +928,7 @@ bool TextureCacheCommon::MatchFramebuffer( // Check works for D16 too (???) const bool matchingClutFormat = (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || + (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16); @@ -971,7 +973,7 @@ bool TextureCacheCommon::MatchFramebuffer( // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. if (matchingClutFormat) { if (!noOffset) { - WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset); + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? 
"DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset); } return true; } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { @@ -1823,18 +1825,44 @@ void TextureCacheCommon::ApplyTexture() { gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); } +bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { + if (IsClutFormat(texFormat)) { + switch (bufferFormat) { + case GE_FORMAT_4444: + case GE_FORMAT_565: + case GE_FORMAT_5551: + case GE_FORMAT_DEPTH16: + if (texFormat == GE_TFMT_CLUT16) { + return true; + } + break; + case GE_FORMAT_8888: + if (texFormat == GE_TFMT_CLUT32) { + return true; + } + break; + } + WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat)); + return false; + } else if (texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16) { + // We can also "depal" 565 format, this is used to read depth buffers as 565 on occasion (#15491). + return true; + } else { + return false; + } +} + void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { DepalShader *depalShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - bool need_depalettize = IsClutFormat(texFormat); bool depth = channel == RASTER_DEPTH; + bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D; // TODO: Implement shader depal in the fragment shader generator for D3D11 at least. 
if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) { useShaderDepal = false; - depth = false; // Can't support this } switch (draw_->GetShaderLanguageDesc().shaderLanguage) { @@ -1878,7 +1906,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); + depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); gstate_c.SetUseShaderDepal(false); } diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index c3f4476fcb1f..b36cb3428887 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -160,6 +160,7 @@ void GPU_DX9::CheckGPUFeatures() { u32 features = 0; features |= GPU_SUPPORTS_16BIT_FORMATS; features |= GPU_SUPPORTS_BLEND_MINMAX; + features |= GPU_SUPPORTS_DEPTH_TEXTURE; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; // Accurate depth is required because the Direct3D API does not support inverse Z. diff --git a/assets/compat.ini b/assets/compat.ini index c6011605d780..974a6216defd 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1102,6 +1102,10 @@ NPEH00029 = true ULUS10455 = true [BlueToAlpha] +# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, +# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering +# to avoid problems. + # Split/Second ULES01402 = true ULUS10513 = true @@ -1121,10 +1125,6 @@ ULES00262 = true ULUS10064 = true ULKS46087 = true -# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, -# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering -# to avoid problems. 
- [DateLimited] # Car Jack Streets - issue #12698 NPUZ00043 = true @@ -1266,3 +1266,13 @@ ULES00618 = true # Silver Fall ULES00808 = true ULUS10270 = true + +[DeswizzleDepth] +UCUS98633 = true +UCAS40145 = true +UCES00420 = true +UCJS10052 = true +UCKS45048 = true +UCJS18030 = true +UCJS18047 = true +NPJG00015 = true diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 19b0b0d2e977..a96df6657ce3 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -309,7 +309,8 @@ bool TestDepalShaders() { config.shift = 8; config.startPos = 64; config.mask = 0xFF; - config.pixelFormat = GE_FORMAT_8888; + config.bufferFormat = GE_FORMAT_8888; + config.textureFormat = GE_TFMT_CLUT32; GenerateDepalFs(buffer, config, desc); if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {