From ba79de0a5a3b59b93c386108795bc56075725a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 13 Dec 2020 16:04:16 +0100 Subject: [PATCH] Turn off vertex range culling in bezier/spline calls. When we do lower res tess than the real PSP, we can't trust the game to not cause range culling to kick in. Fixes #11692 --- Common/GPU/ShaderWriter.cpp | 8 ++++++++ GPU/Common/ShaderId.cpp | 8 ++++++-- GPU/Common/ShaderId.h | 2 +- GPU/Common/ShaderUniforms.cpp | 2 +- GPU/Common/VertexShaderGenerator.cpp | 4 +++- GPU/D3D11/DrawEngineD3D11.cpp | 2 +- GPU/Directx9/ShaderManagerDX9.cpp | 2 +- GPU/GLES/ShaderManagerGLES.cpp | 2 +- GPU/GPUCommon.cpp | 18 ++++++++++-------- GPU/GPUState.h | 12 ++++++++++-- GPU/Vulkan/DrawEngineVulkan.cpp | 2 +- 11 files changed, 43 insertions(+), 19 deletions(-) diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 4db6bd8577fd..5bbb091f1ce6 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -83,6 +83,8 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions case ShaderStage::Fragment: W(vulkan_glsl_preamble_fs); break; + default: + break; } break; case HLSL_D3D11: @@ -99,6 +101,8 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions W(hlsl_d3d11_preamble_fs); } break; + default: + break; } break; default: // OpenGL @@ -125,6 +129,8 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions } C("#define gl_VertexIndex gl_VertexID\n"); break; + default: + break; } if (!lang_.gles) { C("#define lowp\n"); @@ -293,6 +299,8 @@ void ShaderWriter::DeclareSampler2D(const char *name, int binding) { case HLSL_D3D11: F("SamplerState %s : register(s%d);\n", name, binding); break; + default: + break; } } diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index b01c46044feb..a5c18aa1de3a 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -56,6 +56,7 @@ std::string 
VertexShaderDesc(const VShaderID &id) { if (id.Bit(VS_BIT_HAS_TEXCOORD_TESS)) desc << "TessT "; if (id.Bit(VS_BIT_HAS_NORMAL_TESS)) desc << "TessN "; if (id.Bit(VS_BIT_NORM_REVERSE_TESS)) desc << "TessRevN "; + if (id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) desc << "Cull "; return desc.str(); } @@ -70,17 +71,20 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform, bool hasNormal = (vertType & GE_VTYPE_NRM_MASK) != 0; bool hasTexcoord = (vertType & GE_VTYPE_TC_MASK) != 0; - bool doBezier = gstate_c.bezier; - bool doSpline = gstate_c.spline; + bool doBezier = gstate_c.submitType == SubmitType::HW_BEZIER; + bool doSpline = gstate_c.submitType == SubmitType::HW_SPLINE; bool enableFog = gstate.isFogEnabled() && !isModeThrough && !gstate.isModeClear(); bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough; + bool vertexRangeCulling = gstate_c.Supports(GPU_SUPPORTS_VS_RANGE_CULLING) && + !isModeThrough && gstate_c.submitType == SubmitType::DRAW; // neither hw nor sw spline/bezier. See #11692 VShaderID id; id.SetBit(VS_BIT_LMODE, lmode); id.SetBit(VS_BIT_IS_THROUGH, isModeThrough); id.SetBit(VS_BIT_ENABLE_FOG, enableFog); id.SetBit(VS_BIT_HAS_COLOR, hasColor); + id.SetBit(VS_BIT_VERTEX_RANGE_CULLING, vertexRangeCulling); if (doTexture) { id.SetBit(VS_BIT_DO_TEXTURE); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 5ae71487ad5b..c98a914be6d6 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -15,7 +15,7 @@ enum VShaderBit : uint8_t { VS_BIT_ENABLE_FOG = 2, VS_BIT_HAS_COLOR = 3, VS_BIT_DO_TEXTURE = 4, - // 5 is free. + VS_BIT_VERTEX_RANGE_CULLING = 5, // 6 is free, // 7 is free. 
VS_BIT_USE_HW_TRANSFORM = 8, diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index bf2b540db5b8..7860ed9ca838 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -215,7 +215,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView const int h = gstate.getTextureHeight(0); const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; - if (gstate_c.bezier || gstate_c.spline) { + if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) { // When we are generating UV coordinates through the bezier/spline, we need to apply the scaling. // However, this is missing a check that we're not getting our UV:s supplied for us in the vertices. ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor; diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 37fd9403e617..20a51867d759 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -207,6 +207,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } bool texCoordInVec3 = false; + bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING); + if (compat.shaderLanguage == GLSL_VULKAN) { WRITE(p, "\n"); WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr); @@ -1086,7 +1088,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix); } - if (!isModeThrough && gstate_c.Supports(GPU_SUPPORTS_VS_RANGE_CULLING)) { + if (vertexRangeCulling) { WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n"); // Vertex range culling doesn't happen when depth is clamped, so only do this if in range. 
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n"); diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index fbfd4f820fea..8c10d6fe73ae 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -341,7 +341,7 @@ void DrawEngineD3D11::DoFlush() { ApplyDrawState(prim); // Always use software for flat shading to fix the provoking index. - bool tess = gstate_c.bezier || gstate_c.spline; + bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE; bool useHWTransform = CanUseHardwareTransform(prim) && (tess || gstate.getShadeMode() != GE_SHADE_FLAT); if (useHWTransform) { diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index bb423ca23354..ce5c7b983c18 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -543,7 +543,7 @@ void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType) { // Always use software for flat shading to fix the provoking index. 
- bool tess = gstate_c.bezier || gstate_c.spline; + bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE; useHWTransform = useHWTransform && (tess || gstate.getShadeMode() != GE_SHADE_FLAT); VShaderID VSID; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index eccc9f8834c1..99b7dd08bd84 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -409,7 +409,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu const float widthFactor = (float)w * invW; const float heightFactor = (float)h * invH; float uvscaleoff[4]; - if (gstate_c.bezier || gstate_c.spline) { + if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) { // When we are generating UV coordinates through the bezier/spline, we need to apply the scaling. // However, this is missing a check that we're not getting our UV:s supplied for us in the vertices. 
uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index c57f026670fb..bf829e1026a5 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1830,20 +1830,21 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - gstate_c.bezier = true; + gstate_c.submitType = SubmitType::HW_BEZIER; if (gstate_c.spline_num_points_u != surface.num_points_u) { gstate_c.Dirty(DIRTY_BEZIERSPLINE); gstate_c.spline_num_points_u = surface.num_points_u; } + } else { + gstate_c.submitType = SubmitType::BEZIER; } int bytesRead = 0; UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier"); - if (gstate_c.bezier) - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - gstate_c.bezier = false; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); + gstate_c.submitType = SubmitType::DRAW; // After drawing, we advance pointers - see SubmitPrim which does the same. int count = surface.num_points_u * surface.num_points_v; @@ -1896,20 +1897,21 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) { gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - gstate_c.spline = true; + gstate_c.submitType = SubmitType::HW_SPLINE; if (gstate_c.spline_num_points_u != surface.num_points_u) { gstate_c.Dirty(DIRTY_BEZIERSPLINE); gstate_c.spline_num_points_u = surface.num_points_u; } + } else { + gstate_c.submitType = SubmitType::SPLINE; } int bytesRead = 0; UpdateUVScaleOffset(); drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline"); - if (gstate_c.spline) - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - gstate_c.spline = false; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); + gstate_c.submitType = SubmitType::DRAW; // After drawing, we advance pointers - see SubmitPrim which does the same. 
int count = surface.num_points_u * surface.num_points_v; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 3af959f6b087..262a227b74f1 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -504,6 +504,14 @@ struct KnownVertexBounds { u16 maxV; }; +enum class SubmitType { + DRAW, + BEZIER, + SPLINE, + HW_BEZIER, + HW_SPLINE, +}; + struct GPUStateCache { bool Supports(u32 flags) { return (featureFlags & flags) != 0; } // Return true if ANY of flags are true. bool SupportsAll(u32 flags) { return (featureFlags & flags) == flags; } // Return true if ALL flags are true. @@ -602,8 +610,8 @@ struct GPUStateCache { } u32 curRTOffsetX; - bool bezier; - bool spline; + // Set if we are doing hardware bezier/spline. + SubmitType submitType; int spline_num_points_u; bool useShaderDepal; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index da1fb1f52ca7..f30e357d3040 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -598,7 +598,7 @@ void DrawEngineVulkan::DoFlush() { FrameData *frame = &frame_[vulkan_->GetCurFrame()]; - bool tess = gstate_c.bezier || gstate_c.spline; + bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE; bool textureNeedsApply = false; if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {