Skip to content

Commit

Permalink
Cache planes used for BBOX culling
Browse files Browse the repository at this point in the history
This isn't a huge performance boost for the games that use BBOX (like
Tekken), but it'll be more valuable if we start using soft culling more
widely automatically, see #17808
  • Loading branch information
hrydgard committed Jul 30, 2023
1 parent 36951a0 commit 061131e
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 84 deletions.
125 changes: 64 additions & 61 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
#include <cfloat>

#include "Common/Data/Convert/ColorConv.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Common/Profiler/Profiler.h"
#include "Common/LogReporting.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"
Expand Down Expand Up @@ -136,21 +136,6 @@ std::string DrawEngineCommon::DebugGetVertexLoaderString(std::string id, DebugSh
return dec ? dec->GetString(stringType) : "N/A";
}

// Plane stored as four coefficients: (x, y, z) is dotted with a point, w is the constant term.
struct Plane {
	float x, y, z, w;

	// Overwrites all four coefficients at once.
	void Set(float nx, float ny, float nz, float nw) {
		x = nx;
		y = ny;
		z = nz;
		w = nw;
	}

	// Evaluates the plane equation for the 3-component point `pt`:
	// the dot product of (x, y, z) with the point, plus w.
	float Test(const float pt[3]) const {
		return x * pt[0] + y * pt[1] + z * pt[2] + w;
	}
};

static void PlanesFromMatrix(const float mtx[16], Plane planes[6]) {
planes[0].Set(mtx[3]-mtx[0], mtx[7]-mtx[4], mtx[11]-mtx[8], mtx[15]-mtx[12]); // Right
planes[1].Set(mtx[3]+mtx[0], mtx[7]+mtx[4], mtx[11]+mtx[8], mtx[15]+mtx[12]); // Left
planes[2].Set(mtx[3]+mtx[1], mtx[7]+mtx[5], mtx[11]+mtx[9], mtx[15]+mtx[13]); // Bottom
planes[3].Set(mtx[3]-mtx[1], mtx[7]-mtx[5], mtx[11]-mtx[9], mtx[15]-mtx[13]); // Top
planes[4].Set(mtx[3]+mtx[2], mtx[7]+mtx[6], mtx[11]+mtx[10], mtx[15]+mtx[14]); // Near
planes[5].Set(mtx[3]-mtx[2], mtx[7]-mtx[6], mtx[11]-mtx[10], mtx[15]-mtx[14]); // Far
}

static Vec3f ClipToScreen(const Vec4f& coords) {
float xScale = gstate.getViewportXScale();
float xCenter = gstate.getViewportXCenter();
Expand Down Expand Up @@ -250,6 +235,52 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
}
}

// Gated by DIRTY_CULL_PLANES
//
// Recomputes the cached culling state (planes_, minOffset_, maxOffset_) from the
// current matrices, offset, region, scissor and viewport held in gstate.
void DrawEngineCommon::UpdatePlanes() {
	// Expand the 4x3 world and view matrices to 4x4 so they can be multiplied.
	float worldMtx[16];
	float viewMtx[16];
	ConvertMatrix4x3To4x4(worldMtx, gstate.worldMatrix);
	ConvertMatrix4x3To4x4(viewMtx, gstate.viewMatrix);

	// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
	float worldViewMtx[16];
	float worldViewProjMtx[16];
	Matrix4ByMatrix4(worldViewMtx, worldMtx, viewMtx);
	Matrix4ByMatrix4(worldViewProjMtx, worldViewMtx, gstate.projMatrix);

	// Next, we need to apply viewport, scissor, region, and even offset - but only for X/Y.
	// Note that the PSP does not clip against the viewport.
	const Vec2f baseOffset = Vec2f(gstate.getOffsetX(), gstate.getOffsetY());

	// Region1 (rate) is used as an X1/Y1 here, matching PSP behavior.
	const auto left = std::max(gstate.getRegionRateX() - 0x100, gstate.getScissorX1());
	const auto top = std::max(gstate.getRegionRateY() - 0x100, gstate.getScissorY1());
	const auto right = std::min(gstate.getRegionX2(), gstate.getScissorX2());
	const auto bottom = std::min(gstate.getRegionY2(), gstate.getScissorY2());
	// The range is widened by one unit on every side.
	minOffset_ = baseOffset + Vec2f(left, top) - Vec2f(1.0f, 1.0f);
	maxOffset_ = baseOffset + Vec2f(right, bottom) + Vec2f(1.0f, 1.0f);

	// Now let's apply the viewport to our scissor/region + offset range.
	const Vec2f viewportCenter = Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter());
	Vec2f inverseViewportScale = Vec2f(1.0f / gstate.getViewportXScale(), 1.0f / gstate.getViewportYScale());
	Vec2f minViewport = (minOffset_ - viewportCenter) * inverseViewportScale;
	Vec2f maxViewport = (maxOffset_ - viewportCenter) * inverseViewportScale;

	const float spanX = maxViewport.x - minViewport.x;
	const float spanY = maxViewport.y - minViewport.y;

	Lin::Matrix4x4 applyViewport;
	applyViewport.empty();
	// Scale to the viewport's size.
	applyViewport.xx = 2.0f / spanX;
	applyViewport.yy = 2.0f / spanY;
	applyViewport.zz = 1.0f;
	applyViewport.ww = 1.0f;
	// And offset to the viewport's centers.
	applyViewport.wx = -(maxViewport.x + minViewport.x) / spanX;
	applyViewport.wy = -(maxViewport.y + minViewport.y) / spanY;

	float mtx[16];
	Matrix4ByMatrix4(mtx, worldViewProjMtx, applyViewport.m);

	// Element 3 of each group of four floats is shared by all six planes.
	const float w0 = mtx[3];
	const float w1 = mtx[7];
	const float w2 = mtx[11];
	const float w3 = mtx[15];

	// Writes planes_[index] from element `axis` of each group, added to (add == true)
	// or subtracted from (add == false) the shared element.
	auto setPlane = [&](int index, int axis, bool add) {
		const float a = mtx[axis];
		const float b = mtx[4 + axis];
		const float c = mtx[8 + axis];
		const float d = mtx[12 + axis];
		if (add)
			planes_[index].Set(w0 + a, w1 + b, w2 + c, w3 + d);
		else
			planes_[index].Set(w0 - a, w1 - b, w2 - c, w3 - d);
	};

	setPlane(0, 0, false);  // Right
	setPlane(1, 0, true);   // Left
	setPlane(2, 1, true);   // Bottom
	setPlane(3, 1, false);  // Top
	setPlane(4, 2, true);   // Near
	setPlane(5, 2, false);  // Far
}

// This code has plenty of potential for optimization.
//
// It does the simplest and safest test possible: If all points of a bbox are outside a single of
Expand All @@ -273,7 +304,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
verts[i] = vtx[i] * (1.0f / 128.0f);
}
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT && !inds) {
const s16 *vtx = (const s16*)control_points;
const s16 *vtx = (const s16 *)control_points;
for (int i = 0; i < vertexCount * 3; i++) {
verts[i] = vtx[i] * (1.0f / 32768.0f);
}
Expand Down Expand Up @@ -302,70 +333,42 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
}
}

Plane planes[6];

float world[16];
float view[16];
float worldview[16];
float worldviewproj[16];
ConvertMatrix4x3To4x4(world, gstate.worldMatrix);
ConvertMatrix4x3To4x4(view, gstate.viewMatrix);
// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
Matrix4ByMatrix4(worldview, world, view);
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);

// Next, we need to apply viewport, scissor, region, and even offset - but only for X/Y.
// Note that the PSP does not clip against the viewport.
const Vec2f baseOffset = Vec2f(gstate.getOffsetX(), gstate.getOffsetY());
// Region1 (rate) is used as an X1/Y1 here, matching PSP behavior.
Vec2f minOffset = baseOffset + Vec2f(std::max(gstate.getRegionRateX() - 0x100, gstate.getScissorX1()), std::max(gstate.getRegionRateY() - 0x100, gstate.getScissorY1())) - Vec2f(1.0f, 1.0f);
Vec2f maxOffset = baseOffset + Vec2f(std::min(gstate.getRegionX2(), gstate.getScissorX2()), std::min(gstate.getRegionY2(), gstate.getScissorY2())) + Vec2f(1.0f, 1.0f);

// Now let's apply the viewport to our scissor/region + offset range.
Vec2f inverseViewportScale = Vec2f(1.0f / gstate.getViewportXScale(), 1.0f / gstate.getViewportYScale());
Vec2f minViewport = (minOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
Vec2f maxViewport = (maxOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;

Lin::Matrix4x4 applyViewport;
applyViewport.empty();
// Scale to the viewport's size.
applyViewport.xx = 2.0f / (maxViewport.x - minViewport.x);
applyViewport.yy = 2.0f / (maxViewport.y - minViewport.y);
applyViewport.zz = 1.0f;
applyViewport.ww = 1.0f;
// And offset to the viewport's centers.
applyViewport.wx = -(maxViewport.x + minViewport.x) / (maxViewport.x - minViewport.x);
applyViewport.wy = -(maxViewport.y + minViewport.y) / (maxViewport.y - minViewport.y);

float screenBounds[16];
Matrix4ByMatrix4(screenBounds, worldviewproj, applyViewport.m);
// Due to world matrix updates per "thing", this isn't quite as effective as it could be if we did world transform
// in here as well. Though, it still does cut down on a lot of updates in Tekken 6.
if (gstate_c.IsDirty(DIRTY_CULL_PLANES)) {
UpdatePlanes();
gpuStats.numPlaneUpdates++;
gstate_c.Clean(DIRTY_CULL_PLANES);
}

PlanesFromMatrix(screenBounds, planes);
// Note: near/far are not checked without clamp/clip enabled, so we skip those planes.
int totalPlanes = gstate.isDepthClampEnabled() ? 6 : 4;
for (int plane = 0; plane < totalPlanes; plane++) {
int inside = 0;
int out = 0;
for (int i = 0; i < vertexCount; i++) {
// Here we can test against the frustum planes!
float value = planes[plane].Test(verts + i * 3);
// Test against the frustum planes, and count.
// TODO: We should test 4 vertices at a time using SIMD.
// I guess could also test one vertex against 4 planes at a time, though a lot of waste at the common case of 6.
float value = planes_[plane].Test(verts + i * 3);
if (value <= -FLT_EPSILON)
out++;
else
inside++;
}

// No vertices inside this one plane? Don't need to draw.
if (inside == 0) {
// All out - but check for X and Y if the offset was near the cullbox edge.
bool outsideEdge = false;
if (plane == 1)
outsideEdge = minOffset.x < 1.0f;
outsideEdge = minOffset_.x < 1.0f;
if (plane == 2)
outsideEdge = minOffset.y < 1.0f;
outsideEdge = minOffset_.y < 1.0f;
else if (plane == 0)
outsideEdge = maxOffset.x >= 4096.0f;
outsideEdge = maxOffset_.x >= 4096.0f;
else if (plane == 3)
outsideEdge = maxOffset.y >= 4096.0f;
outsideEdge = maxOffset_.y >= 4096.0f;

// Only consider this outside if offset + scissor/region is fully inside the cullbox.
if (!outsideEdge)
Expand Down
14 changes: 14 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "Common/CommonTypes.h"
#include "Common/Data/Collections/Hashmaps.h"

#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/GPUDebugInterface.h"
Expand Down Expand Up @@ -68,6 +69,13 @@ class TessellationDataTransfer {
virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
};

// Culling plane.
// Stored as four coefficients: (x, y, z) is dotted with a point, w is the constant term.
struct Plane {
	float x, y, z, w;

	// Overwrites all four coefficients at once.
	void Set(float nx, float ny, float nz, float nw) {
		x = nx;
		y = ny;
		z = nz;
		w = nw;
	}

	// Evaluates the plane equation for the 3-component point `pt`:
	// the dot product of (x, y, z) with the point, plus w.
	float Test(const float pt[3]) const {
		return x * pt[0] + y * pt[1] + z * pt[2] + w;
	}
};

class DrawEngineCommon {
public:
DrawEngineCommon();
Expand Down Expand Up @@ -131,6 +139,7 @@ class DrawEngineCommon {

protected:
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
void UpdatePlanes();

int ComputeNumVertsToDecode() const;
void DecodeVerts(u8 *dest);
Expand Down Expand Up @@ -236,4 +245,9 @@ class DrawEngineCommon {

// Hardware tessellation
TessellationDataTransfer *tessDataTransfer;

// Culling
Plane planes_[6];
Vec2f minOffset_;
Vec2f maxOffset_;
};
3 changes: 2 additions & 1 deletion GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,15 @@ enum : uint64_t {
DIRTY_LIGHT_CONTROL = 1ULL << 38,
DIRTY_TEX_ALPHA_MUL = 1ULL << 39,

// Bits 40-43 are free for new uniforms. Then we're really out and need to start merging.
// Bits 40-42 are free for new uniforms. Then we're really out and need to start merging.
// Don't forget to update DIRTY_ALL_UNIFORMS when you start using them.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x0FFFFFFFFFFULL,

// Other dirty elements that aren't uniforms
DIRTY_CULL_PLANES = 1ULL << 43,
DIRTY_FRAMEBUF = 1ULL << 44,
DIRTY_TEXTURE_IMAGE = 1ULL << 45, // Means that the definition of the texture image has changed (address, stride etc), and we need to look up again.
DIRTY_TEXTURE_PARAMS = 1ULL << 46,
Expand Down
9 changes: 5 additions & 4 deletions GPU/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ inline unsigned int toFloat24(float f) {
return i >> 8;
}

// The ToString function lives in GPUCommonHW.cpp.
struct GPUStatistics {
void Reset() {
ResetFrame();
Expand All @@ -84,10 +85,10 @@ struct GPUStatistics {
numTextureInvalidations = 0;
numTextureInvalidationsByFramebuffer = 0;
numTexturesHashed = 0;
numTextureSwitches = 0;
numTextureDataBytesHashed = 0;
numShaderSwitches = 0;
numFlushes = 0;
numBBOXJumps = 0;
numPlaneUpdates = 0;
numTexturesDecoded = 0;
numFramebufferEvaluations = 0;
numBlockingReadbacks = 0;
Expand All @@ -114,6 +115,8 @@ struct GPUStatistics {
int numListSyncs;
int numCachedDrawCalls;
int numFlushes;
int numBBOXJumps;
int numPlaneUpdates;
int numVertsSubmitted;
int numCachedVertsDrawn;
int numUncachedVertsDrawn;
Expand All @@ -122,8 +125,6 @@ struct GPUStatistics {
int numTextureInvalidationsByFramebuffer;
int numTexturesHashed;
int numTextureDataBytesHashed;
int numTextureSwitches;
int numShaderSwitches;
int numTexturesDecoded;
int numFramebufferEvaluations;
int numBlockingReadbacks;
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,7 @@ void GPUCommon::Execute_BJump(u32 op, u32 diff) {
if (!currentList->bboxResult) {
// bounding box jump.
const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
gpuStats.numBBOXJumps++;
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
Expand Down
38 changes: 20 additions & 18 deletions GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE },

// Viewport.
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE },
Expand All @@ -206,12 +206,12 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },

// Region
{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

// Scissor
{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

// Lighting base colors
{ GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
Expand Down Expand Up @@ -1368,7 +1368,7 @@ void GPUCommonHW::Execute_WorldMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_WORLDMATRIX);
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1391,7 +1391,7 @@ void GPUCommonHW::Execute_WorldMtxData(u32 op, u32 diff) {
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
Flush();
((u32 *)gstate.worldMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_WORLDMATRIX);
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_CULL_PLANES);
}
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
Expand Down Expand Up @@ -1421,7 +1421,7 @@ void GPUCommonHW::Execute_ViewMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_VIEWMATRIX);
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1444,7 +1444,7 @@ void GPUCommonHW::Execute_ViewMtxData(u32 op, u32 diff) {
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
Flush();
((u32 *)gstate.viewMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_VIEWMATRIX);
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
}
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
Expand Down Expand Up @@ -1474,7 +1474,7 @@ void GPUCommonHW::Execute_ProjMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1497,7 +1497,7 @@ void GPUCommonHW::Execute_ProjMtxData(u32 op, u32 diff) {
if (num < 16 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
Flush();
((u32 *)gstate.projMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
}
num++;
if (num <= 16)
Expand Down Expand Up @@ -1647,8 +1647,8 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
return snprintf(buffer, size,
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
"Draw calls: %d, flushes %d, clears %d (cached: %d)\n"
"Num Tracked Vertex Arrays: %d\n"
"Draw calls: %d, flushes %d, clears %d, bbox jumps %d (%d updates)\n"
"Cached draws: %d (tracked: %d)\n"
"Vertices: %d cached: %d uncached: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
Expand All @@ -1662,6 +1662,8 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numBBOXJumps,
gpuStats.numPlaneUpdates,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.numVertsSubmitted,
Expand Down

0 comments on commit 061131e

Please sign in to comment.