Skip to content

Commit

Permalink
Adds HgiShaderFunctionComputeDesc, with member "localSize", to
Browse files Browse the repository at this point in the history
HgiShaderFunctionDesc, allowing client to specify the desired number of threads
in the local compute work group for a compute shader. HgiShaderGenerator now
handles the writing of this local size to the shader, if necessary.

The HgiComputeCmds::Dispatch() function for HgiGL and HgiVulkan will now use
the local size provided by the HgiShaderFunctionDesc to determine number of
work groups to dispatch. Calls to Dispatch() should specify the total size of
the compute work (and not the total work size / local work size), and the
backend will handle determining the number of work groups.

Also adds some limit checks on work group size and number of work groups based
on device limits.

See #1656

(Internal change: 2199330)
  • Loading branch information
clach authored and pixar-oss committed Nov 13, 2021
1 parent 38021a0 commit a3c3bf7
Show file tree
Hide file tree
Showing 21 changed files with 300 additions and 31 deletions.
3 changes: 0 additions & 3 deletions pxr/imaging/hdSt/codeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1000,9 +1000,6 @@ HdSt_CodeGen::CompileComputeProgram(HdStResourceRegistry*const registry)
_genCommon << "#extension GL_ARB_bindless_texture : require\n";
}

// default workgroup size (must follow #extension directives)
_genCommon << "layout(local_size_x = 1, local_size_y = 1) in;\n";

// Used in glslfx files to determine if it is using new/old
// imaging system. It can also be used as API guards when
// we need new versions of Storm shading.
Expand Down
15 changes: 12 additions & 3 deletions pxr/imaging/hdSt/domeLightComputations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,21 @@ HdSt_DomeLightComputationGPU::Execute(

HdStResourceRegistry* hdStResourceRegistry =
static_cast<HdStResourceRegistry*>(resourceRegistry);

constexpr int localSize = 8;

HdStGLSLProgramSharedPtr const computeProgram =
HdStGLSLProgram::GetComputeProgram(
HdStPackageDomeLightShader(),
_shaderToken,
static_cast<HdStResourceRegistry*>(resourceRegistry));
"",
static_cast<HdStResourceRegistry*>(resourceRegistry),
[&] (HgiShaderFunctionDesc &computeDesc) {
computeDesc.debugName = _shaderToken.GetString();
computeDesc.shaderStage = HgiShaderStageCompute;
computeDesc.computeDescriptor.localSize =
GfVec3i(localSize, localSize, 1);
});
if (!TF_VERIFY(computeProgram)) {
return;
}
Expand All @@ -165,7 +175,6 @@ HdSt_DomeLightComputationGPU::Execute(
int height = downsize ? srcDim[1] / 2 : srcDim[1];

// Make sure dimensions align with the local size used in the Compute Shader
constexpr int localSize = 8;
width = _MakeMultipleOf(width, localSize);
height = _MakeMultipleOf(height, localSize);

Expand Down Expand Up @@ -264,7 +273,7 @@ HdSt_DomeLightComputationGPU::Execute(
}

// Queue compute work
computeCmds->Dispatch(width / localSize, height / localSize);
computeCmds->Dispatch(width, height);

computeCmds->PopDebugGroup();

Expand Down
9 changes: 7 additions & 2 deletions pxr/imaging/hdSt/flatNormals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ HdSt_FlatNormalsComputationGPU::Execute(
int indexStride;
int pParamOffset;
int pParamStride;
int primIndexEnd;
} uniform;

HdStResourceRegistry* hdStResourceRegistry =
Expand All @@ -250,6 +251,7 @@ HdSt_FlatNormalsComputationGPU::Execute(
[&](HgiShaderFunctionDesc &computeDesc) {
computeDesc.debugName = shaderToken.GetString();
computeDesc.shaderStage = HgiShaderStageCompute;
computeDesc.computeDescriptor.localSize = GfVec3i(64, 1, 1);

TfToken srcType;
TfToken dstType;
Expand Down Expand Up @@ -290,7 +292,8 @@ HdSt_FlatNormalsComputationGPU::Execute(
"indexOffset", // interleave offset
"indexStride", // interleave stride
"pParamOffset", // interleave offset
"pParamStride" // interleave stride
"pParamStride", // interleave stride
"primIndexEnd"
};
static_assert((sizeof(Uniform) / sizeof(int)) ==
(sizeof(params) / sizeof(params[0])), "");
Expand Down Expand Up @@ -340,6 +343,9 @@ HdSt_FlatNormalsComputationGPU::Execute(
HdDataSizeOfType(HdGetComponentType(primitiveParam->GetTupleType().type));
uniform.pParamOffset = primitiveParam->GetOffset() / pParamComponentSize;
uniform.pParamStride = primitiveParam->GetStride() / pParamComponentSize;

const int numPrims = topologyRange->GetNumElements();
uniform.primIndexEnd = numPrims;

Hgi* hgi = hdStResourceRegistry->GetHgi();

Expand Down Expand Up @@ -395,7 +401,6 @@ HdSt_FlatNormalsComputationGPU::Execute(
pipeline, BufferBinding_Uniforms, sizeof(uniform), &uniform);

// Queue compute work
int numPrims = topologyRange->GetNumElements();
computeCmds->Dispatch(numPrims, 1);

computeCmds->PopDebugGroup();
Expand Down
27 changes: 19 additions & 8 deletions pxr/imaging/hdSt/glslProgram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ HdStGLSLProgram::CompileShader(
HgiShaderFunctionDesc shaderFnDesc;
shaderFnDesc.shaderCode = shaderSource.c_str();
shaderFnDesc.shaderStage = stage;
// Default work group size
shaderFnDesc.computeDescriptor.localSize = GfVec3i(1, 1, 1);
HgiShaderFunctionHandle shaderFn = hgi->CreateShaderFunction(shaderFnDesc);

if (!_ValidateCompilation(shaderFn, shaderType, shaderSource, _debugID)) {
Expand Down Expand Up @@ -442,15 +444,29 @@ HdStGLSLProgram::GetComputeProgram(
std::string const &defines,
HdStResourceRegistry *resourceRegistry,
PopulateDescriptorCallback populateDescriptor)
{
return GetComputeProgram(HdStPackageComputeShader(),
shaderToken,
defines,
resourceRegistry,
populateDescriptor);
}

HdStGLSLProgramSharedPtr
HdStGLSLProgram::GetComputeProgram(
TfToken const &shaderFileName,
TfToken const &shaderToken,
std::string const &defines,
HdStResourceRegistry *resourceRegistry,
PopulateDescriptorCallback populateDescriptor)
{
// Find the program from registry
HdInstance<HdStGLSLProgramSharedPtr> programInstance =
resourceRegistry->RegisterGLSLProgram(
_ComputeHash(shaderToken, defines));

if (programInstance.IsFirstInstance()) {
// if not exists, create new one
TfToken const &shaderFileName = HdStPackageComputeShader();
// If program does not exist, create new one
const HioGlslfx glslfx(shaderFileName, HioGlslfxTokens->defVal);
std::string errorString;
if (!glslfx.IsValid(&errorString)){
Expand All @@ -462,14 +478,9 @@ HdStGLSLProgram::GetComputeProgram(
Hgi *hgi = resourceRegistry->GetHgi();

HgiShaderFunctionDesc computeDesc;
std::string sourceCode(
"layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n");

sourceCode += defines;

populateDescriptor(computeDesc);

sourceCode += glslfx.GetSource(shaderToken);
const std::string sourceCode = defines + glslfx.GetSource(shaderToken);
computeDesc.shaderCode = sourceCode.c_str();

HgiShaderFunctionHandle computeFn =
Expand Down
8 changes: 8 additions & 0 deletions pxr/imaging/hdSt/glslProgram.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ class HdStGLSLProgram final
HdStResourceRegistry *resourceRegistry,
PopulateDescriptorCallback populateDescriptor);

HDST_API
static HdStGLSLProgramSharedPtr GetComputeProgram(
TfToken const &shaderFileName,
TfToken const &shaderToken,
std::string const &defines,
HdStResourceRegistry *resourceRegistry,
PopulateDescriptorCallback populateDescriptor);

/// Returns the role of the GPU data in this resource.
TfToken const & GetRole() const {return _role;}

Expand Down
5 changes: 5 additions & 0 deletions pxr/imaging/hdSt/quadrangulate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ HdSt_QuadrangulateComputationGPU::Execute(
int primvarOffset;
int primvarStride;
int numComponents;
int indexEnd;
} uniform;

// select shader by datatype
Expand All @@ -527,6 +528,8 @@ HdSt_QuadrangulateComputationGPU::Execute(
[&](HgiShaderFunctionDesc &computeDesc) {
computeDesc.debugName = shaderToken.GetString();
computeDesc.shaderStage = HgiShaderStageCompute;
computeDesc.computeDescriptor.localSize = GfVec3i(64, 1, 1);

if (shaderToken == HdStGLSLProgramTokens->quadrangulateFloat) {
HgiShaderFunctionAddWritableBuffer(
&computeDesc, "primvar", HdStTokens->_float,
Expand All @@ -548,6 +551,7 @@ HdSt_QuadrangulateComputationGPU::Execute(
"primvarOffset", // interleave offset
"primvarStride", // interleave stride
"numComponents", // interleave datasize
"indexEnd"
};
static_assert((sizeof(Uniform) / sizeof(int)) ==
(sizeof(params) / sizeof(params[0])), "");
Expand Down Expand Up @@ -599,6 +603,7 @@ HdSt_QuadrangulateComputationGPU::Execute(
HdGetComponentCount(primvar->GetTupleType().type);

int numNonQuads = (int)quadInfo->numVerts.size();
uniform.indexEnd = numNonQuads;

Hgi* hgi = hdStResourceRegistry->GetHgi();

Expand Down
10 changes: 10 additions & 0 deletions pxr/imaging/hdSt/shaders/compute.glslfx
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ int getNormalsIndex(int idx)
void main()
{
int index = int(hd_GlobalInvocationID.x);
if (index >= indexEnd) {
return;
}

int offIndex = index * 2 + adjacencyOffset;

Expand Down Expand Up @@ -264,6 +267,10 @@ vec3 computeNormalForPrimIndex(int primIndex);
void main()
{
int primIndex = int(hd_GlobalInvocationID.x);
if (primIndex >= primIndexEnd) {
return;
}

int pParam = primitiveParam[getPrimitiveParamIndex(primIndex)];
int edgeFlag = getEdgeFlag(pParam);
int faceIndex = getFaceIndex(pParam);
Expand Down Expand Up @@ -377,6 +384,9 @@ vec3 computeNormalForPrimIndex(int primIndex)
void main()
{
int index = int(hd_GlobalInvocationID.x);
if (index >= indexEnd) {
return;
}

int quadInfoIndex = index * quadInfoStride + quadInfoOffset;
int numVert = quadInfo[quadInfoIndex];
Expand Down
4 changes: 0 additions & 4 deletions pxr/imaging/hdSt/shaders/domeLight.glslfx
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ vec3 ImportanceSample_GGX(vec2 Xi, float roughness, vec3 normal)
--- --------------------------------------------------------------------------
-- glsl DomeLight.Irradiance

layout(local_size_x = 8, local_size_y = 8) in;

const float deltaPhi = (2.0f * float(PI)) / 180.0f;
const float deltaTheta = (0.5f * float(PI)) / 64.0f;

Expand Down Expand Up @@ -169,7 +167,6 @@ void main(void)
--- --------------------------------------------------------------------------
-- glsl DomeLight.Prefilter

layout(local_size_x = 8, local_size_y = 8) in;
layout(std140, binding=0) uniform Uniforms {
float roughness;
} uniforms;
Expand Down Expand Up @@ -234,7 +231,6 @@ void main(void)
--- --------------------------------------------------------------------------
-- glsl DomeLight.BRDF

layout(local_size_x = 8, local_size_y = 8) in;
uniform int sampleLevel = 0;

float Geometry_SchlicksmithGGX(float dotNL, float dotNV, float roughness)
Expand Down
11 changes: 7 additions & 4 deletions pxr/imaging/hdSt/smoothNormals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ HdSt_SmoothNormalsComputationGPU::Execute(
int pointsStride;
int normalsOffset;
int normalsStride;
int indexEnd;
} uniform;

HdStResourceRegistry* hdStResourceRegistry =
Expand All @@ -187,6 +188,7 @@ HdSt_SmoothNormalsComputationGPU::Execute(
[&](HgiShaderFunctionDesc &computeDesc) {
computeDesc.debugName = shaderToken.GetString();
computeDesc.shaderStage = HgiShaderStageCompute;
computeDesc.computeDescriptor.localSize = GfVec3i(64, 1, 1);

TfToken srcType;
TfToken dstType;
Expand Down Expand Up @@ -220,6 +222,7 @@ HdSt_SmoothNormalsComputationGPU::Execute(
"pointsStride", // interleave stride
"normalsOffset", // interleave offset
"normalsStride", // interleave stride
"indexEnd"
};
static_assert((sizeof(Uniform) / sizeof(int)) ==
(sizeof(params) / sizeof(params[0])), "");
Expand Down Expand Up @@ -266,18 +269,18 @@ HdSt_SmoothNormalsComputationGPU::Execute(
HdDataSizeOfType(HdGetComponentType(normals->GetTupleType().type));
uniform.normalsOffset = normals->GetOffset() / normalComponentSize;
uniform.normalsStride = normals->GetStride() / normalComponentSize;

// The number of points is based off the size of the output.
// However, the number of points in the adjacency table
// is computed based off the largest vertex indexed from
// the topology (aka topology->ComputeNumPoints).
//
// Therefore, we need to clamp the number of points
// to the number of entries in the adjacency table.
int numDestPoints = range->GetNumElements();
int numSrcPoints = _adjacency->GetNumPoints();
const int numDestPoints = range->GetNumElements();
const int numSrcPoints = _adjacency->GetNumPoints();

int numPoints = std::min(numSrcPoints, numDestPoints);
const int numPoints = std::min(numSrcPoints, numDestPoints);
uniform.indexEnd = numPoints;

Hgi* hgi = hdStResourceRegistry->GetHgi();

Expand Down
1 change: 1 addition & 0 deletions pxr/imaging/hdSt/subdivision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1840,6 +1840,7 @@ _EvalStencilsGPU(
[&](HgiShaderFunctionDesc &computeDesc) {
computeDesc.debugName = shaderToken.GetString();
computeDesc.shaderStage = HgiShaderStageCompute;
computeDesc.computeDescriptor.localSize = GfVec3i(64, 1, 1);

HgiShaderFunctionAddBuffer(&computeDesc,
"sizes", HdStTokens->_int,
Expand Down
21 changes: 21 additions & 0 deletions pxr/imaging/hgi/shaderFunctionDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ HgiShaderFunctionBufferDesc::HgiShaderFunctionBufferDesc()

HgiShaderFunctionParamDesc::HgiShaderFunctionParamDesc() = default;

// Default-constructs a compute descriptor whose local work group size
// is zero in all three dimensions.
HgiShaderFunctionComputeDesc::HgiShaderFunctionComputeDesc()
    : localSize(0, 0, 0)
{
}

HgiShaderFunctionTessellationDesc::HgiShaderFunctionTessellationDesc()
= default;

Expand All @@ -52,6 +57,7 @@ HgiShaderFunctionDesc::HgiShaderFunctionDesc()
, stageInputs()
, stageOutputs()
, tessellationDescriptor()
, computeDescriptor()
{
}

Expand Down Expand Up @@ -108,6 +114,20 @@ bool operator!=(
return !(lhs == rhs);
}

// Two compute descriptors are equal when their local work group sizes
// are equal; localSize is the only member compared.
bool operator==(
    const HgiShaderFunctionComputeDesc& lhs,
    const HgiShaderFunctionComputeDesc& rhs)
{
    const bool sameLocalSize = (lhs.localSize == rhs.localSize);
    return sameLocalSize;
}

// Inequality for compute descriptors, defined as the negation of
// operator== on the same pair of operands.
bool operator!=(
    const HgiShaderFunctionComputeDesc& lhs,
    const HgiShaderFunctionComputeDesc& rhs)
{
    const bool equal = (lhs == rhs);
    return !equal;
}

bool operator==(
const HgiShaderFunctionTessellationDesc& lhs,
const HgiShaderFunctionTessellationDesc& rhs)
Expand Down Expand Up @@ -136,6 +156,7 @@ bool operator==(
lhs.constantParams == rhs.constantParams &&
lhs.stageInputs == rhs.stageInputs &&
lhs.stageOutputs == rhs.stageOutputs &&
// Every descriptor comparison must be chained with && so it is part of
// the returned expression; a ';' here would end the return statement
// early and leave the tessellation comparison as a no-op statement.
lhs.computeDescriptor == rhs.computeDescriptor &&
lhs.tessellationDescriptor == rhs.tessellationDescriptor;
}

Expand Down
Loading

0 comments on commit a3c3bf7

Please sign in to comment.