Skip to content

Commit

Permalink
Move LowerGpuRt into LGC
Browse files Browse the repository at this point in the history
With some tweak to adapt the change.
  • Loading branch information
LLJJDD committed Nov 29, 2023
1 parent 8e26731 commit d15e7f2
Show file tree
Hide file tree
Showing 13 changed files with 126 additions and 70 deletions.
1 change: 1 addition & 0 deletions lgc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ target_sources(LLVMlgc PRIVATE
patch/RegisterMetadataBuilder.cpp
patch/CombineCooperativeMatrix.cpp
patch/LowerCooperativeMatrix.cpp
patch/LowerGpuRt.cpp
)

# lgc/state
Expand Down
26 changes: 11 additions & 15 deletions llpc/lower/LowerGpuRt.h → lgc/include/lgc/patch/LowerGpuRt.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,18 @@
/**
***********************************************************************************************************************
* @file LowerGpuRt.h
* @brief LLPC header file: contains declaration of Llpc::LowerGpuRt
* @brief LGC header file: contains declaration of lgc::LowerGpuRt
***********************************************************************************************************************
*/
#pragma once

#include "llpcSpirvLower.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/PassManager.h"

namespace lgc {
class BuilderImpl;
class PipelineState;

class GpurtGetStackSizeOp;
class GpurtGetStackBaseOp;
class GpurtGetStackStrideOp;
Expand All @@ -46,24 +48,17 @@ class GpurtGetBoxSortHeuristicModeOp;
class GpurtGetStaticFlagsOp;
class GpurtGetTriangleCompressionModeOp;
class GpurtGetFlattenedGroupThreadIdOp;
} // namespace lgc

namespace llvm {
class AllocaInst;
}

namespace Llpc {
class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin<LowerGpuRt> {
class LowerGpuRt : public llvm::PassInfoMixin<LowerGpuRt> {
public:
LowerGpuRt();
llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager);

private:
typedef void (LowerGpuRt::*LibraryFuncPtr)(llvm::Function *, unsigned);
const static unsigned MaxLdsStackEntries = 16;
uint32_t getWorkgroupSize() const;
llvm::Value *getThreadIdInGroup() const;
void createGlobalStack();
void createGlobalStack(llvm::Module &module);
void createRayStaticIdValue();
void visitGetStackSize(lgc::GpurtGetStackSizeOp &inst);
void visitGetStackBase(lgc::GpurtGetStackBaseOp &inst);
Expand All @@ -76,10 +71,11 @@ class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin<LowerGpuRt> {
void visitGetStaticFlags(lgc::GpurtGetStaticFlagsOp &inst);
void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst);
void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst);
llvm::Value *m_stack; // Stack array to hold stack value
llvm::Type *m_stackTy; // Stack type
bool m_lowerStack; // If it is lowerStack
llvm::Value *m_stack = nullptr; // Stack array to hold stack value
llvm::Type *m_stackTy = nullptr; // Stack type
PipelineState *m_pipelineState = nullptr; // Pipeline state
llvm::SmallVector<llvm::Instruction *> m_callsToLower; // Call instruction to lower
llvm::SmallSet<llvm::Function *, 4> m_funcsToLower; // Functions to lower
BuilderImpl *m_builder = nullptr;
};
} // namespace Llpc
} // namespace lgc
5 changes: 4 additions & 1 deletion lgc/interface/lgc/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ static const char SampleShadingMetaName[] = "lgc.sample.shading";
// The front-end should zero-initialize a struct with "= {}" in case future changes add new fields.
// Note: new fields must be added to the end of this structure to maintain test compatibility.
union Options {
unsigned u32All[36];
unsigned u32All[40];
struct {
uint64_t hash[2]; // Pipeline hash to set in ELF PAL metadata
unsigned includeDisassembly; // If set, the disassembly for all compiled shaders will be included
Expand Down Expand Up @@ -183,6 +183,9 @@ union Options {
bool enableFragColor; // If enabled, do frag color broadcast
bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors to structure SRD.
unsigned cpsFlags; // CPS feature flags
unsigned rtBoxSortHeuristicMode; // Ray tracing box sort heuristic mode
unsigned rtStaticPipelineFlags; // Ray tracing static pipeline flags
unsigned rtTriCompressMode; // Ray tracing triangle compression mode
};
};
static_assert(sizeof(Options) == sizeof(Options::u32All));
Expand Down
112 changes: 66 additions & 46 deletions llpc/lower/LowerGpuRt.cpp → lgc/patch/LowerGpuRt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,43 @@
/**
***********************************************************************************************************************
* @file LowerGpuRt.cpp
* @brief LLPC source file: contains implementation of class Llpc::LowerGpuRt.
* @brief LGC source file: contains implementation of class lgc::LowerGpuRt.
***********************************************************************************************************************
*/
#include "LowerGpuRt.h"
#include "llpcContext.h"
#include "llpcRayTracingContext.h"
#include "lgc/patch/LowerGpuRt.h"
#include "lgc/Builder.h"
#include "lgc/GpurtDialect.h"
#include "lgc/LgcContext.h"
#include "lgc/builder/BuilderImpl.h"
#include "lgc/state/TargetInfo.h"
#include "llvm-dialects/Dialect/Visitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "llpc-gpurt"
#define DEBUG_TYPE "lgc-lower-gpurt"
using namespace lgc;
using namespace llvm;
using namespace Llpc;

namespace RtName {
static const char *LdsStack = "LdsStack";
} // namespace RtName

namespace Llpc {
// =====================================================================================================================
LowerGpuRt::LowerGpuRt() : m_stack(nullptr), m_stackTy(nullptr), m_lowerStack(false) {
}
namespace lgc {
// =====================================================================================================================
// Executes this LGC lowering pass on the specified LLVM module.
//
// @param [in/out] module : LLVM module to be run on
// @param [in/out] analysisManager : Analysis manager to use for this transformation
PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysisManager) {
LLVM_DEBUG(dbgs() << "Run the pass Lower-gpurt\n");
SpirvLower::init(&module);
auto gfxip = m_context->getPipelineContext()->getGfxIpVersion();
// NOTE: rayquery of sect and ahit can reuse lds.
m_lowerStack = (m_entryPoint->getName().startswith("_ahit") || m_entryPoint->getName().startswith("_sect")) &&
(gfxip.major < 11);
createGlobalStack();

PipelineState *pipelineState = analysisManager.getResult<PipelineStateWrapper>(module).getPipelineState();
m_pipelineState = pipelineState;

BuilderImpl builderImpl(pipelineState);
m_builder = &builderImpl;
m_builder->setShaderStage(ShaderStageCompute);

createGlobalStack(module);

static auto visitor = llvm_dialects::VisitorBuilder<LowerGpuRt>()
.setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration)
Expand All @@ -78,7 +78,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi
.add(&LowerGpuRt::visitGetFlattenedGroupThreadId)
.build();

visitor.visit(*this, *m_module);
visitor.visit(*this, module);

for (Instruction *call : m_callsToLower) {
call->dropAllReferences();
Expand All @@ -99,14 +99,15 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi
// Get pipeline workgroup size for stack size calculation
unsigned LowerGpuRt::getWorkgroupSize() const {
unsigned workgroupSize = 0;
if (m_context->getPipelineType() == PipelineType::Graphics) {
workgroupSize = m_context->getPipelineContext()->getRayTracingWaveSize();
if (m_pipelineState->isGraphics()) {
// Force 64 for graphics stages
workgroupSize = 64;
} else {
ComputeShaderMode mode = lgc::Pipeline::getComputeShaderMode(*m_module);
ComputeShaderMode mode = m_pipelineState->getShaderModes()->getComputeShaderMode();
workgroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ;
}
assert(workgroupSize != 0);
if (m_context->getPipelineContext()->getGfxIpVersion().major >= 11) {
if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) {
// Round up to multiple of 32, as the ds_bvh_stack swizzle as 32 threads
workgroupSize = alignTo(workgroupSize, 32);
}
Expand All @@ -117,26 +118,48 @@ unsigned LowerGpuRt::getWorkgroupSize() const {
// Get flat thread id in work group/wave
Value *LowerGpuRt::getThreadIdInGroup() const {
// Todo: for graphics shader, subgroupId * waveSize + subgroupLocalInvocationId()
unsigned builtIn = m_context->getPipelineType() == PipelineType::Graphics ? lgc::BuiltInSubgroupLocalInvocationId
: lgc::BuiltInLocalInvocationIndex;
lgc::InOutInfo inputInfo = {};
return m_builder->CreateReadBuiltInInput(static_cast<lgc::BuiltInKind>(builtIn), inputInfo, nullptr, nullptr, "");
unsigned builtIn = m_pipelineState->isGraphics() ? BuiltInSubgroupLocalInvocationId : BuiltInLocalInvocationIndex;
InOutInfo inputInfo = {};
return m_builder->CreateReadBuiltInInput(static_cast<BuiltInKind>(builtIn), inputInfo, nullptr, nullptr, "");
}

// =====================================================================================================================
// Create global variable for the stack
void LowerGpuRt::createGlobalStack() {
auto ldsStackSize = getWorkgroupSize() * MaxLdsStackEntries;
// Double anyhit and intersection shader lds size, these shader use lower part of stack to read/write value
if (m_lowerStack)
ldsStackSize = ldsStackSize << 1;

m_stackTy = ArrayType::get(m_builder->getInt32Ty(), ldsStackSize);
auto ldsStack = new GlobalVariable(*m_module, m_stackTy, false, GlobalValue::ExternalLinkage, nullptr,
RtName::LdsStack, nullptr, GlobalValue::NotThreadLocal, 3);

ldsStack->setAlignment(MaybeAlign(4));
m_stack = ldsStack;
// @param [in/out] module : LLVM module to be run on
void LowerGpuRt::createGlobalStack(Module &module) {
  // Summary of the stack-related dialect ops found in the module.
  struct StackUsage {
    bool needGlobalStack; // Some op reads, writes or initializes the stack
    bool needExtraStack;  // Some stack read/write op asks for the extra stack
  };
  StackUsage usage = {false, false};

  // Pre-scan the module: stack read/write ops need the global stack and may also request the extra stack, while an
  // LDS stack init op only needs the global stack itself.
  static auto usageVisitor = llvm_dialects::VisitorBuilder<StackUsage>()
                                 .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration)
                                 .add<GpurtStackWriteOp>([](auto &state, auto &op) {
                                   state.needGlobalStack = true;
                                   state.needExtraStack |= op.getUseExtraStack();
                                 })
                                 .add<GpurtStackReadOp>([](auto &state, auto &op) {
                                   state.needGlobalStack = true;
                                   state.needExtraStack |= op.getUseExtraStack();
                                 })
                                 .add<GpurtLdsStackInitOp>([](auto &state, auto &) { state.needGlobalStack = true; })
                                 .build();
  usageVisitor.visit(usage, module);

  // Nothing in the module touches the stack, so m_stack and m_stackTy are left untouched.
  if (!usage.needGlobalStack)
    return;

  auto entryCount = getWorkgroupSize() * MaxLdsStackEntries;
  // Double the LDS size when any operation needs to be performed on the extra stack.
  if (usage.needExtraStack)
    entryCount *= 2;

  // The stack is an array of i32 entries, declared as an external global in address space 3.
  m_stackTy = ArrayType::get(m_builder->getInt32Ty(), entryCount);
  auto *stackVar = new GlobalVariable(module, m_stackTy, false, GlobalValue::ExternalLinkage, nullptr,
                                      RtName::LdsStack, nullptr, GlobalValue::NotThreadLocal, 3);
  stackVar->setAlignment(MaybeAlign(4));
  m_stack = stackVar;
}

// =====================================================================================================================
Expand Down Expand Up @@ -184,7 +207,7 @@ void LowerGpuRt::visitStackRead(GpurtStackReadOp &inst) {
m_builder->SetInsertPoint(&inst);
Value *stackIndex = inst.getIndex();
Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3);
if (m_lowerStack) {
if (inst.getUseExtraStack()) {
auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries);
stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize);
}
Expand All @@ -206,7 +229,7 @@ void LowerGpuRt::visitStackWrite(GpurtStackWriteOp &inst) {
Value *stackIndex = inst.getIndex();
Value *stackData = inst.getValue();
Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3);
if (m_lowerStack) {
if (inst.getUseExtraStack()) {
auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries);
stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize);
}
Expand Down Expand Up @@ -297,8 +320,7 @@ void LowerGpuRt::visitLdsStackStore(GpurtLdsStackStoreOp &inst) {
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *boxSortHeuristicMode = m_builder->getInt32(rtState->boxSortHeuristicMode);
Value *boxSortHeuristicMode = m_builder->getInt32(m_pipelineState->getOptions().rtBoxSortHeuristicMode);
inst.replaceAllUsesWith(boxSortHeuristicMode);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
Expand All @@ -310,8 +332,7 @@ void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &in
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *staticPipelineFlags = m_builder->getInt32(rtState->staticPipelineFlags);
Value *staticPipelineFlags = m_builder->getInt32(m_pipelineState->getOptions().rtStaticPipelineFlags);
inst.replaceAllUsesWith(staticPipelineFlags);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
Expand All @@ -323,8 +344,7 @@ void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) {
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetTriangleCompressionMode(GpurtGetTriangleCompressionModeOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *triCompressMode = m_builder->getInt32(rtState->triCompressMode);
Value *triCompressMode = m_builder->getInt32(m_pipelineState->getOptions().rtTriCompressMode);
inst.replaceAllUsesWith(triCompressMode);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
Expand All @@ -341,4 +361,4 @@ void LowerGpuRt::visitGetFlattenedGroupThreadId(GpurtGetFlattenedGroupThreadIdOp
m_funcsToLower.insert(inst.getCalledFunction());
}

} // namespace Llpc
} // namespace lgc
1 change: 1 addition & 0 deletions lgc/patch/PassRegistry.inc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ LLPC_MODULE_PASS("lgc-lower-debug-printf", LowerDebugPrintf)

LLPC_FUNCTION_PASS("lgc-combine-cooperative-matrix", CombineCooperativeMatrix)
LLPC_MODULE_PASS("lgc-lower-cooperative-matrix", LowerCooperativeMatrix)
LLPC_MODULE_PASS("lgc-lower-gpurt", LowerGpuRt)

#undef LLPC_PASS
#undef LLPC_LOOP_PASS
Expand Down
3 changes: 3 additions & 0 deletions lgc/patch/Patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "lgc/patch/FragColorExport.h"
#include "lgc/patch/LowerCooperativeMatrix.h"
#include "lgc/patch/LowerDebugPrintf.h"
#include "lgc/patch/LowerGpuRt.h"
#include "lgc/patch/PatchBufferOp.h"
#include "lgc/patch/PatchCheckShaderCache.h"
#include "lgc/patch/PatchCopyShader.h"
Expand Down Expand Up @@ -141,6 +142,8 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T
"// LLPC pipeline before-patching results\n"));
}

passMgr.addPass(LowerGpuRt());

const auto indirectMode = pipelineState->getOptions().rtIndirectMode;
if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy ||
indirectMode == RayTracingIndirectMode::Continuations) {
Expand Down
1 change: 0 additions & 1 deletion llpc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ if(ICD_BUILD_LLPC)
lower/llpcSpirvLowerTranslator.cpp
lower/llpcSpirvLowerUtil.cpp
lower/llpcSpirvProcessGpuRtLibrary.cpp
lower/LowerGpuRt.cpp
lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp
lower/LowerGLCompatibility.cpp
lower/llpcSpirvLowerCooperativeMatrix.cpp
Expand Down
4 changes: 4 additions & 0 deletions llpc/context/llpcPipelineContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ Options PipelineContext::computePipelineOptions() const {
options.enablePrimGeneratedQuery = getPipelineOptions()->enablePrimGeneratedQuery;
options.enableFragColor = getPipelineOptions()->enableFragColor;

options.rtBoxSortHeuristicMode = m_rtState.boxSortHeuristicMode;
options.rtStaticPipelineFlags = m_rtState.staticPipelineFlags;
options.rtTriCompressMode = m_rtState.triCompressMode;

return options;
}

Expand Down
3 changes: 0 additions & 3 deletions llpc/lower/llpcSpirvLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
// New version of the code (also handles unknown version, which we treat as latest)
#include "llvm/IRPrinter/IRPrintingPasses.h"
#endif
#include "LowerGpuRt.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
Expand Down Expand Up @@ -264,8 +263,6 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager
passMgr.addPass(SpirvLowerInstMetaRemove());

if (rayTracing || rayQuery || isInternalRtShader) {
passMgr.addPass(LowerGpuRt());

FunctionPassManager fpm;
fpm.addPass(SROAPass(SROAOptions::PreserveCFG));
fpm.addPass(InstCombinePass(instCombineOpt));
Expand Down
24 changes: 24 additions & 0 deletions llpc/lower/llpcSpirvLowerRayTracing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,8 @@ PreservedAnalyses SpirvLowerRayTracing::run(Module &module, ModuleAnalysisManage
.add(&SpirvLowerRayTracing::visitInstanceInclusionMaskOp)
.add(&SpirvLowerRayTracing::visitShaderIndexOp)
.add(&SpirvLowerRayTracing::visitShaderRecordBufferOp)
.add(&SpirvLowerRayTracing::visitStackReadOp)
.add(&SpirvLowerRayTracing::visitStackWriteOp)
.build();

visitor.visit(*this, *m_module);
Expand Down Expand Up @@ -2575,6 +2577,28 @@ void SpirvLowerRayTracing::visitGetRayStaticId(lgc::GpurtGetRayStaticIdOp &inst)
m_funcsToLower.insert(inst.getCalledFunction());
}

// =====================================================================================================================
// Visits "lgc.gpurt.stack.read" instructions
//
// @param inst : The instruction
void SpirvLowerRayTracing::visitStackReadOp(lgc::GpurtStackReadOp &inst) {
  // NOTE: When RayQuery is used inside an intersection or any-hit shader, a traversal stack is already being held for
  // TraceRay, so the stack operations of this RayQuery are performed in an extra stack space.
  const bool inIntersectOrAnyHit =
      (m_shaderStage == ShaderStageRayTracingIntersect) || (m_shaderStage == ShaderStageRayTracingAnyHit);
  if (!inIntersectOrAnyHit)
    return;

  inst.setUseExtraStack(true);
}

// =====================================================================================================================
// Visits "lgc.gpurt.stack.write" instructions
//
// @param inst : The instruction
void SpirvLowerRayTracing::visitStackWriteOp(lgc::GpurtStackWriteOp &inst) {
  // NOTE: When RayQuery is used inside an intersection or any-hit shader, a traversal stack is already being held for
  // TraceRay, so the stack operations of this RayQuery are performed in an extra stack space.
  const bool inIntersectOrAnyHit =
      (m_shaderStage == ShaderStageRayTracingIntersect) || (m_shaderStage == ShaderStageRayTracingAnyHit);
  if (!inIntersectOrAnyHit)
    return;

  inst.setUseExtraStack(true);
}

// =====================================================================================================================
// Visits "lgc.gpurt.get.parent.id" instructions
//
Expand Down
Loading

0 comments on commit d15e7f2

Please sign in to comment.