Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shader blending in Vulkan through input attachments #15944

Merged
merged 7 commits into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanBarrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

void VulkanBarrier::Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
}
imageBarriers_.clear();
srcStageMask_ = 0;
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanBarrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class VulkanBarrier {
) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;

VkImageMemoryBarrier imageBarrier;
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
Expand Down Expand Up @@ -112,4 +113,5 @@ class VulkanBarrier {
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
std::vector<VkImageMemoryBarrier> imageBarriers_;
VkDependencyFlags dependencyFlags_ = 0;
};
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.KHR_create_renderpass2 = true;
extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);
}

extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);

VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
device_info.queueCreateInfoCount = 1;
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanDebug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
} else {
WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str());
}

// false indicates that layer should not bail-out of an
// API call that had validation failures. This may mean that the
// app dies inside the driver due to invalid parameter(s).
// That's what would happen without validation layers, so we'll
// keep that behavior here.
return false;
}

2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ struct VulkanExtensions {
bool KHR_depth_stencil_resolve;
bool EXT_shader_stencil_export;
bool EXT_swapchain_colorspace;
bool ARM_rasterization_order_attachment_access;
bool EXT_fragment_shader_interlock;
// bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};

Expand Down
92 changes: 80 additions & 12 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
if (a == b) {
// Trivial merging case.
return a;
} else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
return RP_TYPE_COLOR_DEPTH_INPUT;
} else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
return RP_TYPE_COLOR_DEPTH_INPUT;
}
// More cases to be added later.
return a;
}

Expand Down Expand Up @@ -155,7 +158,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies

VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;

VkAttachmentDescription attachments[2] = {};
attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
Expand All @@ -179,7 +187,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp

VkAttachmentReference color_reference{};
color_reference.attachment = 0;
color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

VkAttachmentReference depth_reference{};
depth_reference.attachment = 1;
Expand All @@ -188,8 +196,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
if (selfDependency) {
subpass.inputAttachmentCount = 1;
subpass.pInputAttachments = &color_reference;
} else {
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
Expand All @@ -198,22 +211,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
subpass.pPreserveAttachments = nullptr;

// Not sure if this is really necessary.
VkSubpassDependency dep{};
dep.srcSubpass = VK_SUBPASS_EXTERNAL;
dep.dstSubpass = 0;
dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.srcAccessMask = 0;
dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkSubpassDependency deps[2]{};
size_t numDeps = 0;

VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
rp.attachmentCount = 2;
rp.pAttachments = attachments;
rp.subpassCount = 1;
rp.pSubpasses = &subpass;

if (rpType == RP_TYPE_BACKBUFFER) {
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].srcAccessMask = 0;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
numDeps++;
rp.dependencyCount = 1;
rp.pDependencies = &dep;
}

if (selfDependency) {
deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
deps[numDeps].srcSubpass = 0;
deps[numDeps].dstSubpass = 0;
numDeps++;
}

if (numDeps > 0) {
rp.dependencyCount = (u32)numDeps;
rp.pDependencies = deps;
}

VkRenderPass pass;
Expand Down Expand Up @@ -246,6 +277,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}

// Must match the subpass self-dependency declared above.
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) {
VkAccessFlags srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkAccessFlags dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
recordBarrier->TransitionImage(
img.image,
0,
1,
aspect,
VK_IMAGE_LAYOUT_GENERAL,
VK_IMAGE_LAYOUT_GENERAL,
srcAccessMask,
dstAccessMask,
srcStageMask,
dstStageMask
);
} else {
_assert_msg_(false, "Depth self-dependencies not yet supported");
}
}

void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
Expand Down Expand Up @@ -628,6 +683,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
switch (step.render.renderPassType) {
case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
default: renderCmd = "N/A";
}
snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
Expand Down Expand Up @@ -817,6 +873,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
case VKRRenderCommand::REMOVED:
INFO_LOG(G3D, " (Removed)");
break;
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
INFO_LOG(G3D, " SelfBarrier()");
break;
case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
break;
Expand Down Expand Up @@ -1235,6 +1294,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}

case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
{
_assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
VulkanBarrier barrier;
SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
barrier.Flush(cmd);
break;
}

case VKRRenderCommand::PUSH_CONSTANTS:
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
break;
Expand Down
20 changes: 12 additions & 8 deletions Common/GPU/Vulkan/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ struct VKRImage;
enum {
QUEUE_HACK_MGS2_ACID = 1,
QUEUE_HACK_SONIC = 2,
// Killzone PR = 4.
QUEUE_HACK_RENDERPASS_MERGE = 8,
};

Expand All @@ -36,20 +35,24 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
SELF_DEPENDENCY_BARRIER,
NUM_RENDER_COMMANDS,
};

enum PipelineFlags {
PIPELINE_FLAG_NONE = 0,
PIPELINE_FLAG_USES_LINES = (1 << 2),
PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3),
PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
enum class PipelineFlags {
NONE = 0,
USES_LINES = (1 << 2),
USES_BLEND_CONSTANT = (1 << 3),
USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
USES_INPUT_ATTACHMENT = (1 << 5),
};
ENUM_CLASS_BITOPS(PipelineFlags);

// Pipelines need to be created for the right type of render pass.
enum RenderPassType {
RP_TYPE_BACKBUFFER,
RP_TYPE_COLOR_DEPTH,
RP_TYPE_COLOR_DEPTH_INPUT,
// Later will add pure-color render passes.
RP_TYPE_COUNT,
};
Expand Down Expand Up @@ -168,7 +171,6 @@ struct VKRStep {
union {
struct {
VKRFramebuffer *framebuffer;
// TODO: Look these up through renderPass?
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
Expand All @@ -183,7 +185,7 @@ struct VKRStep {
int numReads;
VkImageLayout finalColorLayout;
VkImageLayout finalDepthStencilLayout;
u32 pipelineFlags;
PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT
VkRect2D renderArea;
// Render pass type. Deduced after finishing recording the pass, from the used pipelines.
// NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
Expand Down Expand Up @@ -324,6 +326,8 @@ class VulkanQueueRunner {
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

VulkanContext *vulkan_;

VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
Expand Down
16 changes: 13 additions & 3 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (color) {
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
} else {
ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
Expand Down Expand Up @@ -534,7 +534,9 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}

INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
if (!toCompile.empty()) {
INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
}

// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
Expand Down Expand Up @@ -774,6 +776,9 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
} else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
// Not allowed on backbuffers.
rpType = RP_TYPE_COLOR_DEPTH_INPUT;
}

VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
Expand Down Expand Up @@ -806,7 +811,12 @@ void VulkanRenderManager::EndCurRenderStep() {

// We no longer have a current render step.
curRenderStep_ = nullptr;
curPipelineFlags_ = 0;
curPipelineFlags_ = (PipelineFlags)0;
}

void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
_dbg_assert_(curRenderStep_);
curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER });
}

void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
Expand Down
4 changes: 3 additions & 1 deletion Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ class VulkanRenderManager {
// as the other backends, even though there's no actual binding happening here.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);

void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);

bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);

Expand Down Expand Up @@ -544,7 +546,7 @@ class VulkanRenderManager {
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;
bool curStepHasScissor_ = false;
u32 curPipelineFlags_ = 0;
PipelineFlags curPipelineFlags_{};
BoundingRect curRenderArea_;

std::vector<VKRStep *> steps_;
Expand Down
Loading