Skip to content

Commit

Permalink
Converting Vulkan target to support executable-create2.
Browse files Browse the repository at this point in the history
This produces a new flatbuffer that supports multiple shader modules
per HAL executable, reorganizes export information so that it is stored
per-export, and replaces HAL pipeline layouts with internal Vulkan pipeline
layouts that are constructed from metadata in the flatbuffer.

This is a startup-time regression for as long as we are not linking
modules, because descriptor set layout and pipeline layout reuse now
happen per executable and not per VM context. The fix is to link modules
together.
  • Loading branch information
benvanik committed Aug 21, 2024
1 parent a69a93b commit e0bca76
Show file tree
Hide file tree
Showing 21 changed files with 1,698 additions and 1,252 deletions.
386 changes: 233 additions & 153 deletions compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ struct KernelFeatures {
// and updates features.
//
// Note that the device queries used here should match the ones used in
// iree_hal_vulkan_get_device_properties() on the runtime side.
// iree_hal_vulkan_query_device_properties() on the runtime side.
LogicalResult mapToDeviceQuery(IREE::HAL::ExecutableExportOp entryPoint,
spirv::Capability cap,
KernelFeatures &features) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ struct CmdDispatch2OpPattern
SmallVector<IREE::HAL::BindingValue> bindings;
for (auto [i, bindingAttr] : llvm::enumerate(bindingAttrs)) {
auto descriptorFlags = layoutAttr.getSetLayout(bindingAttr.getSet())
.getBinding(i)
.getBinding(bindingAttr.getBinding())
.getFlags();
IREE::HAL::BindingValue binding;
if (bitEnumContainsAll(descriptorFlags,
Expand Down
4 changes: 2 additions & 2 deletions runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ iree_runtime_cc_library(
"native_event.h",
"native_executable.cc",
"native_executable.h",
"native_pipeline_layout.cc",
"native_pipeline_layout.h",
"native_semaphore.cc",
"native_semaphore.h",
"nop_executable_cache.cc",
"nop_executable_cache.h",
"pipeline_layout.cc",
"pipeline_layout.h",
"sparse_buffer.cc",
"sparse_buffer.h",
"status_util.c",
Expand Down
4 changes: 2 additions & 2 deletions runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ iree_cc_library(
"native_event.h"
"native_executable.cc"
"native_executable.h"
"native_pipeline_layout.cc"
"native_pipeline_layout.h"
"native_semaphore.cc"
"native_semaphore.h"
"nop_executable_cache.cc"
"nop_executable_cache.h"
"pipeline_layout.cc"
"pipeline_layout.h"
"sparse_buffer.cc"
"sparse_buffer.h"
"status_util.c"
Expand Down
53 changes: 25 additions & 28 deletions runtime/src/iree/hal/drivers/vulkan/builtin_executables.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <cstddef>

#include "iree/hal/drivers/vulkan/builtin/builtin_shaders_spv.h"
#include "iree/hal/drivers/vulkan/native_pipeline_layout.h"
#include "iree/hal/drivers/vulkan/pipeline_layout.h"
#include "iree/hal/drivers/vulkan/status_util.h"

namespace iree {
Expand All @@ -26,7 +26,7 @@ typedef struct iree_hal_vulkan_builtin_fill_unaligned_constants_t {
} iree_hal_vulkan_builtin_fill_unaligned_constants_t;

static_assert(sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t) ==
IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT,
IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANTS_SIZE,
"push constant count must match struct size");

} // namespace
Expand All @@ -41,11 +41,11 @@ BuiltinExecutables::~BuiltinExecutables() {
}

if (pipeline_layout_) {
iree_hal_pipeline_layout_destroy(pipeline_layout_);
iree_hal_vulkan_pipeline_layout_release(pipeline_layout_);
}

for (size_t i = 0; i < IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT; ++i) {
iree_hal_descriptor_set_layout_release(descriptor_set_layouts_[i]);
iree_hal_vulkan_descriptor_set_layout_release(descriptor_set_layouts_[i]);
}
}

Expand All @@ -56,18 +56,20 @@ iree_status_t BuiltinExecutables::InitializeExecutables() {
// Even though we're just using one set, we still need to create dummy set
// layout (without any bindings) for those preceding this set.
for (size_t i = 0; i < IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT; ++i) {
iree_hal_descriptor_set_layout_t* layout = NULL;
iree_hal_vulkan_descriptor_set_layout_t* layout = NULL;
if (i == IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET) {
iree_hal_descriptor_set_layout_binding_t layout_binding;
VkDescriptorSetLayoutBinding layout_binding;
layout_binding.binding = 0;
layout_binding.type = IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER;
layout_binding.flags = IREE_HAL_DESCRIPTOR_FLAG_NONE;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_descriptor_set_layout_create(
logical_device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_FLAG_NONE,
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
layout_binding.descriptorCount = 1;
layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
layout_binding.pImmutableSamplers = NULL;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_descriptor_set_layout_create(
logical_device_, /*flags=*/0,
/*binding_count=*/1, &layout_binding, &layout));
} else {
IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_descriptor_set_layout_create(
logical_device_, IREE_HAL_DESCRIPTOR_SET_LAYOUT_FLAG_NONE,
IREE_RETURN_IF_ERROR(iree_hal_vulkan_descriptor_set_layout_create(
logical_device_, /*flags=*/0,
/*binding_count=*/0, /*bindings=*/nullptr, &layout));
}
descriptor_set_layouts_[i] = layout;
Expand All @@ -92,10 +94,14 @@ iree_status_t BuiltinExecutables::InitializeExecutables() {

// Create pipeline layout.
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_native_pipeline_layout_create(
logical_device_, IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT / 4,
IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT, descriptor_set_layouts_,
&pipeline_layout_);
VkPushConstantRange push_constant_ranges[1];
push_constant_ranges[0].offset = 0;
push_constant_ranges[0].size = IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANTS_SIZE;
push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
status = iree_hal_vulkan_pipeline_layout_create(
logical_device_, IREE_ARRAYSIZE(push_constant_ranges),
push_constant_ranges, IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT,
descriptor_set_layouts_, &pipeline_layout_);
}

// Create pipeline.
Expand All @@ -105,7 +111,7 @@ iree_status_t BuiltinExecutables::InitializeExecutables() {
pipeline_create_info.pNext = NULL;
pipeline_create_info.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;
pipeline_create_info.layout =
iree_hal_vulkan_native_pipeline_layout_handle(pipeline_layout_);
iree_hal_vulkan_pipeline_layout_handle(pipeline_layout_);
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
pipeline_create_info.basePipelineIndex = 0;
VkPipelineShaderStageCreateInfo* stage_create_info =
Expand Down Expand Up @@ -138,7 +144,7 @@ iree_status_t BuiltinExecutables::FillBufferUnaligned(
VkCommandBuffer command_buffer, DescriptorSetArena* descriptor_set_arena,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, const void* push_constants_to_restore) {
iree_host_size_t pattern_length) {
IREE_TRACE_SCOPE();

iree_hal_vulkan_builtin_fill_unaligned_constants_t constants;
Expand Down Expand Up @@ -175,8 +181,7 @@ iree_status_t BuiltinExecutables::FillBufferUnaligned(
constants.fill_offset_bytes = target_offset;
constants.fill_length_bytes = length;
logical_device_->syms()->vkCmdPushConstants(
command_buffer,
iree_hal_vulkan_native_pipeline_layout_handle(pipeline_layout_),
command_buffer, iree_hal_vulkan_pipeline_layout_handle(pipeline_layout_),
VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0,
sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t), &constants);

Expand All @@ -186,14 +191,6 @@ iree_status_t BuiltinExecutables::FillBufferUnaligned(

logical_device_->syms()->vkCmdDispatch(command_buffer, 1, 1, 1);

// Restore push constants.
logical_device_->syms()->vkCmdPushConstants(
command_buffer,
iree_hal_vulkan_native_pipeline_layout_handle(pipeline_layout_),
VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0,
sizeof(iree_hal_vulkan_builtin_fill_unaligned_constants_t),
push_constants_to_restore);

return iree_ok_status();
}

Expand Down
21 changes: 10 additions & 11 deletions runtime/src/iree/hal/drivers/vulkan/builtin_executables.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace vulkan {
#define IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT 4
#define IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET 3

#define IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANT_COUNT 16
#define IREE_HAL_VULKAN_BUILTIN_PUSH_CONSTANTS_SIZE 16

class BuiltinExecutables {
public:
Expand All @@ -43,22 +43,21 @@ class BuiltinExecutables {
//
// This only implements the unaligned edges of fills, vkCmdFillBuffer should
// be used for the aligned interior (if any).
//
// |push_constants_to_restore| will be pushed using vkCmdPushConstants over
// the bytes used by this call.
iree_status_t FillBufferUnaligned(
VkCommandBuffer command_buffer, DescriptorSetArena* descriptor_set_arena,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, const void* push_constants_to_restore);
iree_status_t FillBufferUnaligned(VkCommandBuffer command_buffer,
DescriptorSetArena* descriptor_set_arena,
iree_hal_buffer_t* target_buffer,
iree_device_size_t target_offset,
iree_device_size_t length,
const void* pattern,
iree_host_size_t pattern_length);

private:
VkDeviceHandle* logical_device_ = NULL;

iree_hal_descriptor_set_layout_t*
iree_hal_vulkan_descriptor_set_layout_t*
descriptor_set_layouts_[IREE_HAL_VULKAN_BUILTIN_DESCRIPTOR_SET_COUNT] = {
NULL};
iree_hal_pipeline_layout_t* pipeline_layout_ = NULL;
iree_hal_vulkan_pipeline_layout_t* pipeline_layout_ = NULL;
VkPipeline pipeline_ = VK_NULL_HANDLE;
};

Expand Down
25 changes: 12 additions & 13 deletions runtime/src/iree/hal/drivers/vulkan/descriptor_set_arena.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "iree/base/internal/math.h"
#include "iree/hal/drivers/vulkan/base_buffer.h"
#include "iree/hal/drivers/vulkan/extensibility_util.h"
#include "iree/hal/drivers/vulkan/native_pipeline_layout.h"
#include "iree/hal/drivers/vulkan/pipeline_layout.h"
#include "iree/hal/drivers/vulkan/status_util.h"

namespace iree {
Expand Down Expand Up @@ -71,7 +71,7 @@ static void PopulateDescriptorSetWriteInfos(
write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write_info.pNext = nullptr;
write_info.dstSet = dst_set;
write_info.dstBinding = binding.ordinal;
write_info.dstBinding = (uint32_t)i;
write_info.dstArrayElement = 0;
write_info.descriptorCount = 1;
write_info.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
Expand Down Expand Up @@ -100,9 +100,9 @@ DescriptorSetArena::~DescriptorSetArena() {
}

iree_status_t DescriptorSetArena::BindDescriptorSet(
VkCommandBuffer command_buffer, iree_hal_pipeline_layout_t* pipeline_layout,
uint32_t set, iree_host_size_t binding_count,
const iree_hal_buffer_ref_t* bindings) {
VkCommandBuffer command_buffer,
iree_hal_vulkan_pipeline_layout_t* pipeline_layout, uint32_t set,
iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings) {
// Always prefer using push descriptors when available as we can avoid the
// additional API overhead of updating/resetting pools.
if (logical_device_->enabled_extensions().push_descriptors) {
Expand All @@ -113,8 +113,7 @@ iree_status_t DescriptorSetArena::BindDescriptorSet(

IREE_TRACE_SCOPE_NAMED("DescriptorSetArena::BindDescriptorSet");

auto* set_layout =
iree_hal_vulkan_native_pipeline_layout_set(pipeline_layout, set);
auto* set_layout = iree_hal_vulkan_pipeline_layout_set(pipeline_layout, set);

// Pick a bucket based on the number of descriptors required.
// NOTE: right now we are 1:1 with bindings.
Expand Down Expand Up @@ -143,7 +142,7 @@ iree_status_t DescriptorSetArena::BindDescriptorSet(
allocate_info.pNext = nullptr;
allocate_info.descriptorPool = descriptor_pool.handle;
VkDescriptorSetLayout set_layout_handle =
iree_hal_vulkan_native_descriptor_set_layout_handle(set_layout);
iree_hal_vulkan_descriptor_set_layout_handle(set_layout);
allocate_info.descriptorSetCount = 1;
allocate_info.pSetLayouts = &set_layout_handle;

Expand Down Expand Up @@ -191,19 +190,19 @@ iree_status_t DescriptorSetArena::BindDescriptorSet(
// Bind the descriptor set.
syms().vkCmdBindDescriptorSets(
command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
iree_hal_vulkan_native_pipeline_layout_handle(pipeline_layout), set, 1,
iree_hal_vulkan_pipeline_layout_handle(pipeline_layout), set, 1,
&descriptor_set, 0, nullptr);

return iree_ok_status();
}

void DescriptorSetArena::PushDescriptorSet(
VkCommandBuffer command_buffer, iree_hal_pipeline_layout_t* pipeline_layout,
uint32_t set, iree_host_size_t binding_count,
const iree_hal_buffer_ref_t* bindings) {
VkCommandBuffer command_buffer,
iree_hal_vulkan_pipeline_layout_t* pipeline_layout, uint32_t set,
iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings) {
IREE_TRACE_SCOPE_NAMED("DescriptorSetArena::PushDescriptorSet");
VkPipelineLayout device_pipeline_layout =
iree_hal_vulkan_native_pipeline_layout_handle(pipeline_layout);
iree_hal_vulkan_pipeline_layout_handle(pipeline_layout);

// Get a list of VkWriteDescriptorSet structs with all bound buffers.
iree_host_size_t write_info_count = 0;
Expand Down
11 changes: 6 additions & 5 deletions runtime/src/iree/hal/drivers/vulkan/descriptor_set_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "iree/hal/drivers/vulkan/dynamic_symbols.h"
#include "iree/hal/drivers/vulkan/handle_util.h"
#include "iree/hal/drivers/vulkan/native_executable.h"
#include "iree/hal/drivers/vulkan/pipeline_layout.h"
#include "iree/hal/drivers/vulkan/util/arena.h"
#include "iree/hal/drivers/vulkan/util/ref_ptr.h"

Expand All @@ -34,10 +35,10 @@ class DescriptorSetArena final {
// Allocates and binds a descriptor set from the arena.
// The command buffer will have the descriptor set containing |bindings| bound
// to it.
iree_status_t BindDescriptorSet(VkCommandBuffer command_buffer,
iree_hal_pipeline_layout_t* pipeline_layout,
uint32_t set, iree_host_size_t binding_count,
const iree_hal_buffer_ref_t* bindings);
iree_status_t BindDescriptorSet(
VkCommandBuffer command_buffer,
iree_hal_vulkan_pipeline_layout_t* pipeline_layout, uint32_t set,
iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings);

// Flushes all pending writes to descriptor sets allocated from the arena and
// returns a group that - when dropped - will release the descriptor sets
Expand All @@ -49,7 +50,7 @@ class DescriptorSetArena final {

// Pushes the descriptor set to the command buffer, if supported.
void PushDescriptorSet(VkCommandBuffer command_buffer,
iree_hal_pipeline_layout_t* pipeline_layout,
iree_hal_vulkan_pipeline_layout_t* pipeline_layout,
uint32_t set, iree_host_size_t binding_count,
const iree_hal_buffer_ref_t* bindings);

Expand Down
Loading

0 comments on commit e0bca76

Please sign in to comment.