From 4f5ca8cda0701b71a1d89369fc3798f92cef4857 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Fri, 19 Mar 2021 11:05:55 +0900 Subject: [PATCH 01/22] ubo codegen first cut --- src/target/spirv/codegen_spirv.cc | 24 +++++++++++++++++++----- src/target/spirv/ir_builder.cc | 13 +++++++------ src/target/spirv/ir_builder.h | 3 ++- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 24608ebc93f4..633a0558f26e 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -66,16 +66,30 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: spirv::Value func_ptr = builder_->NewFunction(); builder_->StartFunction(func_ptr); - // All the POD arguments are passed in through PushConstant if (pod_args.size() != 0) { std::vector value_types; for (size_t i = 0; i < pod_args.size(); ++i) { value_types.push_back(builder_->GetSType(pod_args[i].dtype())); } - spirv::Value ptr = builder_->DeclarePushConstant(value_types); - for (size_t i = 0; i < pod_args.size(); ++i) { - spirv::Value value = builder_->GetPushConstant(ptr, value_types[i], static_cast(i)); - var_map_[pod_args[i].get()] = value; + // All the POD arguments are passed in through PushConstant + if (pod_args.size() * 8 <= 128) { + spirv::Value ptr = builder_->DeclarePushConstant(value_types); + for (size_t i = 0; i < pod_args.size(); ++i) { + spirv::Value value = + builder_->GetPushConstant(ptr, value_types[i], static_cast(i)); + var_map_[pod_args[i].get()] = value; + } + } else { + DataType value_storage_type = DataType::Int(64); + spirv::Value ptr_ubo = + builder_->BufferArgument(builder_->GetSType(value_storage_type), 0, num_buffer, true); + for (size_t i = 0; i < pod_args.size(); ++i) { + spirv::SType ptr_type = builder_->GetPointerType(value_types[i], spv::StorageClassUniform); + spirv::Value ptr = builder_->StructArrayAccess( + ptr_type, ptr_ubo, MakeValue(PrimExpr(static_cast(i * 8)))); + var_map_[pod_args[i].get()] = + builder_->MakeValue(spv::OpLoad, value_types[i], ptr, spv::MemoryAccessMaskNone); + } } } this->VisitStmt(f->body); diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index 5a1457387ae5..300355100bd7 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -185,14 +185,15 @@ Value IRBuilder::FloatImm(const SType& dtype, double value) { } Value IRBuilder::BufferArgument(const SType& value_type, uint32_t descriptor_set, - uint32_t binding) { + uint32_t binding, bool uniform) { // NOTE: BufferBlock was deprecated in SPIRV 1.3 // use StorageClassStorageBuffer instead. -#if SPV_VERSION >= 0x10300 - spv::StorageClass storage_class = spv::StorageClassStorageBuffer; -#else - spv::StorageClass storage_class = spv::StorageClassUniform; -#endif + spv::StorageClass storage_class; + if (uniform) { + storage_class = spv::StorageClassUniform; + } else { + storage_class = spv::StorageClassStorageBuffer; + } SType sarr_type = GetStructArrayType(value_type, 0); SType ptr_type = GetPointerType(sarr_type, storage_class); diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index 8a08048e1955..ffda3333b57d 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -472,7 +472,8 @@ class IRBuilder { * \param binding The binding locaiton in descriptor set. * \param The argument type. */ - Value BufferArgument(const SType& value_type, uint32_t descriptor_set, uint32_t binding); + Value BufferArgument(const SType& value_type, uint32_t descriptor_set, uint32_t binding, bool uniform=false); + /*! * \brief Declare POD arguments through push constants. * From 7d2ed2bed107f1dd1aba2bb6697665a9684b1441 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Fri, 19 Mar 2021 11:20:19 +0900 Subject: [PATCH 02/22] begin runtime change for UBO --- src/runtime/vulkan/vulkan.cc | 41 ++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 5cd4812f41c4..6e050e5b8603 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -100,6 +100,7 @@ struct VulkanPipeline { VkPipelineLayout pipeline_layout{VK_NULL_HANDLE}; VkPipeline pipeline{VK_NULL_HANDLE}; VkDescriptorUpdateTemplateKHR descriptor_update_template{VK_NULL_HANDLE}; + VulkanBuffer ubo; }; typedef dmlc::ThreadLocalStore VulkanThreadStore; @@ -747,7 +748,9 @@ class VulkanModuleNode final : public runtime::ModuleNode { public: explicit VulkanModuleNode(std::unordered_map smap, std::unordered_map fmap, std::string source) - : smap_(smap), fmap_(fmap), source_(source) {} + : smap_(smap), fmap_(fmap), source_(source) { + LOG(INFO) << source; + } const char* type_key() const final { return "vulkan"; } @@ -843,6 +846,31 @@ class VulkanModuleNode final : public runtime::ModuleNode { } } + if (num_pod != 0 && num_pod * 8 > 120) { + ICHECK(num_pod == num_pack_args); + // UBO + // TODO: allocate ubo + { + VkDescriptorSetLayoutBinding bd; + bd.binding = num_buffer; + bd.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bd.descriptorCount = 1; + bd.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bd.pImmutableSamplers = nullptr; + arg_binding.push_back(bd); + } + { + VkDescriptorUpdateTemplateEntryKHR tpl; + tpl.dstBinding = num_buffer; + tpl.dstArrayElement = 0; + tpl.descriptorCount = 1; + tpl.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + tpl.offset = num_buffer * sizeof(VkDescriptorBufferInfo); + tpl.stride = sizeof(VkDescriptorBufferInfo); + arg_template.push_back(tpl); + } + } + { VkDescriptorSetLayoutCreateInfo descrip_cinfo; descrip_cinfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -894,7 +922,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { playout_cinfo.setLayoutCount = 1; playout_cinfo.pSetLayouts = &(pe->descriptor_set_layout); - if (num_pack_args != 0) { + if (num_pack_args != 0 && num_pack_args * 8 <= 120) { playout_cinfo.pushConstantRangeCount = 1; playout_cinfo.pPushConstantRanges = &crange; ICHECK_LE(crange.size, vctx.phy_device_prop.limits.maxPushConstantsSize); @@ -1076,6 +1104,15 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, binfo.range = VK_WHOLE_SIZE; descriptor_buffers[i] = binfo; } + if (num_pack_args_ != 0 && num_pack_args_ * 8 > 120) { + // UBO + // TODO: copy pack_args + VkDescriptorBufferInfo binfo; + binfo.buffer = pipeline->ubo.buffer; + binfo.offset = 0; + binfo.range = VK_WHOLE_SIZE; + descriptor_buffers.push_back(binfo); + } if (vctx.UseImmediate()) { // Can safely capture by reference as this lambda is immediately executed on the calling thread. VulkanThreadEntry::ThreadLocal()->Stream(device_id)->Launch([&](VulkanStreamState* state) { From e1788b8d5134dcd6b7bde17b95caca0ed0b24edd Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 20 Mar 2021 04:34:44 +0900 Subject: [PATCH 03/22] allocate and bind ubo --- src/runtime/vulkan/vulkan.cc | 169 ++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 71 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 6e050e5b8603..4552c27106e2 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -91,6 +91,11 @@ struct VulkanBuffer { VkDeviceMemory memory{VK_NULL_HANDLE}; }; +struct UniformBuffer { + VulkanBuffer* vk_buf; + ArgUnion64* host_buf; +}; + struct VulkanPipeline { VulkanContext* vctx_{nullptr}; VkShaderModule shader{VK_NULL_HANDLE}; @@ -100,11 +105,80 @@ struct VulkanPipeline { VkPipelineLayout pipeline_layout{VK_NULL_HANDLE}; VkPipeline pipeline{VK_NULL_HANDLE}; VkDescriptorUpdateTemplateKHR descriptor_update_template{VK_NULL_HANDLE}; - VulkanBuffer ubo; + UniformBuffer ubo; }; typedef dmlc::ThreadLocalStore VulkanThreadStore; +VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsageFlags usage) { + VkBufferCreateInfo info; + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + info.pNext = nullptr; + info.flags = 0; + info.size = nbytes; + info.queueFamilyIndexCount = 1; + info.pQueueFamilyIndices = &(vctx.queue_family_index); + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.usage = usage; + // create buffer + VkBuffer buffer; + VULKAN_CALL(vkCreateBuffer(vctx.device, &info, nullptr, &buffer)); + + // bind to memory + bool dedicated_allocation = false; + VkMemoryRequirements2KHR req2; + + if (vctx.get_buffer_memory_requirements_2_functions) { + VkBufferMemoryRequirementsInfo2KHR req_info2; + req_info2.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR; + req_info2.pNext = 0; + req_info2.buffer = buffer; + + req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR; + req2.pNext = 0; + + VkMemoryDedicatedRequirementsKHR dedicated_req; + dedicated_req.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR; + dedicated_req.pNext = 0; + req2.pNext = &dedicated_req; + + vctx.get_buffer_memory_requirements_2_functions->vkGetBufferMemoryRequirements2KHR( + vctx.device, &req_info2, &req2); + dedicated_allocation = + dedicated_req.requiresDedicatedAllocation || dedicated_req.prefersDedicatedAllocation; + } + + VkDeviceMemory memory; + // TODO: revisit memoryTypeIndex + if (!dedicated_allocation) { + VkMemoryAllocateInfo minfo; + minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + minfo.pNext = nullptr; + minfo.allocationSize = nbytes; + minfo.memoryTypeIndex = vctx.compute_mtype_index; + VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); + } else { + VkMemoryAllocateInfo minfo; + minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + minfo.pNext = nullptr; + minfo.allocationSize = req2.memoryRequirements.size; + minfo.memoryTypeIndex = vctx.compute_mtype_index; + + VkMemoryDedicatedAllocateInfoKHR mdinfo; + mdinfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + mdinfo.pNext = 0; + mdinfo.image = 0; + mdinfo.buffer = buffer; + minfo.pNext = &mdinfo; + VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); + } + VULKAN_CALL(vkBindBufferMemory(vctx.device, buffer, memory, 0)); + VulkanBuffer* pbuf = new VulkanBuffer(); + pbuf->memory = memory; + pbuf->buffer = buffer; + return pbuf; +} + class VulkanDeviceAPI final : public DeviceAPI { public: VulkanDeviceAPI(); @@ -125,70 +199,9 @@ class VulkanDeviceAPI final : public DeviceAPI { nbytes = 1; } const auto& vctx = context(dev.device_id); - VkBufferCreateInfo info; - info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - info.pNext = nullptr; - info.flags = 0; - info.size = nbytes; - info.queueFamilyIndexCount = 1; - info.pQueueFamilyIndices = &(vctx.queue_family_index); - info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + auto usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - // create buffer - VkBuffer buffer; - VULKAN_CALL(vkCreateBuffer(vctx.device, &info, nullptr, &buffer)); - // bind to memory - VkBufferMemoryRequirementsInfo2KHR req_info2; - req_info2.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR; - req_info2.pNext = 0; - req_info2.buffer = buffer; - - VkMemoryRequirements2KHR req2; - req2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR; - req2.pNext = 0; - - VkMemoryDedicatedRequirementsKHR dedicated_req; - dedicated_req.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR; - dedicated_req.pNext = 0; - req2.pNext = &dedicated_req; - - bool dedicated_allocation = false; - if (vctx.get_buffer_memory_requirements_2_functions) { - vctx.get_buffer_memory_requirements_2_functions->vkGetBufferMemoryRequirements2KHR( - vctx.device, &req_info2, &req2); - dedicated_allocation = - dedicated_req.requiresDedicatedAllocation || dedicated_req.prefersDedicatedAllocation; - } - - VkDeviceMemory memory; - if (!dedicated_allocation) { - VkMemoryAllocateInfo minfo; - minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - minfo.pNext = nullptr; - minfo.allocationSize = nbytes; - minfo.memoryTypeIndex = vctx.compute_mtype_index; - VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); - } else { - VkMemoryAllocateInfo minfo; - minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - minfo.pNext = nullptr; - minfo.allocationSize = req2.memoryRequirements.size; - minfo.memoryTypeIndex = vctx.compute_mtype_index; - - VkMemoryDedicatedAllocateInfoKHR mdinfo; - mdinfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; - mdinfo.pNext = 0; - mdinfo.image = 0; - mdinfo.buffer = buffer; - minfo.pNext = &mdinfo; - VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); - } - VULKAN_CALL(vkBindBufferMemory(vctx.device, buffer, memory, 0)); - VulkanBuffer* pbuf = new VulkanBuffer(); - pbuf->memory = memory; - pbuf->buffer = buffer; - return pbuf; + return CreateBuffer(vctx, nbytes, usage); } void FreeDataSpace(Device dev, void* ptr) final { @@ -784,6 +797,11 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyDescriptorPool(vctx.device, pe->descriptor_pool, nullptr); vkDestroyDescriptorSetLayout(vctx.device, pe->descriptor_set_layout, nullptr); vkDestroyShaderModule(vctx.device, pe->shader, nullptr); + // UBO + vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); + vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); + delete pe->ubo.vk_buf; + delete[] pe->ubo.host_buf; } } } @@ -846,14 +864,14 @@ class VulkanModuleNode final : public runtime::ModuleNode { } } - if (num_pod != 0 && num_pod * 8 > 120) { + size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); + if (nbytes_scalars > 120) { ICHECK(num_pod == num_pack_args); // UBO - // TODO: allocate ubo { VkDescriptorSetLayoutBinding bd; bd.binding = num_buffer; - bd.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bd.descriptorCount = 1; bd.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; bd.pImmutableSamplers = nullptr; @@ -864,7 +882,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { tpl.dstBinding = num_buffer; tpl.dstArrayElement = 0; tpl.descriptorCount = 1; - tpl.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + tpl.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; tpl.offset = num_buffer * sizeof(VkDescriptorBufferInfo); tpl.stride = sizeof(VkDescriptorBufferInfo); arg_template.push_back(tpl); @@ -951,6 +969,15 @@ class VulkanModuleNode final : public runtime::ModuleNode { VULKAN_CALL(vkCreateComputePipelines(vctx.device, VK_NULL_HANDLE, 1, &pipeline_cinfo, nullptr, &(pe->pipeline))); + if (nbytes_scalars > 120) { + // Allocate, bind and map UBO + UniformBuffer ubo = pe->ubo; + ubo.host_buf = new ArgUnion64[nbytes_scalars]; + ubo.vk_buf = CreateBuffer(vctx, nbytes_scalars, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); + void* host_ptr = ubo.host_buf; + vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &host_ptr); + } + if (vctx.UseImmediate()) { VkDescriptorUpdateTemplateCreateInfoKHR descrip_template_cinfo; descrip_template_cinfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; @@ -1104,11 +1131,11 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, binfo.range = VK_WHOLE_SIZE; descriptor_buffers[i] = binfo; } - if (num_pack_args_ != 0 && num_pack_args_ * 8 > 120) { + if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > 120) { // UBO - // TODO: copy pack_args + memcpy(pipeline->ubo.host_buf, pack_args, num_pack_args_ * sizeof(ArgUnion64)); VkDescriptorBufferInfo binfo; - binfo.buffer = pipeline->ubo.buffer; + binfo.buffer = pipeline->ubo.vk_buf->buffer; binfo.offset = 0; binfo.range = VK_WHOLE_SIZE; descriptor_buffers.push_back(binfo); From 665d5ff950415d9d407e472c5bd6315888aab79d Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 20 Mar 2021 12:57:39 +0900 Subject: [PATCH 04/22] query memory type for uniform --- src/runtime/vulkan/vulkan.cc | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 4552c27106e2..d6f3899d7ec8 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -124,6 +124,28 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa VkBuffer buffer; VULKAN_CALL(vkCreateBuffer(vctx.device, &info, nullptr, &buffer)); + uint32_t mem_type_index = vctx.compute_mtype_index; + + if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + // Find a memory type that supports UBO + VkMemoryRequirements mem_reqs; + vkGetBufferMemoryRequirements(vctx.device, buffer, &mem_reqs); + uint32_t type_bits = mem_reqs.memoryTypeBits; + auto req_prop = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + VkPhysicalDeviceMemoryProperties phy_mem_prop; + vkGetPhysicalDeviceMemoryProperties(vctx.phy_device, &phy_mem_prop); + bool found = false; + for (uint32_t i = 0; i < phy_mem_prop.memoryTypeCount; i++) { + if ((type_bits & 1) == 1 && + (phy_mem_prop.memoryTypes[i].propertyFlags & req_prop) == req_prop) { + mem_type_index = i; + found = true; + } + type_bits >>= 1; + } + ICHECK(found); + } + // bind to memory bool dedicated_allocation = false; VkMemoryRequirements2KHR req2; @@ -155,14 +177,14 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; minfo.pNext = nullptr; minfo.allocationSize = nbytes; - minfo.memoryTypeIndex = vctx.compute_mtype_index; + minfo.memoryTypeIndex = mem_type_index; VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); } else { VkMemoryAllocateInfo minfo; minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; minfo.pNext = nullptr; minfo.allocationSize = req2.memoryRequirements.size; - minfo.memoryTypeIndex = vctx.compute_mtype_index; + minfo.memoryTypeIndex = mem_type_index; VkMemoryDedicatedAllocateInfoKHR mdinfo; mdinfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; @@ -940,7 +962,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { playout_cinfo.setLayoutCount = 1; playout_cinfo.pSetLayouts = &(pe->descriptor_set_layout); - if (num_pack_args != 0 && num_pack_args * 8 <= 120) { + if (0 < nbytes_scalars && nbytes_scalars <= 120) { playout_cinfo.pushConstantRangeCount = 1; playout_cinfo.pPushConstantRanges = &crange; ICHECK_LE(crange.size, vctx.phy_device_prop.limits.maxPushConstantsSize); From 432ff24eb0432cee222d6813333049c7c836150e Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 20 Mar 2021 13:18:43 +0900 Subject: [PATCH 05/22] refactor --- src/runtime/vulkan/vulkan.cc | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index d6f3899d7ec8..20b9c7289d5b 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -110,6 +110,24 @@ struct VulkanPipeline { typedef dmlc::ThreadLocalStore VulkanThreadStore; +uint32_t FindMemoryType(VkDevice logical_device, VkPhysicalDevice phy_device, VkBuffer buffer, + VkMemoryPropertyFlags req_prop) { + VkMemoryRequirements mem_reqs; + vkGetBufferMemoryRequirements(logical_device, buffer, &mem_reqs); + uint32_t type_bits = mem_reqs.memoryTypeBits; + VkPhysicalDeviceMemoryProperties phy_mem_prop; + vkGetPhysicalDeviceMemoryProperties(phy_device, &phy_mem_prop); + for (uint32_t i = 0; i < phy_mem_prop.memoryTypeCount; i++) { + if ((type_bits & 1) == 1 && + (phy_mem_prop.memoryTypes[i].propertyFlags & req_prop) == req_prop) { + return i; + } + type_bits >>= 1; + } + LOG(FATAL) << "Requested memory type not found"; + return 0; +} + VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsageFlags usage) { VkBufferCreateInfo info; info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; @@ -128,22 +146,8 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { // Find a memory type that supports UBO - VkMemoryRequirements mem_reqs; - vkGetBufferMemoryRequirements(vctx.device, buffer, &mem_reqs); - uint32_t type_bits = mem_reqs.memoryTypeBits; - auto req_prop = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - VkPhysicalDeviceMemoryProperties phy_mem_prop; - vkGetPhysicalDeviceMemoryProperties(vctx.phy_device, &phy_mem_prop); - bool found = false; - for (uint32_t i = 0; i < phy_mem_prop.memoryTypeCount; i++) { - if ((type_bits & 1) == 1 && - (phy_mem_prop.memoryTypes[i].propertyFlags & req_prop) == req_prop) { - mem_type_index = i; - found = true; - } - type_bits >>= 1; - } - ICHECK(found); + auto prop = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + mem_type_index = FindMemoryType(vctx.device, vctx.phy_device, buffer, prop); } // bind to memory From 5f9f82dd74e51f316a9aeac5d455d1aec4981bab Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 07:55:30 +0900 Subject: [PATCH 06/22] do not use float64 --- python/tvm/topi/cuda/scan.py | 2 +- python/tvm/topi/cuda/sort.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 3240ebcd515c..25367bb7b04c 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -104,7 +104,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, i # The following algorithm performs parallel exclusive scan # Up Sweep of exclusive scan lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float32"))), "int64" ) with ib.for_range(0, lim, dtype="int64") as l2_width: width = 2 << l2_width diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py index 5ebd3060a6bb..5e6108737cd6 100644 --- a/python/tvm/topi/cuda/sort.py +++ b/python/tvm/topi/cuda/sort.py @@ -239,7 +239,7 @@ def compare(a, b): # Sort the lower levels of the merge using odd-even sort, it's fast for small inputs lower_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float32"))), "int64" ) _odd_even_sort( @@ -255,7 +255,7 @@ def compare(a, b): ) upper_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float32"))), "int64" ) def get_merge_begin(source, base_idx, aCount, bCount, aStart, bStart, diag, step_count): From a8de4593853e30569f2d6fde9c26d90529c57c25 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 08:20:27 +0900 Subject: [PATCH 07/22] trying an approach similar to push constant --- src/runtime/vulkan/vulkan.cc | 11 ++++++-- src/target/spirv/codegen_spirv.cc | 22 +++++++++------ src/target/spirv/ir_builder.cc | 45 +++++++++++++++++++++++++++++++ src/target/spirv/ir_builder.h | 9 +++++++ 4 files changed, 77 insertions(+), 10 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 20b9c7289d5b..d79dcd90f83d 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -120,6 +120,7 @@ uint32_t FindMemoryType(VkDevice logical_device, VkPhysicalDevice phy_device, Vk for (uint32_t i = 0; i < phy_mem_prop.memoryTypeCount; i++) { if ((type_bits & 1) == 1 && (phy_mem_prop.memoryTypes[i].propertyFlags & req_prop) == req_prop) { + LOG(INFO) << "Find memory type index " << i; return i; } type_bits >>= 1; @@ -893,6 +894,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); if (nbytes_scalars > 120) { ICHECK(num_pod == num_pack_args); + LOG(INFO) << "Adding ubo to the pipeline with binding = " << num_buffer; // UBO { VkDescriptorSetLayoutBinding bd; @@ -997,11 +999,13 @@ class VulkanModuleNode final : public runtime::ModuleNode { if (nbytes_scalars > 120) { // Allocate, bind and map UBO - UniformBuffer ubo = pe->ubo; - ubo.host_buf = new ArgUnion64[nbytes_scalars]; + LOG(INFO) << "Allocate ubo of size " << nbytes_scalars; + UniformBuffer& ubo = pe->ubo; + ubo.host_buf = new ArgUnion64[num_pod]; ubo.vk_buf = CreateBuffer(vctx, nbytes_scalars, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); void* host_ptr = ubo.host_buf; vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &host_ptr); + LOG(INFO) << "Mapping done"; } if (vctx.UseImmediate()) { @@ -1159,7 +1163,10 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, } if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > 120) { // UBO + LOG(INFO) << "Copy ubo of size " << num_pack_args_ * sizeof(ArgUnion64); + CHECK(pipeline->ubo.host_buf != nullptr); memcpy(pipeline->ubo.host_buf, pack_args, num_pack_args_ * sizeof(ArgUnion64)); + LOG(INFO) << "copy done"; VkDescriptorBufferInfo binfo; binfo.buffer = pipeline->ubo.vk_buf->buffer; binfo.offset = 0; diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 633a0558f26e..1ba675c3809d 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -80,16 +80,22 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: var_map_[pod_args[i].get()] = value; } } else { - DataType value_storage_type = DataType::Int(64); - spirv::Value ptr_ubo = - builder_->BufferArgument(builder_->GetSType(value_storage_type), 0, num_buffer, true); + spirv::Value ptr = builder_->DeclareUBO(value_types, num_buffer); for (size_t i = 0; i < pod_args.size(); ++i) { - spirv::SType ptr_type = builder_->GetPointerType(value_types[i], spv::StorageClassUniform); - spirv::Value ptr = builder_->StructArrayAccess( - ptr_type, ptr_ubo, MakeValue(PrimExpr(static_cast(i * 8)))); - var_map_[pod_args[i].get()] = - builder_->MakeValue(spv::OpLoad, value_types[i], ptr, spv::MemoryAccessMaskNone); + spirv::Value value = + builder_->GetUBO(ptr, value_types[i], static_cast(i)); + var_map_[pod_args[i].get()] = value; } + // DataType value_storage_type = DataType::Int(64); + // spirv::Value ptr_ubo = + // builder_->BufferArgument(builder_->GetSType(value_storage_type), 0, num_buffer, true); + // for (size_t i = 0; i < pod_args.size(); ++i) { + // spirv::SType ptr_type = builder_->GetPointerType(value_types[i], spv::StorageClassUniform); + // spirv::Value ptr = builder_->StructArrayAccess( + // ptr_type, ptr_ubo, MakeValue(PrimExpr(static_cast(i)))); + // var_map_[pod_args[i].get()] = + // builder_->MakeValue(spv::OpLoad, value_types[i], ptr, spv::MemoryAccessMaskNone); + //} } } this->VisitStmt(f->body); diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index 300355100bd7..41e5fe1b99d4 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -250,6 +250,51 @@ Value IRBuilder::GetPushConstant(Value ptr_push_const, const SType& v_type, uint return this->MakeValue(spv::OpLoad, v_type, ptr); } +Value IRBuilder::DeclareUBO(const std::vector& value_types, uint32_t binding) { + ICHECK_EQ(push_const_.id, 0); + SType struct_type; + struct_type.id = id_counter_++; + struct_type.type = DataType::Handle(); + ib_.Begin(spv::OpTypeStruct).Add(struct_type); + for (const SType& vtype : value_types) { + ib_.Add(vtype); + } + ib_.Commit(&global_); + + uint32_t offset = 0; + for (uint32_t i = 0; i < value_types.size(); ++i) { + ib_.Begin(spv::OpMemberDecorate) + .AddSeq(struct_type, i, spv::DecorationOffset, offset) + .Commit(&decorate_); + DataType t = value_types[i].type; + uint32_t nbits = t.bits() * t.lanes(); + ICHECK_EQ(nbits % 8, 0); + uint32_t bytes = (nbits / 8); + if (t.bits() == 32) { + // In our Vulkan runtime, each push constant always occupies 64 bit. + offset += bytes * 2; + } else { + ICHECK_EQ(t.bits(), 64); + offset += bytes; + } + } + // Decorate push constants as UBO + this->Decorate(spv::OpDecorate, struct_type, spv::DecorationBlock); + + SType ptr_type = GetPointerType(struct_type, spv::StorageClassUniform); + Value val = NewValue(ptr_type, kPushConstantPtr); + this->Decorate(spv::OpDecorate, val, spv::DecorationBinding, binding); + ib_.Begin(spv::OpVariable).AddSeq(ptr_type, val, spv::StorageClassUniform).Commit(&global_); + return val; +} + +Value IRBuilder::GetUBO(Value ptr_push_const, const SType& v_type, uint32_t index) { + SType ptr_vtype = this->GetPointerType(v_type, spv::StorageClassUniform); + Value ptr = this->MakeValue(spv::OpAccessChain, ptr_vtype, ptr_push_const, + IntImm(t_int32_, static_cast(index))); + return this->MakeValue(spv::OpLoad, v_type, ptr); +} + Value IRBuilder::NewFunction() { return NewValue(t_void_func_, kFunction); } void IRBuilder::CommitKernelFunction(const Value& func, const std::string& name) { diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index ffda3333b57d..d8627f02c3e7 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -489,6 +489,15 @@ class IRBuilder { * \return the value of push constant */ Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index); + + Value DeclareUBO(const std::vector& value_types, uint32_t binding); + /*! + * \brief Get i-th push constant + * \param v_type The value type + * \param index The push constant index + * \return the value of push constant + */ + Value GetUBO(Value ptr_ubo, const SType& v_type, uint32_t index); /*! * \brief Declare a new function * \return The created function ID. From 23b1f402471d9c26f615f31eec6f479158a85d41 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 08:25:48 +0900 Subject: [PATCH 08/22] add more log --- src/runtime/vulkan/vulkan.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index d79dcd90f83d..c3b232987a62 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -145,6 +145,8 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa uint32_t mem_type_index = vctx.compute_mtype_index; + LOG(INFO) << "compute_mtype_index: " << vctx.compute_mtype_index; + if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { // Find a memory type that supports UBO auto prop = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; @@ -185,6 +187,7 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa minfo.memoryTypeIndex = mem_type_index; VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); } else { + LOG(INFO) << "using dedicated allocation"; VkMemoryAllocateInfo minfo; minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; minfo.pNext = nullptr; @@ -1174,6 +1177,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, descriptor_buffers.push_back(binfo); } if (vctx.UseImmediate()) { + LOG(INFO) << "Using immediate"; // Can safely capture by reference as this lambda is immediately executed on the calling thread. VulkanThreadEntry::ThreadLocal()->Stream(device_id)->Launch([&](VulkanStreamState* state) { vkCmdBindPipeline(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); From 7cfea184a5feb28228033752025304620d3d534b Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 08:37:11 +0900 Subject: [PATCH 09/22] do not delete ubo when not using it --- src/runtime/vulkan/vulkan.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index c3b232987a62..a45ed167ac13 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -828,10 +828,12 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyDescriptorSetLayout(vctx.device, pe->descriptor_set_layout, nullptr); vkDestroyShaderModule(vctx.device, pe->shader, nullptr); // UBO - vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); - vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); - delete pe->ubo.vk_buf; - delete[] pe->ubo.host_buf; + if (pe->ubo.host_buf) { + vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); + vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); + delete pe->ubo.vk_buf; + delete[] pe->ubo.host_buf; + } } } } From 436ff80bf07570145468805b1a4c5e76653b2a47 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 11:58:14 +0900 Subject: [PATCH 10/22] cumsum and nms test working with ubo --- src/runtime/vulkan/vulkan.cc | 35 +++++++++++++++++++------------ src/target/spirv/codegen_spirv.cc | 13 +----------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index a45ed167ac13..60f7e9046156 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -45,6 +45,8 @@ static constexpr const int kVulkanMaxNumDevice = 8; /*! \brief TVM Vulkan binary pack magic number */ static constexpr const int kVulkanModuleMagic = 0x02700027; +#define MAX_PUSHCONSTANTS 128 + class VulkanThreadEntry { public: VulkanThreadEntry(); @@ -93,7 +95,7 @@ struct VulkanBuffer { struct UniformBuffer { VulkanBuffer* vk_buf; - ArgUnion64* host_buf; + void* host_buf; }; struct VulkanPipeline { @@ -829,10 +831,11 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyShaderModule(vctx.device, pe->shader, nullptr); // UBO if (pe->ubo.host_buf) { + LOG(INFO) << "destroying UBO"; vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); delete pe->ubo.vk_buf; - delete[] pe->ubo.host_buf; + // delete[] (ArgUnion64*)pe->ubo.host_buf; } } } @@ -897,7 +900,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { } size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); - if (nbytes_scalars > 120) { + if (nbytes_scalars > MAX_PUSHCONSTANTS) { ICHECK(num_pod == num_pack_args); LOG(INFO) << "Adding ubo to the pipeline with binding = " << num_buffer; // UBO @@ -973,7 +976,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { playout_cinfo.setLayoutCount = 1; playout_cinfo.pSetLayouts = &(pe->descriptor_set_layout); - if (0 < nbytes_scalars && nbytes_scalars <= 120) { + if (0 < nbytes_scalars && nbytes_scalars <= MAX_PUSHCONSTANTS) { playout_cinfo.pushConstantRangeCount = 1; playout_cinfo.pPushConstantRanges = &crange; ICHECK_LE(crange.size, vctx.phy_device_prop.limits.maxPushConstantsSize); @@ -1002,14 +1005,13 @@ class VulkanModuleNode final : public runtime::ModuleNode { VULKAN_CALL(vkCreateComputePipelines(vctx.device, VK_NULL_HANDLE, 1, &pipeline_cinfo, nullptr, &(pe->pipeline))); - if (nbytes_scalars > 120) { + if (nbytes_scalars > MAX_PUSHCONSTANTS) { // Allocate, bind and map UBO LOG(INFO) << "Allocate ubo of size " << nbytes_scalars; UniformBuffer& ubo = pe->ubo; ubo.host_buf = new ArgUnion64[num_pod]; ubo.vk_buf = CreateBuffer(vctx, nbytes_scalars, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); - void* host_ptr = ubo.host_buf; - vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &host_ptr); + vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &(ubo.host_buf)); LOG(INFO) << "Mapping done"; } @@ -1166,7 +1168,8 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, binfo.range = VK_WHOLE_SIZE; descriptor_buffers[i] = binfo; } - if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > 120) { + bool use_ubo = false; + if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > MAX_PUSHCONSTANTS) { // UBO LOG(INFO) << "Copy ubo of size " << num_pack_args_ * sizeof(ArgUnion64); CHECK(pipeline->ubo.host_buf != nullptr); @@ -1177,6 +1180,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, binfo.offset = 0; binfo.range = VK_WHOLE_SIZE; descriptor_buffers.push_back(binfo); + use_ubo = true; } if (vctx.UseImmediate()) { LOG(INFO) << "Using immediate"; @@ -1187,7 +1191,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, vctx.descriptor_template_khr_functions->vkCmdPushDescriptorSetWithTemplateKHR( state->cmd_buffer_, pipeline->descriptor_update_template, pipeline->pipeline_layout, 0, descriptor_buffers.data()); - if (num_pack_args_ != 0) { + if (num_pack_args_ > 0 && num_pack_args_ <= MAX_PUSHCONSTANTS) { vkCmdPushConstants(state->cmd_buffer_, pipeline->pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, num_pack_args_ * sizeof(ArgUnion64), pack_args); @@ -1208,7 +1212,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, // Otherwise, the more expensive deferred path. std::vector pack_args_storage(pack_args, pack_args + num_pack_args_); - const auto& deferred_initializer = [&vctx, pipeline, descriptor_buffers]() { + const auto& deferred_initializer = [&vctx, pipeline, descriptor_buffers, use_ubo]() { std::vector write_descriptor_sets; write_descriptor_sets.resize(descriptor_buffers.size()); for (size_t i = 0; i < write_descriptor_sets.size(); i++) { @@ -1218,7 +1222,12 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, write_descriptor_sets[i].dstBinding = i; write_descriptor_sets[i].dstArrayElement = 0; write_descriptor_sets[i].descriptorCount = 1; - write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + if (use_ubo && i == write_descriptor_sets.size() - 1) { + write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + } else { + write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + } + write_descriptor_sets[i].pImageInfo = 0; write_descriptor_sets[i].pBufferInfo = &(descriptor_buffers[i]); write_descriptor_sets[i].pTexelBufferView = 0; @@ -1226,12 +1235,12 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, vkUpdateDescriptorSets(vctx.device, write_descriptor_sets.size(), write_descriptor_sets.data(), 0, 0); }; - const auto& deferred_kernel = [pipeline, wl, pack_args_storage](VulkanStreamState* state) { + const auto& deferred_kernel = [this, pipeline, wl, pack_args_storage](VulkanStreamState* state) { vkCmdBindPipeline(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); vkCmdBindDescriptorSets(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline_layout, 0, 1, &(pipeline->descriptor_set), 0, nullptr); - if (pack_args_storage.size() != 0) { + if (num_pack_args_ > 0 && num_pack_args_ <= MAX_PUSHCONSTANTS) { vkCmdPushConstants(state->cmd_buffer_, pipeline->pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, pack_args_storage.size() * sizeof(ArgUnion64), pack_args_storage.data()); diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 1ba675c3809d..3dd9551ba097 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -82,20 +82,9 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: } else { spirv::Value ptr = builder_->DeclareUBO(value_types, num_buffer); for (size_t i = 0; i < pod_args.size(); ++i) { - spirv::Value value = - builder_->GetUBO(ptr, value_types[i], static_cast(i)); + spirv::Value value = builder_->GetUBO(ptr, value_types[i], static_cast(i)); var_map_[pod_args[i].get()] = value; } - // DataType value_storage_type = DataType::Int(64); - // spirv::Value ptr_ubo = - // builder_->BufferArgument(builder_->GetSType(value_storage_type), 0, num_buffer, true); - // for (size_t i = 0; i < pod_args.size(); ++i) { - // spirv::SType ptr_type = builder_->GetPointerType(value_types[i], spv::StorageClassUniform); - // spirv::Value ptr = builder_->StructArrayAccess( - // ptr_type, ptr_ubo, MakeValue(PrimExpr(static_cast(i)))); - // var_map_[pod_args[i].get()] = - // builder_->MakeValue(spv::OpLoad, value_types[i], ptr, spv::MemoryAccessMaskNone); - //} } } this->VisitStmt(f->body); From 6c046699dcc9a6ef9163a471589a09f15e11640b Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 12:07:25 +0900 Subject: [PATCH 11/22] remove log --- src/runtime/vulkan/vulkan.cc | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 60f7e9046156..e1a61bf7641e 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -122,7 +122,6 @@ uint32_t FindMemoryType(VkDevice logical_device, VkPhysicalDevice phy_device, Vk for (uint32_t i = 0; i < phy_mem_prop.memoryTypeCount; i++) { if ((type_bits & 1) == 1 && (phy_mem_prop.memoryTypes[i].propertyFlags & req_prop) == req_prop) { - LOG(INFO) << "Find memory type index " << i; return i; } type_bits >>= 1; @@ -147,8 +146,6 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa uint32_t mem_type_index = vctx.compute_mtype_index; - LOG(INFO) << "compute_mtype_index: " << vctx.compute_mtype_index; - if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { // Find a memory type that supports UBO auto prop = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; @@ -189,7 +186,6 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa minfo.memoryTypeIndex = mem_type_index; VULKAN_CALL(vkAllocateMemory(vctx.device, &minfo, nullptr, &memory)); } else { - LOG(INFO) << "using dedicated allocation"; VkMemoryAllocateInfo minfo; minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; minfo.pNext = nullptr; @@ -794,7 +790,6 @@ class VulkanModuleNode final : public runtime::ModuleNode { explicit VulkanModuleNode(std::unordered_map smap, std::unordered_map fmap, std::string source) : smap_(smap), fmap_(fmap), source_(source) { - LOG(INFO) << source; } const char* type_key() const final { return "vulkan"; } @@ -831,7 +826,6 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyShaderModule(vctx.device, pe->shader, nullptr); // UBO if (pe->ubo.host_buf) { - LOG(INFO) << "destroying UBO"; vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); delete pe->ubo.vk_buf; @@ -902,7 +896,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); if (nbytes_scalars > MAX_PUSHCONSTANTS) { ICHECK(num_pod == num_pack_args); - LOG(INFO) << "Adding ubo to the pipeline with binding = " << num_buffer; + //LOG(INFO) << "Adding ubo to the pipeline with binding = " << num_buffer; // UBO { VkDescriptorSetLayoutBinding bd; @@ -1007,12 +1001,10 @@ class VulkanModuleNode final : public runtime::ModuleNode { if (nbytes_scalars > MAX_PUSHCONSTANTS) { // Allocate, bind and map UBO - LOG(INFO) << "Allocate ubo of size " << nbytes_scalars; UniformBuffer& ubo = pe->ubo; ubo.host_buf = new ArgUnion64[num_pod]; ubo.vk_buf = CreateBuffer(vctx, nbytes_scalars, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &(ubo.host_buf)); - LOG(INFO) << "Mapping done"; } if (vctx.UseImmediate()) { @@ -1171,10 +1163,8 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, bool use_ubo = false; if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > MAX_PUSHCONSTANTS) { // UBO - LOG(INFO) << "Copy ubo of size " << num_pack_args_ * sizeof(ArgUnion64); CHECK(pipeline->ubo.host_buf != nullptr); memcpy(pipeline->ubo.host_buf, pack_args, num_pack_args_ * sizeof(ArgUnion64)); - LOG(INFO) << "copy done"; VkDescriptorBufferInfo binfo; binfo.buffer = pipeline->ubo.vk_buf->buffer; binfo.offset = 0; @@ -1183,7 +1173,6 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, use_ubo = true; } if (vctx.UseImmediate()) { - LOG(INFO) << "Using immediate"; // Can safely capture by reference as this lambda is immediately executed on the calling thread. VulkanThreadEntry::ThreadLocal()->Stream(device_id)->Launch([&](VulkanStreamState* state) { vkCmdBindPipeline(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); From c1b1c888f26071a98a20bc4300d9be96676ec0e3 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 13:19:19 +0900 Subject: [PATCH 12/22] cleaning up --- src/runtime/vulkan/vulkan.cc | 89 ++++++++++++------------------ src/runtime/vulkan/vulkan_common.h | 3 + src/target/spirv/codegen_spirv.cc | 5 +- 3 files changed, 43 insertions(+), 54 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index e1a61bf7641e..ff925730e783 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -45,8 +45,6 @@ static constexpr const int kVulkanMaxNumDevice = 8; /*! \brief TVM Vulkan binary pack magic number */ static constexpr const int kVulkanModuleMagic = 0x02700027; -#define MAX_PUSHCONSTANTS 128 - class VulkanThreadEntry { public: VulkanThreadEntry(); @@ -177,7 +175,6 @@ VulkanBuffer* CreateBuffer(const VulkanContext& vctx, size_t nbytes, VkBufferUsa } VkDeviceMemory memory; - // TODO: revisit memoryTypeIndex if (!dedicated_allocation) { VkMemoryAllocateInfo minfo; minfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; @@ -829,6 +826,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); delete pe->ubo.vk_buf; + // TOOD(masahi): Fix segfault here // delete[] (ArgUnion64*)pe->ubo.host_buf; } } @@ -862,46 +860,12 @@ class VulkanModuleNode final : public runtime::ModuleNode { std::vector arg_template; uint32_t num_pod = 0, num_buffer = 0; - { - auto fit = fmap_.find(func_name); - ICHECK(fit != fmap_.end()); - for (DLDataType arg_type : fit->second.arg_types) { - if (arg_type.code == kTVMOpaqueHandle) { - { - VkDescriptorSetLayoutBinding bd; - bd.binding = num_buffer; - bd.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bd.descriptorCount = 1; - bd.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bd.pImmutableSamplers = nullptr; - arg_binding.push_back(bd); - } - { - VkDescriptorUpdateTemplateEntryKHR tpl; - tpl.dstBinding = num_buffer; - tpl.dstArrayElement = 0; - tpl.descriptorCount = 1; - tpl.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - tpl.offset = num_buffer * sizeof(VkDescriptorBufferInfo); - tpl.stride = sizeof(VkDescriptorBufferInfo); - arg_template.push_back(tpl); - } - ++num_buffer; - } else { - ++num_pod; - } - } - } - - size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); - if (nbytes_scalars > MAX_PUSHCONSTANTS) { - ICHECK(num_pod == num_pack_args); - //LOG(INFO) << "Adding ubo to the pipeline with binding = " << num_buffer; - // UBO + auto push_arg_info = [&arg_binding, &arg_template](uint32_t binding, + VkDescriptorType desc_type) { { VkDescriptorSetLayoutBinding bd; - bd.binding = num_buffer; - bd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bd.binding = binding; + bd.descriptorType = desc_type; bd.descriptorCount = 1; bd.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; bd.pImmutableSamplers = nullptr; @@ -909,14 +873,32 @@ class VulkanModuleNode final : public runtime::ModuleNode { } { VkDescriptorUpdateTemplateEntryKHR tpl; - tpl.dstBinding = num_buffer; + tpl.dstBinding = binding; tpl.dstArrayElement = 0; tpl.descriptorCount = 1; - tpl.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - tpl.offset = num_buffer * sizeof(VkDescriptorBufferInfo); + tpl.descriptorType = desc_type; + tpl.offset = binding * sizeof(VkDescriptorBufferInfo); tpl.stride = sizeof(VkDescriptorBufferInfo); arg_template.push_back(tpl); } + }; + + { + auto fit = fmap_.find(func_name); + ICHECK(fit != fmap_.end()); + for (DLDataType arg_type : fit->second.arg_types) { + if (arg_type.code == kTVMOpaqueHandle) { + push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + ++num_buffer; + } else { + ++num_pod; + } + } + } + + size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); + if (nbytes_scalars > MAX_PUSHCONSTANTS) { + push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); } { @@ -1160,17 +1142,17 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, binfo.range = VK_WHOLE_SIZE; descriptor_buffers[i] = binfo; } - bool use_ubo = false; - if (num_pack_args_ != 0 && num_pack_args_ * sizeof(ArgUnion64) > MAX_PUSHCONSTANTS) { + const size_t nbytes_scalars = num_pack_args_ * sizeof(ArgUnion64); + bool use_ubo = num_pack_args_ != 0 && nbytes_scalars > MAX_PUSHCONSTANTS; + if (use_ubo) { // UBO - CHECK(pipeline->ubo.host_buf != nullptr); - memcpy(pipeline->ubo.host_buf, pack_args, num_pack_args_ * sizeof(ArgUnion64)); + CHECK(pipeline->ubo.host_buf) << "The UBO host is not allocated"; + memcpy(pipeline->ubo.host_buf, pack_args, nbytes_scalars); VkDescriptorBufferInfo binfo; binfo.buffer = pipeline->ubo.vk_buf->buffer; binfo.offset = 0; binfo.range = VK_WHOLE_SIZE; descriptor_buffers.push_back(binfo); - use_ubo = true; } if (vctx.UseImmediate()) { // Can safely capture by reference as this lambda is immediately executed on the calling thread. @@ -1211,15 +1193,16 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, write_descriptor_sets[i].dstBinding = i; write_descriptor_sets[i].dstArrayElement = 0; write_descriptor_sets[i].descriptorCount = 1; + write_descriptor_sets[i].pImageInfo = 0; + write_descriptor_sets[i].pBufferInfo = &(descriptor_buffers[i]); + write_descriptor_sets[i].pTexelBufferView = 0; + if (use_ubo && i == write_descriptor_sets.size() - 1) { + // The last binding is for UBO write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; } else { write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; } - - write_descriptor_sets[i].pImageInfo = 0; - write_descriptor_sets[i].pBufferInfo = &(descriptor_buffers[i]); - write_descriptor_sets[i].pTexelBufferView = 0; } vkUpdateDescriptorSets(vctx.device, write_descriptor_sets.size(), write_descriptor_sets.data(), 0, 0); diff --git a/src/runtime/vulkan/vulkan_common.h b/src/runtime/vulkan/vulkan_common.h index 3083ba6f9ce4..ab38da84b3df 100644 --- a/src/runtime/vulkan/vulkan_common.h +++ b/src/runtime/vulkan/vulkan_common.h @@ -35,6 +35,9 @@ namespace tvm { namespace runtime { namespace vulkan { +// TODO(masahi): Query this value using runtime API +#define MAX_PUSHCONSTANTS 128 + inline const char* VKGetErrorString(VkResult error) { switch (error) { case VK_SUCCESS: diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 3dd9551ba097..371877a7dab8 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -30,6 +30,9 @@ #include +#include "../../runtime/vulkan/vulkan_common.h" +#include "../../runtime/pack_args.h" + namespace tvm { namespace codegen { @@ -72,7 +75,7 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: value_types.push_back(builder_->GetSType(pod_args[i].dtype())); } // All the POD arguments are passed in through PushConstant - if (pod_args.size() * 8 <= 128) { + if (pod_args.size() * sizeof(runtime::ArgUnion64) <= MAX_PUSHCONSTANTS) { spirv::Value ptr = builder_->DeclarePushConstant(value_types); for (size_t i = 0; i < pod_args.size(); ++i) { spirv::Value value = From fb27fbbaba1b4ea978da164d7d1b30c8ce0ca212 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 13:22:11 +0900 Subject: [PATCH 13/22] formatting --- src/runtime/vulkan/vulkan.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index ff925730e783..cadbc6836797 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -786,8 +786,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { public: explicit VulkanModuleNode(std::unordered_map smap, std::unordered_map fmap, std::string source) - : smap_(smap), fmap_(fmap), source_(source) { - } + : smap_(smap), fmap_(fmap), source_(source) {} const char* type_key() const final { return "vulkan"; } @@ -826,7 +825,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); delete pe->ubo.vk_buf; - // TOOD(masahi): Fix segfault here + // TOOD(masahi): Fix segfault here // delete[] (ArgUnion64*)pe->ubo.host_buf; } } @@ -888,7 +887,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { ICHECK(fit != fmap_.end()); for (DLDataType arg_type : fit->second.arg_types) { if (arg_type.code == kTVMOpaqueHandle) { - push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); ++num_buffer; } else { ++num_pod; @@ -1198,10 +1197,10 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, write_descriptor_sets[i].pTexelBufferView = 0; if (use_ubo && i == write_descriptor_sets.size() - 1) { - // The last binding is for UBO - write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + // The last binding is for UBO + write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; } else { - write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; } } vkUpdateDescriptorSets(vctx.device, write_descriptor_sets.size(), write_descriptor_sets.data(), From 69f2d05cd74ec573d92a97e8596474a8e8ed9b18 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 13:25:07 +0900 Subject: [PATCH 14/22] revert BufferArgument change --- src/target/spirv/codegen_spirv.cc | 2 +- src/target/spirv/ir_builder.cc | 13 ++++++------- src/target/spirv/ir_builder.h | 9 ++------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 371877a7dab8..9ff18e9b3529 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -30,8 +30,8 @@ #include -#include "../../runtime/vulkan/vulkan_common.h" #include "../../runtime/pack_args.h" +#include "../../runtime/vulkan/vulkan_common.h" namespace tvm { namespace codegen { diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index 41e5fe1b99d4..1b56fee19578 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -185,15 +185,14 @@ Value IRBuilder::FloatImm(const SType& dtype, double value) { } Value IRBuilder::BufferArgument(const SType& value_type, uint32_t descriptor_set, - uint32_t binding, bool uniform) { + uint32_t binding) { // NOTE: BufferBlock was deprecated in SPIRV 1.3 // use StorageClassStorageBuffer instead. - spv::StorageClass storage_class; - if (uniform) { - storage_class = spv::StorageClassUniform; - } else { - storage_class = spv::StorageClassStorageBuffer; - } +#if SPV_VERSION >= 0x10300 + spv::StorageClass storage_class = spv::StorageClassStorageBuffer; +#else + spv::StorageClass storage_class = spv::StorageClassUniform; +#endif SType sarr_type = GetStructArrayType(value_type, 0); SType ptr_type = GetPointerType(sarr_type, storage_class); diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index d8627f02c3e7..0dae60f6da72 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -472,7 +472,7 @@ class IRBuilder { * \param binding The binding locaiton in descriptor set. * \param The argument type. */ - Value BufferArgument(const SType& value_type, uint32_t descriptor_set, uint32_t binding, bool uniform=false); + Value BufferArgument(const SType& value_type, uint32_t descriptor_set, uint32_t binding); /*! * \brief Declare POD arguments through push constants. @@ -490,13 +490,8 @@ class IRBuilder { */ Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index); + // TODO doc Value DeclareUBO(const std::vector& value_types, uint32_t binding); - /*! - * \brief Get i-th push constant - * \param v_type The value type - * \param index The push constant index - * \return the value of push constant - */ Value GetUBO(Value ptr_ubo, const SType& v_type, uint32_t index); /*! * \brief Declare a new function From a5a97f4e397ad410dc2bdb5524a9e2388e207294 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 13:39:40 +0900 Subject: [PATCH 15/22] refactored codegen --- src/target/spirv/codegen_spirv.cc | 5 ++- src/target/spirv/ir_builder.cc | 54 ++++++++----------------------- src/target/spirv/ir_builder.h | 9 ++++-- 3 files changed, 22 insertions(+), 46 deletions(-) diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 9ff18e9b3529..d4c29799bee9 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -74,7 +74,6 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: for (size_t i = 0; i < pod_args.size(); ++i) { value_types.push_back(builder_->GetSType(pod_args[i].dtype())); } - // All the POD arguments are passed in through PushConstant if (pod_args.size() * sizeof(runtime::ArgUnion64) <= MAX_PUSHCONSTANTS) { spirv::Value ptr = builder_->DeclarePushConstant(value_types); for (size_t i = 0; i < pod_args.size(); ++i) { @@ -83,9 +82,9 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: var_map_[pod_args[i].get()] = value; } } else { - spirv::Value ptr = builder_->DeclareUBO(value_types, num_buffer); + spirv::Value ptr = builder_->DeclareUniformBuffer(value_types, num_buffer); for (size_t i = 0; i < pod_args.size(); ++i) { - spirv::Value value = builder_->GetUBO(ptr, value_types[i], static_cast(i)); + spirv::Value value = builder_->GetUniform(ptr, value_types[i], static_cast(i)); var_map_[pod_args[i].get()] = value; } } diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index 1b56fee19578..dfd2b875708a 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -205,8 +205,8 @@ Value IRBuilder::BufferArgument(const SType& value_type, uint32_t descriptor_set return val; } -Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { - ICHECK_EQ(push_const_.id, 0); +Value IRBuilder::DeclareStorageVariable(const std::vector& value_types, + spv::StorageClass storage_class) { SType struct_type; struct_type.id = id_counter_++; struct_type.type = DataType::Handle(); @@ -226,22 +226,26 @@ Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { ICHECK_EQ(nbits % 8, 0); uint32_t bytes = (nbits / 8); if (t.bits() == 32) { - // In our Vulkan runtime, each push constant always occupies 64 bit. + // In our Vulkan runtime, each scalar argument always occupies 64 bit. offset += bytes * 2; } else { ICHECK_EQ(t.bits(), 64); offset += bytes; } } - // Decorate push constants as UBO this->Decorate(spv::OpDecorate, struct_type, spv::DecorationBlock); - SType ptr_type = GetPointerType(struct_type, spv::StorageClassPushConstant); + SType ptr_type = GetPointerType(struct_type, storage_class); Value val = NewValue(ptr_type, kPushConstantPtr); - ib_.Begin(spv::OpVariable).AddSeq(ptr_type, val, spv::StorageClassPushConstant).Commit(&global_); + ib_.Begin(spv::OpVariable).AddSeq(ptr_type, val, storage_class).Commit(&global_); return val; } +Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { + ICHECK_EQ(push_const_.id, 0); + return DeclareStorageVariable(value_types, spv::StorageClassPushConstant); +} + Value IRBuilder::GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index) { SType ptr_vtype = this->GetPointerType(v_type, spv::StorageClassPushConstant); Value ptr = this->MakeValue(spv::OpAccessChain, ptr_vtype, ptr_push_const, @@ -249,45 +253,13 @@ Value IRBuilder::GetPushConstant(Value ptr_push_const, const SType& v_type, uint return this->MakeValue(spv::OpLoad, v_type, ptr); } -Value IRBuilder::DeclareUBO(const std::vector& value_types, uint32_t binding) { - ICHECK_EQ(push_const_.id, 0); - SType struct_type; - struct_type.id = id_counter_++; - struct_type.type = DataType::Handle(); - ib_.Begin(spv::OpTypeStruct).Add(struct_type); - for (const SType& vtype : value_types) { - ib_.Add(vtype); - } - ib_.Commit(&global_); - - uint32_t offset = 0; - for (uint32_t i = 0; i < value_types.size(); ++i) { - ib_.Begin(spv::OpMemberDecorate) - .AddSeq(struct_type, i, spv::DecorationOffset, offset) - .Commit(&decorate_); - DataType t = value_types[i].type; - uint32_t nbits = t.bits() * t.lanes(); - ICHECK_EQ(nbits % 8, 0); - uint32_t bytes = (nbits / 8); - if (t.bits() == 32) { - // In our Vulkan runtime, each push constant always occupies 64 bit. - offset += bytes * 2; - } else { - ICHECK_EQ(t.bits(), 64); - offset += bytes; - } - } - // Decorate push constants as UBO - this->Decorate(spv::OpDecorate, struct_type, spv::DecorationBlock); - - SType ptr_type = GetPointerType(struct_type, spv::StorageClassUniform); - Value val = NewValue(ptr_type, kPushConstantPtr); +Value IRBuilder::DeclareUniformBuffer(const std::vector& value_types, uint32_t binding) { + Value val = DeclareStorageVariable(value_types, spv::StorageClassUniform); this->Decorate(spv::OpDecorate, val, spv::DecorationBinding, binding); - ib_.Begin(spv::OpVariable).AddSeq(ptr_type, val, spv::StorageClassUniform).Commit(&global_); return val; } -Value IRBuilder::GetUBO(Value ptr_push_const, const SType& v_type, uint32_t index) { +Value IRBuilder::GetUniform(Value ptr_push_const, const SType& v_type, uint32_t index) { SType ptr_vtype = this->GetPointerType(v_type, spv::StorageClassUniform); Value ptr = this->MakeValue(spv::OpAccessChain, ptr_vtype, ptr_push_const, IntImm(t_int32_, static_cast(index))); diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index 0dae60f6da72..6dadc309531a 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -491,8 +491,8 @@ class IRBuilder { Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index); // TODO doc - Value DeclareUBO(const std::vector& value_types, uint32_t binding); - Value GetUBO(Value ptr_ubo, const SType& v_type, uint32_t index); + Value DeclareUniformBuffer(const std::vector& value_types, uint32_t binding); + Value GetUniform(Value ptr_ubo, const SType& v_type, uint32_t index); /*! * \brief Declare a new function * \return The created function ID. @@ -560,6 +560,11 @@ class IRBuilder { val.flag = flag; return val; } + + // TOOD doc + Value DeclareStorageVariable(const std::vector& value_types, + spv::StorageClass storage_class); + // get constant given value encoded in uint64_t Value GetConst_(const SType& dtype, const uint64_t* pvalue); // declare type From 17597ae6507b24c056ef7aa99087c8ac514826ce Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 13:41:23 +0900 Subject: [PATCH 16/22] minor fix --- src/runtime/vulkan/vulkan.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index cadbc6836797..0edbe683aece 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -1144,8 +1144,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, const size_t nbytes_scalars = num_pack_args_ * sizeof(ArgUnion64); bool use_ubo = num_pack_args_ != 0 && nbytes_scalars > MAX_PUSHCONSTANTS; if (use_ubo) { - // UBO - CHECK(pipeline->ubo.host_buf) << "The UBO host is not allocated"; + CHECK(pipeline->ubo.host_buf) << "The UBO host buffer is not allocated"; memcpy(pipeline->ubo.host_buf, pack_args, nbytes_scalars); VkDescriptorBufferInfo binfo; binfo.buffer = pipeline->ubo.vk_buf->buffer; From bfec9d343461b8cf63afe44bf4220309be45055b Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 22 Mar 2021 17:19:54 +0900 Subject: [PATCH 17/22] introduce value kind for ubo --- src/target/spirv/ir_builder.cc | 8 ++++---- src/target/spirv/ir_builder.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index dfd2b875708a..cd48c93530ec 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -206,7 +206,7 @@ Value IRBuilder::BufferArgument(const SType& value_type, uint32_t descriptor_set } Value IRBuilder::DeclareStorageVariable(const std::vector& value_types, - spv::StorageClass storage_class) { + spv::StorageClass storage_class, ValueKind kind) { SType struct_type; struct_type.id = id_counter_++; struct_type.type = DataType::Handle(); @@ -236,14 +236,14 @@ Value IRBuilder::DeclareStorageVariable(const std::vector& value_types, this->Decorate(spv::OpDecorate, struct_type, spv::DecorationBlock); SType ptr_type = GetPointerType(struct_type, storage_class); - Value val = NewValue(ptr_type, kPushConstantPtr); + Value val = NewValue(ptr_type, kind); ib_.Begin(spv::OpVariable).AddSeq(ptr_type, val, storage_class).Commit(&global_); return val; } Value IRBuilder::DeclarePushConstant(const std::vector& value_types) { ICHECK_EQ(push_const_.id, 0); - return DeclareStorageVariable(value_types, spv::StorageClassPushConstant); + return DeclareStorageVariable(value_types, spv::StorageClassPushConstant, kPushConstantPtr); } Value IRBuilder::GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index) { @@ -254,7 +254,7 @@ Value IRBuilder::GetPushConstant(Value ptr_push_const, const SType& v_type, uint } Value IRBuilder::DeclareUniformBuffer(const std::vector& value_types, uint32_t binding) { - Value val = DeclareStorageVariable(value_types, spv::StorageClassUniform); + Value val = DeclareStorageVariable(value_types, spv::StorageClassUniform, kUniformPtr); this->Decorate(spv::OpDecorate, val, spv::DecorationBinding, binding); return val; } diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index 6dadc309531a..16bff657bfe7 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -60,7 +60,8 @@ enum ValueKind { kStructArrayPtr, kPushConstantPtr, kFunction, - kExtInst + kExtInst, + kUniformPtr }; /*! \brief Represent the SPIRV Value */ @@ -563,7 +564,7 @@ class IRBuilder { // TOOD doc Value DeclareStorageVariable(const std::vector& value_types, - spv::StorageClass storage_class); + spv::StorageClass storage_class, ValueKind kind); // get constant given value encoded in uint64_t Value GetConst_(const SType& dtype, const uint64_t* pvalue); From 95ec1dbec1b6b1ac9204d717161c7fcdfe766ed3 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 22 Mar 2021 17:23:32 +0900 Subject: [PATCH 18/22] fix cpplint and revert float64 change --- python/tvm/topi/cuda/scan.py | 2 +- python/tvm/topi/cuda/sort.py | 4 ++-- src/target/spirv/ir_builder.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 25367bb7b04c..3240ebcd515c 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -104,7 +104,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, i # The following algorithm performs parallel exclusive scan # Up Sweep of exclusive scan lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float32"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float64"))), "int64" ) with ib.for_range(0, lim, dtype="int64") as l2_width: width = 2 << l2_width diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py index 5e6108737cd6..5ebd3060a6bb 100644 --- a/python/tvm/topi/cuda/sort.py +++ b/python/tvm/topi/cuda/sort.py @@ -239,7 +239,7 @@ def compare(a, b): # Sort the lower levels of the merge using odd-even sort, it's fast for small inputs lower_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float32"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float64"))), "int64" ) _odd_even_sort( @@ -255,7 +255,7 @@ def compare(a, b): ) upper_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float32"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float64"))), "int64" ) def get_merge_begin(source, base_idx, aCount, bCount, aStart, bStart, diag, step_count): diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index 16bff657bfe7..c06ec0acf341 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -491,7 +491,7 @@ class IRBuilder { */ Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index); - // TODO doc + // TODO(masahi): doc Value DeclareUniformBuffer(const std::vector& value_types, uint32_t binding); Value GetUniform(Value ptr_ubo, const SType& v_type, uint32_t index); /*! From 9a67f4a931a747b8b83fe3e919f47b3cf60bcaf8 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Apr 2021 16:51:58 +0900 Subject: [PATCH 19/22] query push constant size using runtime API --- src/runtime/vulkan/vulkan.cc | 27 ++++++++++++++++++++------- src/runtime/vulkan/vulkan_common.h | 2 ++ src/target/spirv/codegen_spirv.cc | 4 +++- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 0edbe683aece..0290fafe7b8a 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -786,7 +786,9 @@ class VulkanModuleNode final : public runtime::ModuleNode { public: explicit VulkanModuleNode(std::unordered_map smap, std::unordered_map fmap, std::string source) - : smap_(smap), fmap_(fmap), source_(source) {} + : smap_(smap), fmap_(fmap), source_(source), max_push_constants_(GetMaxPushConstantsSize()) { + LOG(INFO) << "VulkanModuleNode, max_push_constants: " << max_push_constants_; + } const char* type_key() const final { return "vulkan"; } @@ -896,7 +898,8 @@ class VulkanModuleNode final : public runtime::ModuleNode { } size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); - if (nbytes_scalars > MAX_PUSHCONSTANTS) { + if (nbytes_scalars > max_push_constants_) { + LOG(INFO) << "Using ubo"; push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); } @@ -951,7 +954,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { playout_cinfo.setLayoutCount = 1; playout_cinfo.pSetLayouts = &(pe->descriptor_set_layout); - if (0 < nbytes_scalars && nbytes_scalars <= MAX_PUSHCONSTANTS) { + if (0 < nbytes_scalars && nbytes_scalars <= max_push_constants_) { playout_cinfo.pushConstantRangeCount = 1; playout_cinfo.pPushConstantRanges = &crange; ICHECK_LE(crange.size, vctx.phy_device_prop.limits.maxPushConstantsSize); @@ -980,7 +983,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { VULKAN_CALL(vkCreateComputePipelines(vctx.device, VK_NULL_HANDLE, 1, &pipeline_cinfo, nullptr, &(pe->pipeline))); - if (nbytes_scalars > MAX_PUSHCONSTANTS) { + if (nbytes_scalars > max_push_constants_) { // Allocate, bind and map UBO UniformBuffer& ubo = pe->ubo; ubo.host_buf = new ArgUnion64[num_pod]; @@ -1031,6 +1034,8 @@ class VulkanModuleNode final : public runtime::ModuleNode { return source_; } + uint32_t MaxPushConstantsSize() const { return max_push_constants_; } + private: // function information table. std::unordered_map smap_; @@ -1040,6 +1045,8 @@ class VulkanModuleNode final : public runtime::ModuleNode { std::string fmt_{"vulkan"}; // The source std::string source_; + // The maximum size of push constants in bytes + uint32_t max_push_constants_; // Guards accesses to `ecache_` std::mutex mutex_; @@ -1142,7 +1149,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, descriptor_buffers[i] = binfo; } const size_t nbytes_scalars = num_pack_args_ * sizeof(ArgUnion64); - bool use_ubo = num_pack_args_ != 0 && nbytes_scalars > MAX_PUSHCONSTANTS; + bool use_ubo = num_pack_args_ != 0 && nbytes_scalars > m_->MaxPushConstantsSize(); if (use_ubo) { CHECK(pipeline->ubo.host_buf) << "The UBO host buffer is not allocated"; memcpy(pipeline->ubo.host_buf, pack_args, nbytes_scalars); @@ -1160,7 +1167,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, vctx.descriptor_template_khr_functions->vkCmdPushDescriptorSetWithTemplateKHR( state->cmd_buffer_, pipeline->descriptor_update_template, pipeline->pipeline_layout, 0, descriptor_buffers.data()); - if (num_pack_args_ > 0 && num_pack_args_ <= MAX_PUSHCONSTANTS) { + if (num_pack_args_ > 0 && num_pack_args_ <= m_->MaxPushConstantsSize()) { vkCmdPushConstants(state->cmd_buffer_, pipeline->pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, num_pack_args_ * sizeof(ArgUnion64), pack_args); @@ -1210,7 +1217,7 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, vkCmdBindDescriptorSets(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline_layout, 0, 1, &(pipeline->descriptor_set), 0, nullptr); - if (num_pack_args_ > 0 && num_pack_args_ <= MAX_PUSHCONSTANTS) { + if (num_pack_args_ > 0 && num_pack_args_ <= m_->MaxPushConstantsSize()) { vkCmdPushConstants(state->cmd_buffer_, pipeline->pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, pack_args_storage.size() * sizeof(ArgUnion64), pack_args_storage.data()); @@ -1265,6 +1272,12 @@ Module VulkanModuleLoadBinary(void* strm) { return VulkanModuleCreate(smap, fmap, ""); } +uint32_t GetMaxPushConstantsSize() { + int device_id = VulkanThreadEntry::ThreadLocal()->device.device_id; + const auto& vctx = VulkanDeviceAPI::Global()->context(device_id); + return vctx.phy_device_prop.limits.maxPushConstantsSize; +} + TVM_REGISTER_GLOBAL("runtime.module.loadfile_vulkan").set_body_typed(VulkanModuleLoadFile); TVM_REGISTER_GLOBAL("runtime.module.loadbinary_vulkan").set_body_typed(VulkanModuleLoadBinary); diff --git a/src/runtime/vulkan/vulkan_common.h b/src/runtime/vulkan/vulkan_common.h index ab38da84b3df..9ee28fee41c6 100644 --- a/src/runtime/vulkan/vulkan_common.h +++ b/src/runtime/vulkan/vulkan_common.h @@ -145,6 +145,8 @@ struct VulkanContext { bool UseImmediate() const { return descriptor_template_khr_functions.get() != nullptr; } }; +uint32_t GetMaxPushConstantsSize(); + } // namespace vulkan } // namespace runtime } // namespace tvm diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index d4c29799bee9..4d55f4c49a5f 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -74,7 +74,8 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: for (size_t i = 0; i < pod_args.size(); ++i) { value_types.push_back(builder_->GetSType(pod_args[i].dtype())); } - if (pod_args.size() * sizeof(runtime::ArgUnion64) <= MAX_PUSHCONSTANTS) { + const auto max_push_constants = runtime::vulkan::GetMaxPushConstantsSize(); + if (pod_args.size() * sizeof(runtime::ArgUnion64) <= max_push_constants) { spirv::Value ptr = builder_->DeclarePushConstant(value_types); for (size_t i = 0; i < pod_args.size(); ++i) { spirv::Value value = @@ -82,6 +83,7 @@ std::vector CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std:: var_map_[pod_args[i].get()] = value; } } else { + // If we need to pass more arguments than push constants could handle, we use UBO. spirv::Value ptr = builder_->DeclareUniformBuffer(value_types, num_buffer); for (size_t i = 0; i < pod_args.size(); ++i) { spirv::Value value = builder_->GetUniform(ptr, value_types[i], static_cast(i)); From a75a5b03b22b46b1709373027913c6738f5b0eb0 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Apr 2021 17:15:48 +0900 Subject: [PATCH 20/22] let vkmap/unmap allocate and delete host_buf --- src/runtime/vulkan/vulkan.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 0290fafe7b8a..82c2dc0e19b8 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -823,12 +823,11 @@ class VulkanModuleNode final : public runtime::ModuleNode { vkDestroyDescriptorSetLayout(vctx.device, pe->descriptor_set_layout, nullptr); vkDestroyShaderModule(vctx.device, pe->shader, nullptr); // UBO - if (pe->ubo.host_buf) { + if (pe->ubo.vk_buf) { + vkUnmapMemory(vctx.device, pe->ubo.vk_buf->memory); vkDestroyBuffer(vctx.device, pe->ubo.vk_buf->buffer, nullptr); vkFreeMemory(vctx.device, pe->ubo.vk_buf->memory, nullptr); delete pe->ubo.vk_buf; - // TOOD(masahi): Fix segfault here - // delete[] (ArgUnion64*)pe->ubo.host_buf; } } } @@ -986,7 +985,6 @@ class VulkanModuleNode final : public runtime::ModuleNode { if (nbytes_scalars > max_push_constants_) { // Allocate, bind and map UBO UniformBuffer& ubo = pe->ubo; - ubo.host_buf = new ArgUnion64[num_pod]; ubo.vk_buf = CreateBuffer(vctx, nbytes_scalars, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); vkMapMemory(vctx.device, ubo.vk_buf->memory, 0, nbytes_scalars, 0, &(ubo.host_buf)); } From 706fb3edb65f39f1cbc0c49e784475f3257fd3c0 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 8 Apr 2021 17:34:18 +0900 Subject: [PATCH 21/22] doc update --- src/runtime/vulkan/vulkan.cc | 7 ++----- src/runtime/vulkan/vulkan_common.h | 4 +--- src/target/spirv/ir_builder.h | 23 +++++++++++++++++++++-- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index 82c2dc0e19b8..c8a0858ec1bc 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -786,9 +786,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { public: explicit VulkanModuleNode(std::unordered_map smap, std::unordered_map fmap, std::string source) - : smap_(smap), fmap_(fmap), source_(source), max_push_constants_(GetMaxPushConstantsSize()) { - LOG(INFO) << "VulkanModuleNode, max_push_constants: " << max_push_constants_; - } + : smap_(smap), fmap_(fmap), source_(source), max_push_constants_(GetMaxPushConstantsSize()) {} const char* type_key() const final { return "vulkan"; } @@ -898,7 +896,6 @@ class VulkanModuleNode final : public runtime::ModuleNode { size_t nbytes_scalars = num_pod * sizeof(ArgUnion64); if (nbytes_scalars > max_push_constants_) { - LOG(INFO) << "Using ubo"; push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); } @@ -1044,7 +1041,7 @@ class VulkanModuleNode final : public runtime::ModuleNode { // The source std::string source_; // The maximum size of push constants in bytes - uint32_t max_push_constants_; + const uint32_t max_push_constants_; // Guards accesses to `ecache_` std::mutex mutex_; diff --git a/src/runtime/vulkan/vulkan_common.h b/src/runtime/vulkan/vulkan_common.h index 9ee28fee41c6..e94a9fe7fa90 100644 --- a/src/runtime/vulkan/vulkan_common.h +++ b/src/runtime/vulkan/vulkan_common.h @@ -35,9 +35,6 @@ namespace tvm { namespace runtime { namespace vulkan { -// TODO(masahi): Query this value using runtime API -#define MAX_PUSHCONSTANTS 128 - inline const char* VKGetErrorString(VkResult error) { switch (error) { case VK_SUCCESS: @@ -145,6 +142,7 @@ struct VulkanContext { bool UseImmediate() const { return descriptor_template_khr_functions.get() != nullptr; } }; +/*! \brief returns maximum push constant sizes in bytes for the target platform */ uint32_t GetMaxPushConstantsSize(); } // namespace vulkan diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index c06ec0acf341..e252dcf94595 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -491,8 +491,21 @@ class IRBuilder { */ Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index); - // TODO(masahi): doc + /*! + * \brief Declare POD arguments through uniform buffer. + * + * \note Only call this function once! + * \param value_types The values in the uniform buffer + * \param binding The binding locaiton in descriptor set + * \return reference to self. + */ Value DeclareUniformBuffer(const std::vector& value_types, uint32_t binding); + /*! + * \brief Get i-th uniform constant + * \param v_type The value type + * \param index The uniform index + * \return the value of uniform constant + */ Value GetUniform(Value ptr_ubo, const SType& v_type, uint32_t index); /*! * \brief Declare a new function @@ -562,7 +575,13 @@ class IRBuilder { return val; } - // TOOD doc + /*! + * \brief The common function to declare push constants and uniform buffe + * \param value_types The values in the push constants or uniform buffer + * \param storage_class An enum defined by SPIR-V indicating push constant or uniform + * \param kind An enum indicating push constant or uniform + * \return The created new label + */ Value DeclareStorageVariable(const std::vector& value_types, spv::StorageClass storage_class, ValueKind kind); From 1a3dbee99c9a2c362373707678d5657e59ea6827 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 10 Apr 2021 09:19:06 +0900 Subject: [PATCH 22/22] fix typo --- src/target/spirv/ir_builder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index e252dcf94595..05a2bc631743 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -576,7 +576,7 @@ class IRBuilder { } /*! - * \brief The common function to declare push constants and uniform buffe + * \brief The common function to declare push constants or uniform buffer * \param value_types The values in the push constants or uniform buffer * \param storage_class An enum defined by SPIR-V indicating push constant or uniform * \param kind An enum indicating push constant or uniform