Skip to content

Commit

Permalink
Converting local CPU target to support executable-create2.
Browse files Browse the repository at this point in the history
The inline HAL already supported it, so this is mostly just removing the
pipeline layouts from the full HAL implementation.
  • Loading branch information
benvanik committed Aug 20, 2024
1 parent 7121ece commit a69a93b
Show file tree
Hide file tree
Showing 33 changed files with 123 additions and 929 deletions.
9 changes: 5 additions & 4 deletions compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,11 @@ class LLVMCPUTargetBackend final : public TargetBackend {

// Specify the constant and binding information used to validate
// dispatches.
// TODO(#18154): pack per-binding information bitfields.
dispatchAttrs.constantCount = exportOp.getLayout().getPushConstants();
dispatchAttrs.bindingCount =
exportOp.getLayout().getSetLayout(0).getBindings().size();
if (auto layoutAttr = exportOp.getLayout()) {
dispatchAttrs.constantCount = layoutAttr.getPushConstants();
dispatchAttrs.bindingCount =
layoutAttr.getSetLayout(0).getBindings().size();
}

LibraryBuilder::SourceLocation sourceLocation;
if (options.debugLevel >= 1) {
Expand Down
37 changes: 33 additions & 4 deletions compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ makeDispatchFunctionType(llvm::LLVMContext &context) {
// %struct.iree_hal_executable_dispatch_attrs_v0_t = type {
// i16,
// i8,
// i8
// i8,
// i32,
// i64[8]
// }
static llvm::StructType *makeDispatchAttrsType(llvm::LLVMContext &context) {
if (auto *existingType = llvm::StructType::getTypeByName(
Expand All @@ -121,12 +123,20 @@ static llvm::StructType *makeDispatchAttrsType(llvm::LLVMContext &context) {
}
auto *i8Type = llvm::IntegerType::getInt8Ty(context);
auto *i16Type = llvm::IntegerType::getInt16Ty(context);
auto *i32Type = llvm::IntegerType::getInt32Ty(context);
auto *i64Type = llvm::IntegerType::getInt64Ty(context);
auto *type =
llvm::StructType::create(context,
{
i16Type,
i8Type,
i8Type,
i16Type, i8Type, i8Type, i32Type,
i64Type, // [0]
i64Type, // [1]
i64Type, // [2]
i64Type, // [3]
i64Type, // [4]
i64Type, // [5]
i64Type, // [6]
i64Type, // [7]
},
"iree_hal_executable_dispatch_attrs_v0_t",
/*isPacked=*/false);
Expand Down Expand Up @@ -490,6 +500,7 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) {
auto *i8Type = llvm::IntegerType::getInt8Ty(context);
auto *i16Type = llvm::IntegerType::getInt16Ty(context);
auto *i32Type = llvm::IntegerType::getInt32Ty(context);
auto *i64Type = llvm::IntegerType::getInt64Ty(context);

// iree_hal_executable_export_table_v0_t::ptrs
SmallVector<llvm::Constant *> exportPtrValues;
Expand Down Expand Up @@ -520,6 +531,24 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) {
llvm::ConstantInt::get(i8Type, dispatch.attrs.constantCount),
// binding_count=
llvm::ConstantInt::get(i8Type, dispatch.attrs.bindingCount),
// reserved_0=
llvm::ConstantInt::get(i32Type, 0),
// reserved_1[0]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[1]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[2]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[3]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[4]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[5]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[6]=
llvm::ConstantInt::get(i64Type, 0),
// reserved_1[7]=
llvm::ConstantInt::get(i64Type, 0),
}));
}
exportAttrs = createArrayConstant(libraryName + "_attrs", dispatchAttrsType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], round_dims_to = array<i64: 16, 16, 16>>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_embedded_elf_x86_64_]> : !hal.device
#device_target_llvm_cpu = #hal.device.target<"local", [#executable_target_embedded_elf_x86_64_]> : !hal.device
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
%3 = iree_encoding.set_encoding %arg0 : tensor<?x?xf32> -> tensor<?x?xf32, #encoding>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

module attributes {
hal.device.targets = [
#hal.device.target<"llvm-cpu", [
#hal.device.target<"local", [
#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
native_vector_size = 16 : index
}>
Expand Down
2 changes: 1 addition & 1 deletion compiler/plugins/target/LLVMCPU/test/smoketest_system.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

module attributes {
hal.device.targets = [
#hal.device.target<"llvm-cpu", [
#hal.device.target<"local", [
#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
native_vector_size = 16 : index
}>
Expand Down
1 change: 0 additions & 1 deletion compiler/plugins/target/VMVX/VMVXTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ class VMVXTargetBackend final : public TargetBackend {

// Specify the constant and binding information used to validate
// dispatches.
// TODO(#18154): pack per-binding information bitfields.
if (auto layoutAttr = exportOp.getLayout()) {
int64_t constantCount = layoutAttr.getPushConstants();
if (constantCount > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ module attributes {hal.device.targets = [#device_target_vulkan]} {
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], round_dims_to = array<i64: 16, 16, 16>>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {target_triple = "x86_64-none-elf", cpu_features = "+avx512f"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_embedded_elf_x86_64_]> : !hal.device
#device_target_llvm_cpu = #hal.device.target<"local", [#executable_target_embedded_elf_x86_64_]> : !hal.device
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">
#device_target_vulkan = #hal.device.target<"vulkan", [#executable_target_vulkan_spirv_fb]> : !hal.device
module attributes {hal.device.targets = [#hal.device.select<[#device_target_vulkan, #device_target_llvm_cpu]> : !hal.device]} {
Expand Down
2 changes: 1 addition & 1 deletion compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ def HAL_DeviceTargetAttr : AttrDef<HAL_Dialect, "DeviceTarget", [

Example:
```mlir
#hal.device.target<"llvm-cpu", {
#hal.device.target<"local", {
device_configuration = ...
}, [
#hal.executable.target<"llvm-cpu", "embedded-elf-arm_32">,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@ LocalDevice::LocalDevice(const LocalDevice::Options options)
IREE::HAL::DeviceTargetAttr LocalDevice::getDefaultDeviceTarget(
MLIRContext *context, const TargetRegistry &targetRegistry) const {
Builder b(context);
SmallVector<NamedAttribute> configItems;

// TODO(benvanik): flags for common queries.
SmallVector<NamedAttribute> deviceConfigAttrs;
deviceConfigAttrs.emplace_back(b.getStringAttr("executable_create_2"),
b.getUnitAttr());
auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs);

auto configAttr = b.getDictionaryAttr(configItems);
SmallVector<NamedAttribute> executableConfigAttrs;
auto executableConfigAttr = b.getDictionaryAttr(executableConfigAttrs);

SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
for (auto backendName : options.defaultTargetBackends) {
Expand All @@ -50,23 +53,27 @@ IREE::HAL::DeviceTargetAttr LocalDevice::getDefaultDeviceTarget(
<< "\n";
return {};
}
targetBackend->getDefaultExecutableTargets(context, "local", configAttr,
executableTargetAttrs);
targetBackend->getDefaultExecutableTargets(
context, "local", executableConfigAttr, executableTargetAttrs);
}

return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("local"),
configAttr, executableTargetAttrs);
deviceConfigAttr,
executableTargetAttrs);
}

std::optional<IREE::HAL::DeviceTargetAttr>
LocalDevice::getHostDeviceTarget(MLIRContext *context,
const TargetRegistry &targetRegistry) const {
Builder b(context);
SmallVector<NamedAttribute> configItems;

// TODO(benvanik): flags for overrides or ask LLVM for info about the host.
SmallVector<NamedAttribute> deviceConfigAttrs;
deviceConfigAttrs.emplace_back(b.getStringAttr("executable_create_2"),
b.getUnitAttr());
auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs);

auto configAttr = b.getDictionaryAttr(configItems);
SmallVector<NamedAttribute> executableConfigAttrs;
auto executableConfigAttr = b.getDictionaryAttr(executableConfigAttrs);

SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
for (auto backendName : options.defaultHostBackends) {
Expand All @@ -76,12 +83,13 @@ LocalDevice::getHostDeviceTarget(MLIRContext *context,
<< "\n";
return std::nullopt;
}
targetBackend->getHostExecutableTargets(context, "local", configAttr,
executableTargetAttrs);
targetBackend->getHostExecutableTargets(
context, "local", executableConfigAttr, executableTargetAttrs);
}

return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("local"),
configAttr, executableTargetAttrs);
deviceConfigAttr,
executableTargetAttrs);
}

Value LocalDevice::buildDeviceTargetMatch(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// Ensure devices are copied and made available:
#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
// CHECK: util.global private @device
util.global private @device = #hal.device.target<"llvm-cpu", [
util.global private @device = #hal.device.target<"local", [
#executable_target_embedded_elf_x86_64
]> : !hal.device

Expand Down Expand Up @@ -174,10 +174,10 @@ util.func public @main(%dynamic_arg: i32) -> !stream.timepoint attributes {

#executable_target_embedded_elf_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64">
#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
util.global private @device_a = #hal.device.target<"llvm-cpu", [
util.global private @device_a = #hal.device.target<"local", [
#executable_target_embedded_elf_aarch64
]> : !hal.device
util.global private @device_b = #hal.device.target<"llvm-cpu", [
util.global private @device_b = #hal.device.target<"local", [
#executable_target_embedded_elf_x86_64
]> : !hal.device

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-materialize-dispatch-instrumentation{buffer-size=64mib})' %s | FileCheck %s

module attributes {hal.device.targets = [
#hal.device.target<"llvm-cpu", [
#hal.device.target<"local", [
#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64">,
#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
]> : !hal.device
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ module @module {
util.global private @optional = #hal.device.fallback<@device> : !hal.device
util.global private @ordinal = #hal.device.ordinal<0> : !hal.device
util.global private @selected = #hal.device.select<[
#hal.device.target<"llvm-cpu"> : !hal.device,
#hal.device.target<"local"> : !hal.device,
#hal.device.target<"vmvx"> : !hal.device
]> : !hal.device
util.func private @func() -> () attributes {
Expand Down
4 changes: 2 additions & 2 deletions docs/website/docs/community/blog/posts/microkernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ This then goes to the LLVM x86 backend, which produces x86 assembly.
[...]
// -----// IR Dump After Inliner (inline) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = "all"}>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#executable_target_embedded_elf_x86_64_]}> : !hal.device
#device_target_llvm_cpu = #hal.device.target<"local", {executable_targets = [#executable_target_embedded_elf_x86_64_]}> : !hal.device
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_dynamic(%input0: tensor<?x?xf32>, %input1: tensor<?x?xf32>, %input2: tensor<?x?xf32>) -> (%output0: tensor<?x?xf32>)"}} {
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
Expand Down Expand Up @@ -367,7 +367,7 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
// -----// IR Dump After CSE (cse) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+shstk,+vaes,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,+evex512", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = "all"}>
#map = affine_map<()[s0] -> (s0 ceildiv 16)>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#executable_target_embedded_elf_x86_64_]}> : !hal.device
#device_target_llvm_cpu = #hal.device.target<"local", {executable_targets = [#executable_target_embedded_elf_x86_64_]}> : !hal.device
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
func.func @matmul_dynamic(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_dynamic(%input0: tensor<?x?xf32>, %input1: tensor<?x?xf32>, %input2: tensor<?x?xf32>) -> (%output0: tensor<?x?xf32>)"}} {
%cst = arith.constant 0.000000e+00 : f32
Expand Down
25 changes: 0 additions & 25 deletions runtime/src/iree/hal/drivers/local_sync/sync_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/inline_command_buffer.h"
#include "iree/hal/local/local_executable_cache.h"
#include "iree/hal/local/local_pipeline_layout.h"
#include "iree/hal/utils/deferred_command_buffer.h"
#include "iree/hal/utils/file_transfer.h"
#include "iree/hal/utils/memory_file.h"
Expand Down Expand Up @@ -247,17 +246,6 @@ static iree_status_t iree_hal_sync_device_create_command_buffer(
}
}

// Device hook for descriptor set layout creation. Forwards |flags|,
// |binding_count|, and |bindings| to the local descriptor set layout
// constructor along with the host allocator queried from |base_device|; the
// new layout is written to |out_descriptor_set_layout|.
static iree_status_t iree_hal_sync_device_create_descriptor_set_layout(
    iree_hal_device_t* base_device,
    iree_hal_descriptor_set_layout_flags_t flags,
    iree_host_size_t binding_count,
    const iree_hal_descriptor_set_layout_binding_t* bindings,
    iree_hal_descriptor_set_layout_t** out_descriptor_set_layout) {
  iree_status_t status = iree_hal_local_descriptor_set_layout_create(
      flags, binding_count, bindings,
      iree_hal_device_host_allocator(base_device), out_descriptor_set_layout);
  // The creation status is handed back to the caller unmodified.
  return status;
}

static iree_status_t iree_hal_sync_device_create_event(
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
iree_hal_event_flags_t flags, iree_hal_event_t** out_event) {
Expand Down Expand Up @@ -290,16 +278,6 @@ static iree_status_t iree_hal_sync_device_import_file(
iree_hal_device_host_allocator(base_device), out_file);
}

// Device hook for pipeline layout creation. Forwards |push_constants|,
// |set_layout_count|, and |set_layouts| to the local pipeline layout
// constructor along with the host allocator queried from |base_device|; the
// new layout is written to |out_pipeline_layout|.
static iree_status_t iree_hal_sync_device_create_pipeline_layout(
    iree_hal_device_t* base_device, iree_host_size_t push_constants,
    iree_host_size_t set_layout_count,
    iree_hal_descriptor_set_layout_t* const* set_layouts,
    iree_hal_pipeline_layout_t** out_pipeline_layout) {
  iree_status_t status = iree_hal_local_pipeline_layout_create(
      push_constants, set_layout_count, set_layouts,
      iree_hal_device_host_allocator(base_device), out_pipeline_layout);
  // The creation status is handed back to the caller unmodified.
  return status;
}

static iree_status_t iree_hal_sync_device_create_semaphore(
iree_hal_device_t* base_device, uint64_t initial_value,
iree_hal_semaphore_flags_t flags, iree_hal_semaphore_t** out_semaphore) {
Expand Down Expand Up @@ -540,12 +518,9 @@ static const iree_hal_device_vtable_t iree_hal_sync_device_vtable = {
.query_i64 = iree_hal_sync_device_query_i64,
.create_channel = iree_hal_sync_device_create_channel,
.create_command_buffer = iree_hal_sync_device_create_command_buffer,
.create_descriptor_set_layout =
iree_hal_sync_device_create_descriptor_set_layout,
.create_event = iree_hal_sync_device_create_event,
.create_executable_cache = iree_hal_sync_device_create_executable_cache,
.import_file = iree_hal_sync_device_import_file,
.create_pipeline_layout = iree_hal_sync_device_create_pipeline_layout,
.create_semaphore = iree_hal_sync_device_create_semaphore,
.query_semaphore_compatibility =
iree_hal_sync_device_query_semaphore_compatibility,
Expand Down
Loading

0 comments on commit a69a93b

Please sign in to comment.