diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp index 401b1402b134e..5c5c1bfbbe4ea 100644 --- a/compiler/plugins/target/CUDA/CUDATarget.cpp +++ b/compiler/plugins/target/CUDA/CUDATarget.cpp @@ -672,10 +672,9 @@ class CUDATargetBackend final : public TargetBackend { } auto layoutAttr = exportOp.getLayoutAttr(); - uint32_t constantCount = - static_cast(layoutAttr.getPushConstants()); + uint32_t constantCount = static_cast(layoutAttr.getConstants()); SmallVector bindingFlags; - for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) { + for (auto bindingAttr : layoutAttr.getBindings()) { iree_hal_cuda_BindingBits_enum_t flags = 0; if (allEnumBitsSet(bindingAttr.getFlags(), IREE::HAL::DescriptorFlags::ReadOnly)) { diff --git a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp index ce342b561ad32..7db50acd0033c 100644 --- a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp +++ b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp @@ -399,9 +399,8 @@ class LLVMCPUTargetBackend final : public TargetBackend { // Specify the constant and binding information used to validate // dispatches. 
if (auto layoutAttr = exportOp.getLayout()) { - dispatchAttrs.constantCount = layoutAttr.getPushConstants(); - dispatchAttrs.bindingCount = - layoutAttr.getSetLayout(0).getBindings().size(); + dispatchAttrs.constantCount = layoutAttr.getConstants(); + dispatchAttrs.bindingCount = layoutAttr.getBindings().size(); } LibraryBuilder::SourceLocation sourceLocation; diff --git a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp index 6bffd176b3e64..eb8918dbc7cab 100644 --- a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp +++ b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp @@ -277,10 +277,9 @@ class MetalSPIRVTargetBackend : public TargetBackend { }; auto layoutAttr = exportOp.getLayoutAttr(); - uint32_t constantCount = - static_cast(layoutAttr.getPushConstants()); + uint32_t constantCount = static_cast(layoutAttr.getConstants()); SmallVector bindingFlags; - for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) { + for (auto bindingAttr : layoutAttr.getBindings()) { iree_hal_metal_BindingBits_enum_t flags = 0; if (allEnumBitsSet(bindingAttr.getFlags(), IREE::HAL::DescriptorFlags::ReadOnly)) { diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp index 476d4cec6f327..04e87bb8552c4 100644 --- a/compiler/plugins/target/ROCM/ROCMTarget.cpp +++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp @@ -655,10 +655,9 @@ class ROCMTargetBackend final : public TargetBackend { } auto layoutAttr = exportOp.getLayoutAttr(); - uint32_t constantCount = - static_cast(layoutAttr.getPushConstants()); + uint32_t constantCount = static_cast(layoutAttr.getConstants()); SmallVector bindingFlags; - for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) { + for (auto bindingAttr : layoutAttr.getBindings()) { iree_hal_hip_BindingBits_enum_t flags = 0; if (allEnumBitsSet(bindingAttr.getFlags(), IREE::HAL::DescriptorFlags::ReadOnly)) { diff --git 
a/compiler/plugins/target/VMVX/VMVXTarget.cpp b/compiler/plugins/target/VMVX/VMVXTarget.cpp index 4ac70111846d3..daba862d1e440 100644 --- a/compiler/plugins/target/VMVX/VMVXTarget.cpp +++ b/compiler/plugins/target/VMVX/VMVXTarget.cpp @@ -133,13 +133,13 @@ class VMVXTargetBackend final : public TargetBackend { // Specify the constant and binding information used to validate // dispatches. if (auto layoutAttr = exportOp.getLayout()) { - int64_t constantCount = layoutAttr.getPushConstants(); + int64_t constantCount = layoutAttr.getConstants(); if (constantCount > 0) { funcOp.setReflectionAttr("constant_count", executableBuilder.getI8IntegerAttr( static_cast(constantCount))); } - size_t bindingCount = layoutAttr.getSetLayout(0).getBindings().size(); + size_t bindingCount = layoutAttr.getBindings().size(); if (bindingCount > 0) { funcOp.setReflectionAttr("binding_count", executableBuilder.getI8IntegerAttr( diff --git a/compiler/plugins/target/VMVX/test/smoketest.mlir b/compiler/plugins/target/VMVX/test/smoketest.mlir index 44b3208907556..6bc30625f05cf 100644 --- a/compiler/plugins/target/VMVX/test/smoketest.mlir +++ b/compiler/plugins/target/VMVX/test/smoketest.mlir @@ -38,11 +38,10 @@ stream.executable public @add_dispatch_0 { // CHECK-LABEL: hal.executable public @add_dispatch_0 // CHECK-NEXT: hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) { // CHECK-NEXT: hal.executable.export public @add_dispatch_0 ordinal(0) -// CHECK-SAME: layout(#hal.pipeline.layout, -// CHECK-SAME: <1, storage_buffer>, -// CHECK-SAME: <2, storage_buffer> +// CHECK-SAME: layout(#hal.pipeline.layout, +// CHECK-SAME: #hal.pipeline.binding, +// CHECK-SAME: #hal.pipeline.binding // CHECK: module attributes {vm.toplevel} { // CHECK-NEXT: vm.module public @module { // CHECK-NEXT: vm.func private @add_dispatch_0( diff --git a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp index 
22ae954e0a4db..58137fdb70666 100644 --- a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp +++ b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp @@ -61,12 +61,14 @@ struct VulkanSPIRVTargetOptions { }; } // namespace +using DescriptorSetLayout = std::pair>; + static std::tuple> createPipelineLayoutDefs(ArrayRef exportOps, FlatbufferBuilder &fbb) { - DenseMap descriptorSetLayoutMap; + DenseMap descriptorSetLayoutMap; DenseMap pipelineLayoutMap; SmallVector descriptorSetLayoutRefs; @@ -77,18 +79,20 @@ createPipelineLayoutDefs(ArrayRef exportOps, continue; // already present } + // Currently only one descriptor set on the compiler side. We could + // partition it by binding type (direct vs indirect, etc). SmallVector descriptorSetLayoutOrdinals; - for (auto descriptorSetLayoutAttr : pipelineLayoutAttr.getSetLayouts()) { - auto it = descriptorSetLayoutMap.find(descriptorSetLayoutAttr); - if (it != descriptorSetLayoutMap.end()) { - descriptorSetLayoutOrdinals.push_back(it->second); - continue; - } - + auto descriptorSetLayout = + DescriptorSetLayout(0, pipelineLayoutAttr.getBindings()); + auto it = descriptorSetLayoutMap.find(descriptorSetLayout); + if (it != descriptorSetLayoutMap.end()) { + descriptorSetLayoutOrdinals.push_back(it->second); + } else { SmallVector bindingRefs; - for (auto bindingAttr : descriptorSetLayoutAttr.getBindings()) { - uint32_t ordinal = static_cast(bindingAttr.getOrdinal()); + for (auto [i, bindingAttr] : + llvm::enumerate(pipelineLayoutAttr.getBindings())) { + uint32_t ordinal = static_cast(i); iree_hal_vulkan_VkDescriptorType_enum_t descriptorType = 0; switch (bindingAttr.getType()) { case IREE::HAL::DescriptorType::UniformBuffer: @@ -107,7 +111,7 @@ createPipelineLayoutDefs(ArrayRef exportOps, auto bindingsRef = fbb.createOffsetVecDestructive(bindingRefs); descriptorSetLayoutOrdinals.push_back(descriptorSetLayoutRefs.size()); - descriptorSetLayoutMap[descriptorSetLayoutAttr] = + 
descriptorSetLayoutMap[descriptorSetLayout] = descriptorSetLayoutRefs.size(); descriptorSetLayoutRefs.push_back( iree_hal_vulkan_DescriptorSetLayoutDef_create(fbb, bindingsRef)); @@ -116,7 +120,7 @@ createPipelineLayoutDefs(ArrayRef exportOps, fbb.createInt32Vec(descriptorSetLayoutOrdinals); iree_hal_vulkan_PushConstantRange_vec_ref_t pushConstantRangesRef = 0; - if (int64_t pushConstantCount = pipelineLayoutAttr.getPushConstants()) { + if (int64_t pushConstantCount = pipelineLayoutAttr.getConstants()) { SmallVector pushConstantRanges; iree_hal_vulkan_PushConstantRange range0; range0.stage_flags = 0x00000020u; // VK_SHADER_STAGE_COMPUTE_BIT diff --git a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp index 54094f8f50dea..089f6a291aa85 100644 --- a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp @@ -137,8 +137,7 @@ static bool canSetsBeMerged(Value v1, Value v2, BufferizationPlan &plan) { if (!v1InterfaceBinding || !v2InterfaceBinding) { return true; } - if (v1InterfaceBinding.getSet() != v2InterfaceBinding.getSet() || - v1InterfaceBinding.getBinding() != v2InterfaceBinding.getBinding() || + if (v1InterfaceBinding.getBinding() != v2InterfaceBinding.getBinding() || v1InterfaceBinding.getByteOffset() != v2InterfaceBinding.getByteOffset()) { // If the set, binding or offsets are different, map these to different diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir index 6dd1983602542..4491ab1766d15 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt 
--pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, matmul_narrow_M = 1 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes { hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> }{ %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1000xbf16> %3 = iree_encoding.set_encoding %2 : tensor<1x1000xbf16> -> tensor<1x1000xbf16, #encoding> flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [1, 1000], strides = [1, 1] : tensor<1x1000xbf16, #encoding> -> !flow.dispatch.tensor> @@ -37,11 +35,9 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> 
(d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -51,8 +47,8 @@ func.func @set_encoding_7x7x7_matmul_LHS() attributes { hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %14 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<7x7xf32> %17 = iree_encoding.set_encoding %14 : tensor<7x7xf32> -> tensor<7x7xf32, #encoding> flow.dispatch.tensor.store %17, %11, offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : tensor<7x7xf32, #encoding> -> !flow.dispatch.tensor> @@ -69,11 +65,9 @@ func.func @set_encoding_7x7x7_matmul_LHS() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -83,8 +77,8 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %14 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32xf32> %17 = iree_encoding.set_encoding %14 : tensor<128x80x32xf32> -> tensor<128x80x32xf32, #encoding> flow.dispatch.tensor.store %17, %11, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] @@ -93,8 +87,8 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { return } // CHECK-LABEL: func @set_encoding_128x80x32_batch_matmul_LHS( -// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} !flow.dispatch.tensor> -// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} !flow.dispatch.tensor> +// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} !flow.dispatch.tensor> +// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x10x32x8x1xf32> // CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32> @@ -102,11 +96,9 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -118,8 +110,8 @@ func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %5 = arith.index_castui %0 {stream.alignment = 64 : index} : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> %16 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x320xf32> %19 = iree_encoding.set_encoding %16 : tensor<128x32x320xf32> -> tensor<128x32x320xf32, #encoding> flow.dispatch.tensor.store %19, %13, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] @@ -128,8 +120,8 @@ func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { return } // CHECK-LABEL: func @set_encoding_128x32x320_batch_matmul_RHS( -// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} !flow.dispatch.tensor> -// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} !flow.dispatch.tensor> +// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} !flow.dispatch.tensor> +// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> 
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x320xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x40x32x8x1xf32> // CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32> @@ -137,11 +129,9 @@ func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -153,8 +143,8 @@ func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %3 = arith.index_castui %0 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> %10 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0], sizes = [128, 80, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x320xf32, #encoding> @@ -166,9 +156,9 @@ func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) // 
CHECK: %[[CAST:.+]] = arith.index_castui %[[D0]] : i32 to index -// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) +// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) // CHECK-SAME: : !flow.dispatch.tensor> -// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[CAST]]) +// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[CAST]]) // CHECK-SAME: : !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 40, 8, 8], strides = [1, 1, 1, 1, 1] @@ -255,12 +245,10 @@ func.func @matvec_shaped_matmul_lowering_f32f32f32_aarch64(%arg0: !hal.buffer_vi // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -275,11 +263,11 @@ func.func @matmul_lowering_f32f32f32_aarch64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) 
alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -307,12 +295,12 @@ func.func @matmul_lowering_f32f32f32_aarch64() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1] @@ -352,12 +340,10 @@ func.func @matvec_lowering_f32f32f32_aarch64(%arg0: tensor<16x16xf32>, %arg1: te // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -369,11 +355,11 @@ func.func @matvec_lowering_f32f32f32_aarch64() attributes { hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> @@ -396,11 +382,11 @@ func.func @matvec_lowering_f32f32f32_aarch64() attributes { } // CHECK-LABEL: func @matvec_lowering_f32f32f32_aarch64() // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor> -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor> -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: 
!flow.dispatch.tensor> // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [2, 16, 8, 1], strides = [1, 1, 1, 1] @@ -416,12 +402,10 @@ func.func @matvec_lowering_f32f32f32_aarch64() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -436,11 +420,11 @@ func.func @matmul_lowering_f16f16f16_aarch64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -468,12 +452,12 @@ func.func @matmul_lowering_f16f16f16_aarch64() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout(#pipeline_layout) 
ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1] @@ -489,12 +473,10 @@ func.func @matmul_lowering_f16f16f16_aarch64() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -509,11 +491,11 @@ func.func @matmul_lowering_f32f32f32_x86_64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : 
!flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -542,12 +524,12 @@ func.func @matmul_lowering_f32f32f32_x86_64() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP1]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1] @@ -563,12 +545,10 @@ func.func @matmul_lowering_f32f32f32_x86_64() 
attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -583,11 +563,11 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx2() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -615,12 +595,12 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx2() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) 
// CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1] @@ -636,12 +616,10 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx2() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -656,11 +634,11 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx512f() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -688,12 +666,12 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx512f() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1] @@ -709,12 +687,10 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + 
#hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -729,11 +705,11 @@ func.func @matmul_lowering_f16f16f32_x86_64_avx512f() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -761,12 +737,12 @@ func.func @matmul_lowering_f16f16f32_x86_64_avx512f() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: 
%[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1] @@ -782,12 +758,10 @@ func.func @matmul_lowering_f16f16f32_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -802,11 +776,11 @@ func.func @matmul_lowering_f16f16f16_x86_64_avx512f() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : 
!flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -834,12 +808,12 @@ func.func @matmul_lowering_f16f16f16_x86_64_avx512f() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1] @@ -855,12 +829,10 @@ func.func @matmul_lowering_f16f16f16_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -875,11 +847,11 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512f() attributes { %M = hal.interface.constant.load 
layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -907,12 +879,12 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512f() attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) 
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1] @@ -928,12 +900,10 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -948,11 +918,11 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512f() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -980,12 +950,12 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512f() 
attributes { // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1] @@ -1001,12 +971,10 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1021,11 +989,11 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512bf16() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1055,12 +1023,12 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512bf16() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = 
flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1] @@ -1076,12 +1044,10 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512bf16() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1096,11 +1062,11 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512bf16() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1130,12 +1096,12 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512bf16() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // 
CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1] @@ -1151,12 +1117,10 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512bf16() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1171,11 +1135,11 @@ func.func @matmul_lowering_f32f16f16_aarch64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) 
offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %lhs_f32 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1213,9 +1177,9 @@ func.func @matmul_lowering_f32f16f16_aarch64() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index // CHECK-DAG: %[[M_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[M]]] // CHECK-DAG: %[[N_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[N]]] -// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_8]], %[[K]]} -// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_8]], %[[K]]} -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_8]], %[[N_CEILDIV_8]]} +// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_8]], %[[K]]} +// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_8]], %[[K]]} +// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_8]], %[[N_CEILDIV_8]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets 
= [0, 0, 0, 0], sizes = [%[[M_CEILDIV_8]], %[[K]], 8, 1], {{.*}} -> tensor // CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_8]], %[[K]], 8, 1], {{.*}} -> tensor // CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_8]], %[[N_CEILDIV_8]], 8, 8], {{.*}} -> tensor @@ -1226,12 +1190,10 @@ func.func @matmul_lowering_f32f16f16_aarch64() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1246,11 +1208,11 @@ func.func @matmul_lowering_f32f16f16_x86_64_avx512f() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %lhs_f32 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1289,9 +1251,9 @@ 
func.func @matmul_lowering_f32f16f16_x86_64_avx512f() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index // CHECK-DAG: %[[M_CEILDIV_16:.+]] = affine.apply #[[$MAP_CEILDIV_16]]()[%[[M]]] // CHECK-DAG: %[[N_CEILDIV_16:.+]] = affine.apply #[[$MAP_CEILDIV_16]]()[%[[N]]] -// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_16]], %[[K]]} -// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_16]], %[[K]]} -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_16]], %[[N_CEILDIV_16]]} +// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_16]], %[[K]]} +// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_16]], %[[K]]} +// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M_CEILDIV_16]], %[[N_CEILDIV_16]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_16]], %[[K]], 16, 1], {{.*}} -> tensor // CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_16]], %[[K]], 16, 1], {{.*}} -> tensor // CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_16]], %[[N_CEILDIV_16]], 16, 16], {{.*}} -> tensor @@ -1302,12 +1264,10 @@ func.func @matmul_lowering_f32f16f16_x86_64_avx512f() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1322,11 +1282,11 @@ func.func @matmul_lowering_i8i8i32_aarch64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1352,11 +1312,11 @@ func.func @matmul_lowering_i8i8i32_aarch64() attributes { // CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[M]], %[[K]]} -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: 
%[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[K]], %[[N]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[M]], %[[N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0], sizes = [%[[M]], %[[K]]], strides = [1, 1] @@ -1372,12 +1332,10 @@ func.func @matmul_lowering_i8i8i32_aarch64() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1392,11 +1350,11 @@ func.func @matmul_lowering_i8i8i32_aarch64_dotprod() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = 
flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1426,12 +1384,12 @@ func.func @matmul_lowering_i8i8i32_aarch64_dotprod() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1] @@ -1447,12 +1405,10 @@ func.func @matmul_lowering_i8i8i32_aarch64_dotprod() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1467,11 +1423,11 @@ func.func @matmul_lowering_i8i8i32_aarch64_i8mm() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index 
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1500,12 +1456,12 @@ func.func @matmul_lowering_i8i8i32_aarch64_i8mm() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP0]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 8], strides = [1, 1, 1, 1] @@ -1521,12 +1477,10 @@ func.func @matmul_lowering_i8i8i32_aarch64_i8mm() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1541,11 +1495,11 @@ func.func @matmul_lowering_i8i4i32_aarch64() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1576,12 +1530,12 @@ func.func @matmul_lowering_i8i4i32_aarch64() attributes { // CHECK-DAG: %[[K:.+]] = 
hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP2]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 4, 2], strides = [1, 1, 1, 1] @@ -1597,12 +1551,10 @@ func.func @matmul_lowering_i8i4i32_aarch64() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1617,11 +1569,11 @@ func.func @matmul_lowering_i8i4i32_aarch64_dotprod() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) 
alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1650,12 +1602,12 @@ func.func @matmul_lowering_i8i4i32_aarch64_dotprod() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP0]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = 
[%[[TILED_M]], %[[TILED_K]], 8, 8], strides = [1, 1, 1, 1] @@ -1671,12 +1623,10 @@ func.func @matmul_lowering_i8i4i32_aarch64_dotprod() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1691,11 +1641,11 @@ func.func @matmul_lowering_i8i4i32_aarch64_i8mm() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1726,12 +1676,12 @@ func.func @matmul_lowering_i8i4i32_aarch64_i8mm() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP2]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 4, 16], strides = [1, 1, 1, 1] @@ -1802,12 +1752,10 @@ func.func @matmul_lowering_f32f32f32_riscv(%lhs: tensor, %rhs: tensor, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1822,11 +1770,11 @@ func.func @matmul_lowering_i8i8i32_riscv32_ukernel() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) 
offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1856,12 +1804,12 @@ func.func @matmul_lowering_i8i8i32_riscv32_ukernel() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1] @@ -1877,12 +1825,10 @@ func.func @matmul_lowering_i8i8i32_riscv32_ukernel() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1897,11 +1843,11 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx2() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -1931,12 +1877,12 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx2() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] 
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 2], strides = [1, 1, 1, 1] @@ -1952,12 +1898,10 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx2() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1972,11 +1916,11 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512bw() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) 
offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -2006,12 +1950,12 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512bw() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1] @@ -2027,12 +1971,10 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512bw() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -2047,11 
+1989,11 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512vnni() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -2081,12 +2023,12 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512vnni() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], 
%[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1] @@ -2160,12 +2102,10 @@ func.func @extend_batch_vecmat_explicit_unit_dim(%arg0: tensor<32x1x128xi8>, %ar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -2180,11 +2120,11 @@ func.func @matmul_lowering_i16i16i32_x86_64_avx2() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = 
[1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -2214,12 +2154,12 @@ func.func @matmul_lowering_i16i16i32_x86_64_avx2() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 2], strides = [1, 1, 1, 1] @@ -2235,12 +2175,10 @@ func.func @matmul_lowering_i16i16i32_x86_64_avx2() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -2255,11 +2193,11 @@ func.func @matmul_lowering_i16ui4i32_x86_64_avx512vnni() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : 
index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %lhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %rhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %rhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -2297,9 +2235,9 @@ func.func @matmul_lowering_i16ui4i32_x86_64_avx512vnni() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index // CHECK-DAG: %[[K_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[K]]] // CHECK-DAG: %[[N_CEILDIV_32:.+]] = affine.apply #[[$MAP_CEILDIV_32]]()[%[[N]]] -// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M]], %[[K_CEILDIV_8]]} -// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_32]], %[[K_CEILDIV_8]]} -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M]], %[[N_CEILDIV_32]]} +// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor>{%[[M]], %[[K_CEILDIV_8]]} +// CHECK-DAG: 
%[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor>{%[[N_CEILDIV_32]], %[[K_CEILDIV_8]]} +// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor>{%[[M]], %[[N_CEILDIV_32]]} // CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M]], %[[K_CEILDIV_8]], 1, 8], {{.*}} -> tensor // CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_32]], %[[K_CEILDIV_8]], 32, 8], {{.*}} -> tensor // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M]], %[[N_CEILDIV_32]], 1, 32], {{.*}} -> tensor @@ -2792,13 +2730,11 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> @@ -2807,10 +2743,10 @@ func.func @dequantization() attributes { } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) 
offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %7 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x128x64xi8, #encoding> %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x64xf32, #encoding_bcast> %9 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x64xf32, #encoding_bcast> @@ -2851,11 +2787,9 @@ func.func @dequantization() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> 
#encoding_bcast = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d1, d2)>, round_dims_to = array> @@ -2864,8 +2798,8 @@ func.func @broadcast_batch() attributes { } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x64xf32, #encoding_bcast> %13 = tensor.empty() : tensor<2x128x64xf32, #encoding> %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<128x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) { @@ -2892,11 +2826,9 @@ func.func @broadcast_batch() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding, 
user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1)>, round_dims_to = array> @@ -2905,8 +2837,8 @@ func.func @broadcast_M() attributes { } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x128xf32, #encoding_bcast> %13 = tensor.empty() : tensor<2x128x64xf32, #encoding> %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x128xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) { @@ -2933,11 +2865,9 @@ func.func @broadcast_M() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, 
d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> @@ -2946,8 +2876,8 @@ func.func @broadcast_N() attributes { } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x64xf32, #encoding_bcast> %13 = tensor.empty() : tensor<2x128x64xf32, #encoding> %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) { @@ -2974,11 +2904,9 @@ func.func @broadcast_N() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, 
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> @@ -2987,8 +2915,8 @@ func.func @broadcast_K() attributes { } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x64xf32, #encoding_bcast> %13 = tensor.empty() : tensor<2x128x64xf32, #encoding> %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) { diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir index 0464a42a26c6f..10ceeaae9fde2 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -20,11 +18,11 @@ func.func @matmul_lowering_i8i8i32_vmvx_ukernel() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -55,17 +53,17 @@ func.func @matmul_lowering_i8i8i32_vmvx_ukernel() attributes { // CHECK: %[[LHS_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor>> -> index, index // CHECK-DAG: %[[LHS_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[M]], %[[LHS_TILE_SIZES]]#0] // CHECK-DAG: %[[LHS_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[K]], %[[LHS_TILE_SIZES]]#1] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[LHS_OUTER_SIZE0]], %[[LHS_OUTER_SIZE1]], 
%[[LHS_TILE_SIZES]]#0, %[[LHS_TILE_SIZES]]#1} // CHECK: %[[RHS_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor>> -> index, index // CHECK-DAG: %[[RHS_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[N]], %[[RHS_TILE_SIZES]]#0] // CHECK-DAG: %[[RHS_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[K]], %[[RHS_TILE_SIZES]]#1] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[RHS_OUTER_SIZE0]], %[[RHS_OUTER_SIZE1]], %[[RHS_TILE_SIZES]]#0, %[[RHS_TILE_SIZES]]#1} // CHECK: %[[RESULT_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor>> -> index, index // CHECK-DAG: %[[RESULT_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[M]], %[[RESULT_TILE_SIZES]]#0] // CHECK-DAG: %[[RESULT_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[N]], %[[RESULT_TILE_SIZES]]#1] -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[RESULT_OUTER_SIZE0]], %[[RESULT_OUTER_SIZE1]], %[[RESULT_TILE_SIZES]]#0, %[[RESULT_TILE_SIZES]]#1} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[LHS_OUTER_SIZE0]], %[[LHS_OUTER_SIZE1]], %[[LHS_TILE_SIZES]]#0, %[[LHS_TILE_SIZES]]#1], strides = [1, 1, 1, 1] @@ -81,12 +79,10 @@ func.func @matmul_lowering_i8i8i32_vmvx_ukernel() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> ((3 ceildiv s0) * s0)> #map1 = affine_map<()[s0] -> ((1 ceildiv s0) * s0)> @@ -102,9 +98,9 @@ func.func 
@fill_matmul(%arg0: index, %arg1: index, %arg2: index, %arg3: index, % %c32_i64 = arith.constant 32 : i64 %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%arg4, %arg5} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%arg4, %arg5} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 2], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x2xf32, #encoding_lhs> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x3xf32, #encoding_rhs> %7 = tensor.empty() : tensor<1x3xf32, #encoding_result> @@ -115,11 +111,11 @@ func.func @fill_matmul(%arg0: index, %arg1: index, %arg2: index, %arg3: index, % } // CHECK: func.func @fill_matmul // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor> -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // 
CHECK-SAME: !flow.dispatch.tensor> -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor> // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [1, 1, 8, 4], strides = [1, 1, 1, 1] @@ -137,11 +133,9 @@ func.func @fill_matmul(%arg0: index, %arg1: index, %arg2: index, %arg3: index, % // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -153,9 +147,9 @@ func.func @set_encoding_dynamic() attributes { %c0 = arith.constant 0 : index %d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%d0, %d1} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%d0, %d1} %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1] : !flow.dispatch.tensor>{%d0, %d1} -> tensor @@ -172,10 +166,10 @@ func.func @set_encoding_dynamic() attributes { // CHECK-DAG: %[[CST:.+]] = arith.constant 0.0 // CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) -// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(0) +// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-DAG: %[[TILED_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] // CHECK-DAG: %[[TILED_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_D0]], %[[TILED_D1]]} // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK: %[[EMPTY:.+]] = tensor.empty @@ -187,11 +181,9 @@ func.func @set_encoding_dynamic() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -204,9 +196,9 @@ func.func @unset_encoding_dynamic() attributes { %cst = arith.constant 0.000000e+00 : f32 %d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%d0, %d1} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%d0, %d1} %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1] : !flow.dispatch.tensor>{%d0, %d1} @@ -226,9 +218,9 @@ func.func @unset_encoding_dynamic() attributes { // CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // 
CHECK-DAG: %[[TILED_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] // CHECK-DAG: %[[TILED_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_D0]], %[[TILED_D1]]} -// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_D0]], %[[TILED_D1]], 8, 4], strides = [1, 1, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[D0]], %[[D1]]) @@ -238,12 +230,10 @@ func.func @unset_encoding_dynamic() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -258,11 +248,11 @@ func.func @matmul_lowering_f32f32f32_generic() attributes { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %K} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor>{%K, %N} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%M, %N} %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1] : !flow.dispatch.tensor>{%M, %K} @@ -292,12 +282,12 @@ func.func @matmul_lowering_f32f32f32_generic() attributes { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[MAP0]]()[%[[M]]] // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[MAP1]]()[%[[K]]] -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_K]]} // CHECK: %[[TILED_N:.+]] = affine.apply #[[MAP0]]()[%[[N]]] -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_N]], %[[TILED_K]]} -// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_M]], %[[TILED_N]]} // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1] diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp index 5f25384ddec79..bc9420f265597 100644 --- a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp +++ 
b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp @@ -92,10 +92,9 @@ struct ConvertHalInterfaceBindingSubspan final auto newOp = rewriter.replaceOpWithNewOp( - op, newResultTy, adaptor.getLayout(), adaptor.getSet(), - adaptor.getBinding(), adaptor.getByteOffset(), - adaptor.getDynamicDims(), adaptor.getAlignmentAttr(), - adaptor.getDescriptorFlagsAttr()); + op, newResultTy, adaptor.getLayout(), adaptor.getBinding(), + adaptor.getByteOffset(), adaptor.getDynamicDims(), + adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr()); LLVM_DEBUG(llvm::dbgs() << "Bf16Emulation: new op: " << newOp << "\n"); (void)newOp; return success(); diff --git a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp index e2c8805d270c0..772faf49a6273 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp @@ -81,9 +81,9 @@ struct ConvertHalInterfaceBindingSubspan final } rewriter.replaceOpWithNewOp( - op, newResultType, adaptor.getLayout(), adaptor.getSet(), - adaptor.getBinding(), byteOffset, dynamicLinearizedSize, - adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr()); + op, newResultType, adaptor.getLayout(), adaptor.getBinding(), + byteOffset, dynamicLinearizedSize, adaptor.getAlignmentAttr(), + adaptor.getDescriptorFlagsAttr()); return success(); } }; diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp index 2f7a39ebaacf8..aa9e124d5705c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp @@ -285,7 +285,7 @@ struct FlattenBindingSubspan final auto newOffset = rewriter.create(loc, 0); auto newOp = rewriter.create( - subspanOp.getLoc(), newType, subspanOp.getLayout(), 
subspanOp.getSet(), + subspanOp.getLoc(), newType, subspanOp.getLayout(), subspanOp.getBinding(), newOffset, dynamicShape, subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir index 7c4cd2f047e7f..5bdfb148e4009 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir @@ -268,13 +268,6 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> -]> #config = #iree_gpu.derived_thread_config module { func.func @inferred_im2col(%2: tensor<2x34x34x128xf16>, %3: tensor<2x128x8xf16>) -> tensor<2x128x8xf16> diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir index 126c6f582638a..40ac0b7d9eb72 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir @@ -1,22 +1,20 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @padded_conv() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c112 = arith.constant 112 : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir index 0831e5b75c7e0..7a3ab28bb8a67 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-distribute, cse))" %s --split-input-file | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 256)> #map1 = affine_map<(d0, 
d1)[s0] -> (d0 * 1024 + s0 + d1)> @@ -15,11 +13,11 @@ func.func @add_tensor() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 %c64 = arith.constant 64 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %0, 64 : memref<233x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %1, 64 : memref<233x1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %2, 64 : memref<233x1024xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -51,12 +49,10 @@ func.func @add_tensor() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 256)> #map1 = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)> @@ -66,11 +62,11 @@ func.func @add_tensor_lane_id() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 %c64 = arith.constant 64 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) 
offset(%c0) : memref<233x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %0, 64 : memref<233x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %1, 64 : memref<233x1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32> memref.assume_alignment %2, 64 : memref<233x1024xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir index 3535a86173578..b483dda17dd61 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir @@ -2,12 +2,10 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-pipelining{epilogue-peeling=false}))" --split-input-file %s | FileCheck %s // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-pipelining{pipeline-depth=3 schedule-index=2 epilogue-peeling=false}))" --split-input-file %s | FileCheck -check-prefix=CHECK-NV %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @_matmul_f16_f16_dispatch_0_fill_3456x1024() { %c2048 = arith.constant 2048 : index @@ -21,11 
+19,11 @@ func.func @_matmul_f16_f16_dispatch_0_fill_3456x1024() { %3 = gpu.thread_id z %4 = memref.alloc() : memref<4x32x40xf16, 3> %5 = memref.alloc() : memref<4x32x40xf16, 3> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16> memref.assume_alignment %6, 64 : memref<3456x2048xf16> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16> memref.assume_alignment %7, 64 : memref<2048x1024xf16> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16> memref.assume_alignment %8, 64 : memref<3456x1024xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -63,12 +61,10 @@ func.func @_matmul_f16_f16_dispatch_0_fill_3456x1024() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @nvidia_tenscore_schedule_f16() { %c3 = arith.constant 3 : index @@ -86,11 +82,11 @@ func.func @nvidia_tenscore_schedule_f16() { %alloc = memref.alloc() : memref<128x256xf16, #gpu.address_space> %alloc_1 = memref.alloc() : memref<3x128x32xf16, #gpu.address_space> %alloc_2 = memref.alloc() : memref<3x32x256xf16, #gpu.address_space> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) 
flags(ReadOnly) : memref<512x1280xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<512x1280xf16> memref.assume_alignment %3, 64 : memref<512x1280xf16> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<1280x1280xf16> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<1280x1280xf16> memref.assume_alignment %4, 64 : memref<1280x1280xf16> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<512x1280xf16> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<512x1280xf16> memref.assume_alignment %5, 64 : memref<512x1280xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -517,12 +513,10 @@ func.func @nvidia_tenscore_schedule_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @nvidia_tenscore_schedule_f32() { %c31 = arith.constant 31 : index @@ -540,11 +534,11 @@ func.func @nvidia_tenscore_schedule_f32() { %alloc = memref.alloc() : memref<128x128xf32, #gpu.address_space> %alloc_2 = memref.alloc() : memref<3x128x32xf32, #gpu.address_space> %alloc_3 = memref.alloc() : memref<3x32x128xf32, #gpu.address_space> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32> memref.assume_alignment %3, 64 : memref<256x256xf32> - %4 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32> memref.assume_alignment %4, 64 : memref<256x256xf32> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x256xf32> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x256xf32> memref.assume_alignment %5, 64 : memref<256x256xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir index 9377136cd8bd4..7a0334c67f819 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir @@ -4,20 +4,18 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=transpose}))" \ // RUN: --split-input-file %s | FileCheck --check-prefix=TRANSPOSE %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index %c96 = arith.constant 96 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<128x96xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir index 04ffb5b6427bd..a07b26f769fa8 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir @@ -33,11 +33,9 @@ // TRANSPOSE-DAG: affine.apply #{{.+}}()[%[[REM]]] // TRANSPOSE: return -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @main_dispatch_0 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -54,8 +52,8 @@ hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { %c64 = arith.constant 64 : index %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) 
flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %2 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_y] diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir index 527c083955c04..7598316bfc186 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt %s --allow-unregistered-dialect --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile-to-serial-loops,iree-codegen-gpu-tensor-alloc))" | FileCheck %s // RUN: iree-opt %s --allow-unregistered-dialect --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile-to-serial-loops{coalesce-loops},iree-codegen-gpu-tensor-alloc))" | FileCheck %s --check-prefix=COALESCE_LOOPS -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_2048x512x1024() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) 
alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] @@ -38,19 +36,17 @@ func.func @matmul_2048x512x1024() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1x384x384() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x384xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %4 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%workgroup_id_x] @@ -68,19 +64,17 @@ func.func @matmul_1x384x384() { // ----- -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_multi_uses() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] @@ -107,12 +101,10 @@ func.func @matmul_multi_uses() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_33x33x903168_f32() { %c0 = arith.constant 0 : index @@ -130,10 +122,10 @@ func.func @matmul_33x33x903168_f32() { %5 = arith.index_castui %2 {stream.alignment = 4096 : index, stream.values = [1240289280 : index, 1789415424 : index]} : i32 to index %6 = arith.index_castui %3 {stream.alignment = 8192 : index, stream.values = [633077760 : index, 752295936 : index]} : i32 to index %7 = arith.index_castui %4 
{stream.alignment = 64 : index, stream.values = [1486349952 : index, 1486358464 : index]} : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> %12 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_x] %13 = flow.dispatch.tensor.load %11, offsets = [%12, 0], sizes = [32, 33], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x33xf32> %14 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [903168, 33], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<903168x33xf32> @@ -160,23 +152,21 @@ func.func @matmul_33x33x903168_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @weight_dequant_matmul() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_y = hal.interface.workgroup.id[1] : index %5 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -235,19 +225,17 @@ func.func @weight_dequant_matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, 
+ #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv() attributes {translation_info = #iree_codegen.translation_info, subgroup_m_count = 1, subgroup_n_count = 4>}>} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_z = hal.interface.workgroup.id[2] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir index 364c25bb347d4..233c5b6491e7f 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile, cse))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> 
#config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 256)> @@ -14,9 +12,9 @@ module { func.func @add_tensor() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply #map()[%workgroup_id_x] @@ -35,9 +33,9 @@ module { // CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0 * 4)> // CHECK-LABEL: func.func @add_tensor -// CHECK-DAG: %[[A:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[C:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[A:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[C:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[LA:.*]] = flow.dispatch.tensor.load %[[A]] // CHECK-DAG: %[[LB:.*]] = flow.dispatch.tensor.load %[[B]] // CHECK-DAG: %[[LC:.*]] = flow.dispatch.tensor.load %[[C]] @@ -58,11 +56,9 @@ module { // ----- 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -73,8 +69,8 @@ module { func.func @reduction() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %2 = affine.apply #map()[%workgroup_id_x] %3 = flow.dispatch.tensor.load %1, offsets = [%2], sizes = [64], strides = [1] : !flow.dispatch.tensor> -> tensor<64xf32> @@ -116,11 +112,9 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -131,8 +125,8 @@ module { func.func @reduction_broadcast() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %2 = affine.apply #map()[%workgroup_id_x] diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir index 9cc4a19ff6ed2..6b15d52b41c0c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt -split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tile))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @innermost_reduction() { %c1 = arith.constant 1 : index @@ -16,9 +14,9 @@ func.func @innermost_reduction() { %3 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 394752 : index, 984064 : index]} : i32 to index %4 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [0 : index, 196608 : index, 197120 : index]} : i32 to index %5 = arith.index_cast %2 {stream.alignment = 512 : index, stream.values = [512 : index, 197120 : index, 197632 : index]} : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %9 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%workgroup_id_x] @@ -61,11 +59,9 @@ func.func @innermost_reduction() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @has_scf_if() { %c49152 = arith.constant 49152 : index @@ -74,8 +70,8 @@ func.func @has_scf_if() { %c1023_i32 = arith.constant 1023 : i32 %c2_i32 = arith.constant 2 : i32 %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %2 = affine.apply affine_map<()[s0] -> (s0 * 256)>()[%workgroup_id_x] diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir index 2cea3e2a678ec..ee1cadd1a655b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt 
--pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tile-reduction),canonicalize,cse)" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @warp_reduction_dispatch() { %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %2 = flow.dispatch.tensor.load %1, offsets = [%workgroup_id_x], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xf32> %3 = flow.dispatch.tensor.load %0, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x10240xf32> @@ -49,18 +47,16 @@ func.func @warp_reduction_dispatch() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @warp_reduction_batch_matmul() { %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %workgroup_id_z = hal.interface.workgroup.id[2] : index @@ -95,16 +91,14 @@ func.func @warp_reduction_batch_matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @warp_reduction_broadcast_dispatch() { %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %2 = flow.dispatch.tensor.load %1, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x10240xf32> %3 = flow.dispatch.tensor.load %0, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x10240xf32> @@ -163,22 +157,20 @@ func.func @warp_reduction_broadcast_dispatch() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @warp_reduction_multi_reduction() { %cst = arith.constant 0.000000e+00 : f32 - %10 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %15 = flow.dispatch.tensor.load %14, offsets = [%workgroup_id_x], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xf32> %16 = flow.dispatch.tensor.load %13, offsets = [0, 0], sizes = [86, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<86x128xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir index bb3565f079d33..a60528daa21d6 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt %s --iree-transform-dialect-interpreter -transform-dialect-drop-schedule --split-input-file | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index %c96 = arith.constant 96 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<128x96xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir index a50b6ec01dc6c..e365a2b9569ca 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline='builtin.module(func.func(iree-codegen-vector-reduction-to-gpu, cse))' %s | FileCheck %s // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline='builtin.module(func.func(iree-codegen-vector-reduction-to-gpu, cse))' %s | FileCheck %s --check-prefix=CDNA3 -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)> #translation_info = #iree_codegen.translation_info @@ -17,8 +15,8 @@ module { %cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32> %c32 = arith.constant 32 : index %c384 = arith.constant 384 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x384xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x384xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %thread_id_x = gpu.thread_id x %2 = affine.apply #map()[%thread_id_x, %workgroup_id_x] @@ -75,12 +73,10 @@ module { // Make sure memref.load from uniform buffers are hoisted out as uniform code. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, uniform_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #translation_info = #iree_codegen.translation_info #map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)> @@ -93,14 +89,14 @@ module { %cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32> %c32 = arith.constant 32 : index %c384 = arith.constant 384 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) offset(%c0) : memref<1xvector<4xi32>, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) offset(%c0) : memref<1xvector<4xi32>, #hal.descriptor_type> %1 = memref.load %0[%c0] : memref<1xvector<4xi32>, #hal.descriptor_type> %2 = vector.extractelement %1[%c0 : index] : vector<4xi32> %3 = vector.extractelement %1[%c1 : index] : vector<4xi32> %4 = arith.index_castui %2 : i32 to index %5 = arith.index_castui %3 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : memref<128x384xf32> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : memref<128xf32> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : memref<128x384xf32> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : memref<128xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %thread_id_x = gpu.thread_id x %8 = affine.apply #map()[%thread_id_x, %workgroup_id_x] @@ -119,14 +115,14 @@ module { // CHECK-LABEL: func.func @reduce_uniform_buffer_offset() // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[SUBSPAN:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[C0]]] // CHECK: %[[EXT0:.+]] = vector.extractelement %[[LOAD]][%[[C0]] : index] : vector<4xi32> // CHECK: %[[EXT1:.+]] = vector.extractelement %[[LOAD]][%[[C1]] : index] : vector<4xi32> // CHECK: %[[OFFSET0:.+]] = arith.index_castui %[[EXT0]] : i32 to index // CHECK: %[[OFFSET1:.+]] = arith.index_castui %[[EXT1]] : i32 to index -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET0]]) -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET1]]) +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET0]]) +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET1]]) // CHECK: scf.for // CHECK-COUNT-5: gpu.shuffle // CHECK: arith.addf @@ -136,12 +132,10 @@ module { // Make sure memref.load from readonly storage buffers are hoisted out as uniform code. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)> #translation_info = #iree_codegen.translation_info @@ -154,14 +148,14 @@ module { %cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32> %c32 = arith.constant 32 : index %c384 = arith.constant 384 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<1xvector<4xi32>, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<1xvector<4xi32>, #hal.descriptor_type> %1 = memref.load %0[%c0] : memref<1xvector<4xi32>, #hal.descriptor_type> %2 = vector.extractelement %1[%c0 : index] : vector<4xi32> %3 = vector.extractelement %1[%c1 : index] : vector<4xi32> %4 = arith.index_castui %2 : i32 to index %5 = arith.index_castui %3 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : memref<128x384xf32> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : memref<128xf32> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : memref<128x384xf32> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : memref<128xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %thread_id_x = gpu.thread_id x %8 = affine.apply #map()[%thread_id_x, %workgroup_id_x] @@ -180,14 +174,14 @@ module { // CHECK-LABEL: func.func @reduce_storage_buffer_offset() // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(2) +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[C0]]] // CHECK: %[[EXT0:.+]] = vector.extractelement %[[LOAD]][%[[C0]] : index] : vector<4xi32> // CHECK: %[[EXT1:.+]] = vector.extractelement %[[LOAD]][%[[C1]] : index] : vector<4xi32> // CHECK: %[[OFFSET0:.+]] = arith.index_castui %[[EXT0]] : i32 to index // CHECK: %[[OFFSET1:.+]] = arith.index_castui %[[EXT1]] : i32 to index -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET0]]) -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET1]]) +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET0]]) +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET1]]) // CHECK: scf.for // CHECK-COUNT-5: gpu.shuffle // CHECK: arith.addf @@ -195,11 +189,9 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #translation_info = #iree_codegen.translation_info module { @@ -208,8 +200,8 @@ module { %cst = arith.constant dense<0.000000e+00> : vector<1xf32> %cst_0 = arith.constant 0.000000e+00 : f32 %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128x32xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %alloc = memref.alloc() 
{alignment = 64 : i64} : memref<32xf32, #gpu.address_space> %2 = vector.transfer_read %0[%workgroup_id_x, %c0], %cst_0 {in_bounds = [true]} : memref<128x32xf32>, vector<32xf32> @@ -234,12 +226,10 @@ module { // Check that we multi-row matvec gets distributed across subgroup threads. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #translation_info = #iree_codegen.translation_info #map = affine_map<()[s0] -> (s0 * 4)> @@ -253,11 +243,11 @@ module { %c512 = arith.constant 512 : index %cst_1 = arith.constant 0.000000e+00 : f16 %thread_id_x = gpu.thread_id x - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x4096xf16, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x4096xf16, #hal.descriptor_type> memref.assume_alignment %0, 64 : memref<1x4096xf16, #hal.descriptor_type> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<32000x4096xf16, #hal.descriptor_type> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<32000x4096xf16, #hal.descriptor_type> memref.assume_alignment %1, 64 : memref<32000x4096xf16, #hal.descriptor_type> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<1x32000xf16, #hal.descriptor_type> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<1x32000xf16, #hal.descriptor_type> memref.assume_alignment %2, 64 : memref<1x32000xf16, #hal.descriptor_type> %workgroup_id_x = hal.interface.workgroup.id[0] : index %3 = 
affine.apply #map()[%workgroup_id_x] @@ -291,19 +281,17 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #translation_info = #iree_codegen.translation_info module { func.func @simple_nd_write() attributes {translation_info = #translation_info} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<4x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<4x1024xf32> %2 = vector.transfer_read %0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<4x1024xf32>, vector<4x1024xf32> vector.transfer_write %2, %1[%c0, %c0] {in_bounds = [true, true]} : vector<4x1024xf32>, memref<4x1024xf32> return diff --git a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp index 7ae89bd344842..c45c62f9ba2ee 100644 --- a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp @@ -156,7 +156,7 @@ struct ResolveExtractMetadataFromHalInterfaceBindingSubspan // `hal.interface.binding.subspan` is // // ```mlir - // hal.interface.binding.subspan layout(#pipeline_layout) set(0) + // hal.interface.binding.subspan layout(#pipeline_layout) // binding(1) offset(%offset) // : memref>{%s0, %s1} // ``` @@ -167,7 +167,7 @@ struct ResolveExtractMetadataFromHalInterfaceBindingSubspan // #map = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // 
%linearSize = affine.apply #map()[%offset, %s0, %s1] // %c0 = arith.constant 0 : index - // hal.interface.binding.subspan layout(#pipeline_layout) set(0) + // hal.interface.binding.subspan layout(#pipeline_layout) // binding(1) offset(%c0) // : memref{%linearSize} // ``` @@ -197,7 +197,7 @@ struct ResolveExtractMetadataFromHalInterfaceBindingSubspan Value zero = rewriter.create(loc, 0); auto linearInterfaceBinding = rewriter.create( - loc, newBufferType, binding.getLayoutAttr(), binding.getSetAttr(), + loc, newBufferType, binding.getLayoutAttr(), binding.getBindingAttr(), zero, dynamicLinearShape, binding.getAlignmentAttr(), binding.getDescriptorFlagsAttr()); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp index e966749961e53..2de3dc30029a3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp @@ -616,9 +616,9 @@ struct MaterializeInterfaceBindingEncoding auto newResultType = IREE::Flow::DispatchTensorType::get( resultType.getAccess(), convertedBoundType); rewriter.replaceOpWithNewOp( - subspanOp, newResultType, subspanOp.getLayout(), subspanOp.getSet(), - subspanOp.getBinding(), subspanOp.getByteOffset(), newDynamicDims, - subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); + subspanOp, newResultType, subspanOp.getLayout(), subspanOp.getBinding(), + subspanOp.getByteOffset(), newDynamicDims, subspanOp.getAlignmentAttr(), + subspanOp.getDescriptorFlagsAttr()); return success(); } }; diff --git a/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir b/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir index cbbcde4115e4b..ab2a5bd354a71 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir +++ 
b/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-bufferize-copy-only-dispatches))" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_insert_slice() { %slice_size = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -20,9 +18,9 @@ func.func @tensor_insert_slice() { %dest_binding_size_x = hal.interface.constant.load layout(#pipeline_layout) ordinal(10) : index %source_binding_size_y = hal.interface.constant.load layout(#pipeline_layout) ordinal(11) : index %source_binding_size_x = hal.interface.constant.load layout(#pipeline_layout) ordinal(12) : index - %source = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %source = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%source_binding_size_y, %source_binding_size_x} - %dest = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %dest = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%dest_binding_size_y, %dest_binding_size_x} %source_load = flow.dispatch.tensor.load %source, offsets = [%source_offset_y, %source_offset_x], sizes = [1, %slice_size], strides = [%source_stride_y, %source_stride_x] @@ -43,8 +41,8 @@ func.func @tensor_insert_slice() { // CHECK-DAG: %[[SOURCE_OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(6) // CHECK-DAG: %[[SOURCE_STRIDE_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(7) // CHECK-DAG: %[[SOURCE_STRIDE_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(8) -// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[DEST:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[SOURCE_OFFSET_Y]], %[[SOURCE_OFFSET_X]]] [1, %[[SLICE_SIZE]]] [%[[SOURCE_STRIDE_Y]], %[[SOURCE_STRIDE_X]]] // CHECK-DAG: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[DEST_OFFSET_Y]], %[[DEST_OFFSET_X]]] [%[[SLICE_SIZE]], 1] [%[[DEST_STRIDE_Y]], %[[DEST_STRIDE_X]]] // CHECK: linalg.generic @@ -53,24 +51,22 @@ func.func @tensor_insert_slice() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, uniform_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @UpSampling1D() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [2, 1, 3], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x3xf32> flow.dispatch.tensor.store %2, %0, offsets = [0, 0, 0], sizes = [2, 1, 3], strides = [1, 1, 1] : tensor<2x3xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @UpSampling1D() -// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) 
binding(0) +// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][0, 0, 0] [2, 1, 3] // CHECK-DAG: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0, 0] [2, 1, 3] // CHECK: linalg.generic @@ -79,15 +75,13 @@ func.func @UpSampling1D() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @concatenate_cst() { %cst = arith.constant dense<0> : tensor<2x3xi32> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> flow.dispatch.tensor.store %cst, %0, offsets = [0, 2], sizes = [2, 3], strides = [1, 1] : tensor<2x3xi32> -> !flow.dispatch.tensor> return } @@ -103,15 +97,13 @@ func.func @concatenate_cst() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @already_bufferized() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1001xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1001xf32, #hal.descriptor_type> memref.assume_alignment %0, 64 : memref<1001xf32, #hal.descriptor_type> %alloc = memref.alloc() : memref<1001xf32> linalg.fill ins(%cst : f32) outs(%alloc : memref<1001xf32>) diff --git a/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir b/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir index 023b33afb4858..54819cee63791 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir +++ 
b/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-cleanup-buffer-alloc-view))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @fold_reshape_load() func.func @fold_reshape_load() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cst = arith.constant 0.0 : f32 - // CHECK: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> + // CHECK: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[ARG]], {{.*}} : !flow.dispatch.tensor> -> tensor<3x3x96xf32> %3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x1x96xf32> %4 = tensor.collapse_shape %3 [[0, 1, 2, 3]] : tensor<3x3x1x96xf32> into tensor<864xf32> @@ -26,19 +24,17 @@ func.func @fold_reshape_load() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @fold_reshape_store() func.func @fold_reshape_store() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cst = arith.constant 0.0 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(0) : !flow.dispatch.tensor> - // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %{{.*}}, {{.*}} %3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x1x96xf32> // CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[LOAD]] : tensor<3x3x1x96xf32>) @@ -52,10 +48,8 @@ func.func @fold_reshape_store() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @dont_fold_reshape_with_not_full_load() func.func @dont_fold_reshape_with_not_full_load() { @@ -63,8 +57,8 @@ func.func @dont_fold_reshape_with_not_full_load() { %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c96 = arith.constant 96 : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %1, offsets = [%c3, %c0, %c0, %c0], sizes = [%c3, %c3, %c1, %c96], strides = [%c1, %c1, %c1, %c1] : !flow.dispatch.tensor> -> tensor<3x3x1x96xf32> // CHECK: tensor.collapse_shape // CHECK: tensor.expand_shape @@ -76,10 +70,8 @@ func.func @dont_fold_reshape_with_not_full_load() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = 
#hal.pipeline.layout ]> // CHECK-LABEL: func.func @dont_fold_dynamic_reshape() func.func @dont_fold_dynamic_reshape() { @@ -88,8 +80,8 @@ func.func @dont_fold_dynamic_reshape() { %dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %dim2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%dim0, %dim1} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%dim2} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%dim0, %dim1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%dim2} %3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0], sizes =[%dim0, %dim1, 96], strides=[1, 1, 1] : !flow.dispatch.tensor>{%dim0, %dim1} -> tensor // CHECK: tensor.collapse_shape // CHECK: tensor.expand_shape @@ -102,10 +94,8 @@ func.func @dont_fold_dynamic_reshape() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 288)> // CHECK-LABEL: func.func @fold_reshape_slice_store @@ -114,9 +104,9 @@ func.func @fold_reshape_slice_store(%x: index) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cst = arith.constant 0.0 : f32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) : !flow.dispatch.tensor> + // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %{{.*}}, {{.*}} %3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x1x96xf32> // CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[LOAD]] : tensor<3x3x1x96xf32>) diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir index 1f99752853251..7a4c4717f3901 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir @@ -1,27 +1,25 @@ // RUN: iree-opt --split-input-file \ // RUN: --iree-codegen-convert-bf16-to-uint16-buffers %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // CHECK-LABEL: @bf16_conversion func.func @bf16_conversion() { %c0 = arith.constant 0 : index %c8 = arith.constant 8 : index - // CHECK-DAG: %[[BUF0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} - // CHECK-DAG: %[[BUF1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} - // CHECK-DAG: %[[BUF2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref>{%c8} + // CHECK-DAG: %[[BUF0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} + // CHECK-DAG: %[[BUF1:.+]] = hal.interface.binding.subspan layout({{.+}}) 
binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} + // CHECK-DAG: %[[BUF2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref>{%c8} // CHECK-DAG: %[[LOAD0:.+]] = memref.load %[[BUF0]][%arg0] : memref> // CHECK-DAG: %[[LOAD1:.+]] = memref.load %[[BUF1]][%arg0] : memref> // CHECK: memref.store %{{.+}}, %[[BUF2]][%arg0] : memref> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref>{%c8} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%c8} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref>{%c8} %3 = gpu.thread_id x %4 = gpu.block_dim x scf.for %arg0 = %3 to %c8 step %4 { @@ -48,11 +46,9 @@ func.func @bf16_constant(%arg0 : bf16) -> bf16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: @iree_uk_mmt4d @@ -77,11 +73,11 @@ func.func @mmt4d_bf16xbf16xf32() { %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x3x8x1xbf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x3x8x1xbf16> memref.assume_alignment %0, 64 : 
memref<1x3x8x1xbf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>> memref.assume_alignment %1, 64 : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>> memref.assume_alignment %2, 64 : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -131,10 +127,8 @@ func.func @load_trunc_f32_bf16(%arg0 : memref<32xf32>, %arg1 : memref<32xbf16>) // is rewritten correctly, along with any following ops. 
// See issue https://github.com/iree-org/iree/issues/17177 -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: module @extract_strided_metadata @@ -144,7 +138,7 @@ module @extract_strided_metadata { func.func @external_func_entry_point() attributes {translation_info = #iree_codegen.translation_info} { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> // CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan {{.*}} : memref<1x8x768xi16, %base_buffer, %offset, %sizes:3, %strides:3 = iree_codegen.extract_strided_metadata %2 : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> -> memref, index, index, index, index, index, index, index // CHECK: {{.+}} = iree_codegen.extract_strided_metadata %[[SUBSPAN]] : memref<1x8x768xi16, diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir index 1101469a0fb3a..a185567d220c3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir @@ -1,21 +1,19 @@ // RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-codegen-convert-to-destination-passing-style),canonicalize,cse)" --split-input-file | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, 
storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul() { %m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%k, %n} - %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%m, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%k, %n} + %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%m, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : index %wg_size_y = hal.interface.workgroup.size[1] : index @@ -40,10 +38,10 @@ func.func @matmul() { return } // CHECK: func.func @matmul() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// 
CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]] @@ -56,12 +54,10 @@ func.func @matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_fill() { %cst = arith.constant 0.0 : f32 @@ -69,9 +65,9 @@ func.func @matmul_fill() { %m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%k, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%k, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : index %wg_size_y = hal.interface.workgroup.size[1] : index @@ -97,9 +93,9 @@ func.func @matmul_fill() { return } // CHECK: func.func @matmul_fill() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]] @@ -114,21 +110,19 @@ func.func @matmul_fill() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_inplace() { %c0 = arith.constant 0 : index %m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%k, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%k, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : index %wg_size_y = 
hal.interface.workgroup.size[1] : index @@ -153,9 +147,9 @@ func.func @matmul_inplace() { return } // CHECK: func.func @matmul_inplace() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]] @@ -168,11 +162,9 @@ func.func @matmul_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reshape_simple() { %c0 = arith.constant 0 : index @@ -180,27 +172,25 @@ func.func @reshape_simple() { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c12 = arith.constant 12 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor> return } 
// CHECK: func.func @reshape_simple() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]] // CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]] // CHECK: flow.dispatch.tensor.store %[[RESHAPE]], %[[RET0]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reshape_fused_source() { %c0 = arith.constant 0 : index @@ -208,8 +198,8 @@ func.func @reshape_fused_source() { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c12 = arith.constant 12 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> %4 = tensor.empty() : tensor<3x4xi32> @@ -225,8 +215,8 @@ func.func @reshape_fused_source() { return } // CHECK: func.func @reshape_fused_source() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]] // CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]] // CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]] @@ -237,12 +227,10 @@ func.func @reshape_fused_source() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @reshape_fused_source_and_copyout() { %c0 = arith.constant 0 : index @@ -250,9 +238,9 @@ func.func @reshape_fused_source_and_copyout() { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c12 = arith.constant 12 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %4 = tensor.expand_shape %3 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> %5 = tensor.empty() : tensor<3x4xi32> @@ -269,9 +257,9 @@ func.func @reshape_fused_source_and_copyout() { return } // CHECK: func.func @reshape_fused_source_and_copyout() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]] // CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]] // CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]] @@ -283,11 +271,9 @@ func.func @reshape_fused_source_and_copyout() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reshape_fused_target() { %c0 = arith.constant 0 : index @@ -295,8 +281,8 @@ func.func @reshape_fused_target() { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c12 = arith.constant 12 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x4xi32> %3 = tensor.empty() : tensor<3x4xi32> %4 = linalg.generic { @@ -312,8 +298,8 @@ func.func @reshape_fused_target() { return } // CHECK: func.func @reshape_fused_target() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]] // CHECK-DAG: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]] // CHECK-DAG: %[[RESHAPE_EXPAND:.+]] = tensor.expand_shape %[[TARGET]] {{\[}}[0, 1]{{\]}} @@ -325,12 +311,10 @@ func.func @reshape_fused_target() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @cast_followed_by_store() { %c0 = arith.constant 0 : index @@ -339,9 +323,9 @@ func.func @cast_followed_by_store() { %c64 = arith.constant 64 : index %c1 = arith.constant 1 : index %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -368,9 +352,9 @@ func.func @cast_followed_by_store() { return } // CHECK: func.func @cast_followed_by_store() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RESULT:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]] @@ -385,13 +369,11 @@ func.func @cast_followed_by_store() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @multi_result() { @@ -408,10 +390,10 @@ func.func @multi_result() { %dim5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index %dim6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index %dim7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%dim0, %dim1} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%dim2, %dim3} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%dim4, %dim5} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%dim6, %dim7} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%dim0, %dim1} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%dim2, %dim3} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%dim4, 
%dim5} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%dim6, %dim7} %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(9) : index %6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(10) : index @@ -448,10 +430,10 @@ func.func @multi_result() { return } // CHECK: func.func @multi_result() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RESULT0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RESULT1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RESULT0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RESULT1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]] @@ -466,12 +448,10 @@ func.func @multi_result() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @unused_ins_operand() { %c64 = arith.constant 64 : index @@ -489,9 +469,9 @@ func.func @unused_ins_operand() { %9 = arith.index_cast %3 : i32 to index %10 = arith.index_cast %4 : i32 to index %11 = arith.index_cast %5 : i32 to index - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c32) : !flow.dispatch.tensor>{%6, %7, %8} - %13 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c64) : !flow.dispatch.tensor>{%9, %10, %11} - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%9, %10, %8} + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c32) : !flow.dispatch.tensor>{%6, %7, %8} + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c64) : !flow.dispatch.tensor>{%9, %10, %11} + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%9, %10, %8} %15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [%9, %10, %11], strides = [1, 1, 1] : !flow.dispatch.tensor>{%9, %10, %11} -> tensor %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -531,8 +511,8 @@ func.func @unused_ins_operand() { return } // CHECK-LABEL: func.func @unused_ins_operand() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[IN_VIEW:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[OUT_VIEW:.+]] = flow.dispatch.tensor.load %[[OUT]] // CHECK: linalg.generic @@ -541,17 +521,15 @@ func.func @unused_ins_operand() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @cumsum__2x2x2x2x2x2x2() { %cst = arith.constant dense<0.000000e+00> : tensor<2x2x2x2x2x2x2xf32> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0, 0], sizes = [3, 2, 2, 2, 2, 2, 2], strides = [1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x2x2x2x2x2x2xf32> %3 = tensor.empty() : tensor<2xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0 + d7, d1, d2, d3, d4, d5, d6)>, @@ -577,17 +555,15 @@ func.func @cumsum__2x2x2x2x2x2x2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reduce_window_max_4x6xf32() { %cst = arith.constant dense<0xFF800000> : tensor<2x2xf32> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 4, 6], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x4x6xf32> %3 = tensor.empty() : tensor<2x2x3xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d2, d0 * 2 + d3, d1 * 3 + d4)>, affine_map<(d0, d1, d2, 
d3, d4) -> (d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%2, %3 : tensor<2x4x6xf32>, tensor<2x2x3xf32>) outs(%cst : tensor<2x2xf32>) { @@ -607,14 +583,12 @@ func.func @reduce_window_max_4x6xf32() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @sort1D() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<4xi32>) { ^bb0(%arg0: i32, %arg1: i32): @@ -625,7 +599,7 @@ func.func @sort1D() { return } // CHECK: func.func @sort1D() -// CHECK-DAG: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK-DAG: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[BUF]] // CHECK: %[[SORT:.+]] = iree_linalg_ext.sort // CHECK-SAME: outs(%[[IN]] : tensor<4xi32>) @@ -633,18 +607,16 @@ func.func @sort1D() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @clone_index_computations() { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor>{%1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %4 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] @@ -664,31 +636,29 @@ func.func @clone_index_computations() { return } // CHECK-LABEL: func @clone_index_computations() -// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: scf.for // CHECK: %[[TILESIZE:.+]] = affine.min // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[OUTPUT]], offsets = [{{.+}}], sizes = [%[[TILESIZE]]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<5, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @gemm_gather() { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(3) : !flow.dispatch.tensor> - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1] @@ -724,19 +694,17 @@ func.func @gemm_gather() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @reduce_broadcast_generic() { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets= [0, 0], sizes = [10, 1024], strides= [1, 1] : !flow.dispatch.tensor> -> 
tensor<10x1024xf32> %4 = flow.dispatch.tensor.load %1, offsets= [0], sizes = [10], strides= [1] @@ -767,7 +735,7 @@ func.func @reduce_broadcast_generic() { return } // CHECK-LABEL: func @reduce_broadcast_generic -// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] // CHECK: %[[RESULT:.+]]:2 = linalg.generic // CHECK: linalg.generic @@ -776,16 +744,14 @@ func.func @reduce_broadcast_generic() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @pack() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = tensor.empty() : tensor<2x2x2x2xi32> %pack = tensor.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32> @@ -793,24 +759,22 @@ func.func @pack() { return } // CHECK-LABEL: func.func @pack -// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// 
CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] // CHECK: tensor.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @unpack() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> %3 = tensor.empty() : tensor<4x4xi32> %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> @@ -818,19 +782,17 @@ func.func @unpack() { return } // CHECK-LABEL: func.func @unpack -// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] // CHECK: tensor.unpack 
%[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -843,8 +805,8 @@ func.func @non_perfect_tiling_unpack() { %0:2 = iree_codegen.query_tile_sizes tensor<16x16xi32, #iree_encoding.encoding> -> index, index %1 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#0] %2 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#1] - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c512) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c512) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -877,26 +839,24 @@ func.func @non_perfect_tiling_unpack() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @multi_result_dispatches() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) 
alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %30 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) + %30 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = tensor.empty() : tensor<120x360xf32> %cst = arith.constant 0.0 : f32 @@ -925,12 +885,12 @@ func.func @multi_result_dispatches() { return } // CHECK-LABEL: func @multi_result_dispatches() -// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[BIAS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[BIAS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RESULT_BINDING0:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK-DAG: %[[RESULT0:.+]] = flow.dispatch.tensor.load %[[RESULT_BINDING0]] -// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(4) +// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(4) // CHECK-DAG: %[[RESULT1:.+]] = flow.dispatch.tensor.load %[[RESULT_BINDING1]] // CHECK: %[[FILL:.+]] = linalg.fill // CHECK-SAME: outs(%[[RESULT1]] : @@ -948,12 +908,10 @@ func.func @multi_result_dispatches() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @if_conversion() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -961,11 +919,11 @@ func.func @if_conversion() { %size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %cond = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i1 %result_offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index - %then = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %then = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0} - %else = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %else = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%0} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0} %then_value = flow.dispatch.tensor.load %then, offsets = [%offset], sizes = [%size], strides = [1] : !flow.dispatch.tensor>{%0} -> tensor @@ -985,9 +943,9 @@ func.func @if_conversion() { // CHECK-DAG: %[[S1:.+]] = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(2) // CHECK-DAG: %[[COND:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) // CHECK-DAG: %[[OFFSET:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) -// CHECK-DAG: %[[THEN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ELSE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[THEN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ELSE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[THEN:.+]] = flow.dispatch.tensor.load %[[THEN_BINDING]] // CHECK-DAG: %[[ELSE:.+]] = flow.dispatch.tensor.load %[[ELSE_BINDING]] // CHECK: scf.if %[[COND]] { @@ -1003,11 +961,9 @@ func.func @if_conversion() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @if_conversion_clone_offsets() { %cst = arith.constant 0.000000e+00 : f32 @@ -1022,8 +978,8 @@ func.func @if_conversion_clone_offsets() { %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%6, %7} - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor>{%8, %9} + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%6, %7} + %11 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor>{%8, %9} %12 = affine.apply affine_map<()[s0, s1] -> (-s0 + s1 + (s0 ceildiv 16) * 16)>()[%6, %6] %13 = affine.apply affine_map<()[s0, s1] -> (-s0 + s1 + (s0 ceildiv 16) * 16)>()[%7, %7] %workgroup_id_x = hal.interface.workgroup.id[0] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir index 46f30fe01b3c0..771ef0adc8c16 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir @@ -31,21 +31,19 @@ func.func public @conv_with_consumer(%arg0: tensor<1x16x16x4xf32>, %arg1: tensor // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{thread = [2, 16], subgroup = [2, 16]}> #map = affine_map<(d0, d1) -> (d0, d1)> module { func.func @fold_with_interface_tensor() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 16, 16, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x16x16x4xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x4x16xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x14x14x16xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir index 38ee86b35ef83..d39c341568dae 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir @@ -3,19 +3,17 @@ #config = #iree_codegen.lowering_config #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module { func.func @restrict_num_workgroups() attributes {hal.executable.target = #executable_target_system_elf_arm_64_, translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %input = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 1, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x4x4xf32> %filter = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 4, 4], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x4x4xf32> %5 = tensor.empty() : tensor<1x1x1x4xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir b/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir index 45301a4e5ba8f..f603d395cd2c8 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir @@ -2,17 +2,15 @@ // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @eliminate_empty_tensors_with_store_op() { %c0 = arith.constant 0 : index %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %1 = tensor.empty() : tensor<32x384xf32> scf.for %arg0 = %c0 to %c128 step %c32 { %2 = scf.for %arg1 = %c0 to %c32 step %c8 iter_args(%arg2 = %1) -> (tensor<32x384xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir b/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir index 84f84cc8a0a31..f6b2ee26939fe 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir +++ 
b/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-codegen-emulate-narrow-type %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @memref_i4_to_i8() -> i4 { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x15xi4> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x15xi4> %1 = memref.load %0[%c0, %c0] : memref<3x15xi4> return %1 : i4 } @@ -16,14 +14,12 @@ func.func @memref_i4_to_i8() -> i4 { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @memref_i4_to_i8_dynamic(%arg0 : index, %arg1 : index, %arg2 : index) -> i4 { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) flags(ReadOnly) : memref>{%arg1, %arg2} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) flags(ReadOnly) : memref>{%arg1, %arg2} %1 = memref.load %0[%c0, %c0] : memref> return %1 : i4 } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir b/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir index 48c1922c00f46..bb3e4bd63ae7a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --iree-codegen-flatten-memref-subspan --canonicalize --allow-unregistered-dialect %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @load_subspan_with_offset(%offset : index, %i0: index, %i1: index, %i2: index) 
-> f32 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>> %val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>> return %val: f32 } @@ -17,20 +15,18 @@ func.func @load_subspan_with_offset(%offset : index, %i0: index, %i1: index, %i2 // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index) // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] // CHECK: return %[[LOAD]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_subspan_with_offset(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>> memref.store %value, %subspan[%i0, %i1, %i2] : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>> return } @@ -41,19 +37,17 @@ func.func @store_subspan_with_offset(%value: f32, %offset : index, %i0: index, % // CHECK-SAME: (%[[VALUE:.+]]: f32, %[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: 
index) // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: memref.store %[[VALUE]], %[[SUBSPAN]][%[[INDEX]]] : memref // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @load_subspan_with_vector_element(%offset : index, %i0: index, %i1: index, %i2: index) -> vector<4xf32> { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>> %val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>> return %val: vector<4xf32> } @@ -64,13 +58,11 @@ func.func @load_subspan_with_vector_element(%offset : index, %i0: index, %i1: in // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @load_subspan_with_16bit_element(%offset : index, %i0: index, %i1: index, %i2: index) -> f16 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>> %val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>> return %val: f16 } @@ -81,15 +73,13 @@ func.func @load_subspan_with_16bit_element(%offset : 
index, %i0: index, %i1: ind // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_subspan_with_leading_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) { %dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref>{%dim} + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref>{%dim} memref.store %value, %subspan[%i0, %i1, %i2] : memref> return } @@ -101,23 +91,21 @@ func.func @store_subspan_with_leading_dynamic_dim(%value: f32, %offset : index, // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index // CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM]], %[[OFFSET]]] -// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] : memref // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_subspan_with_all_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index, %i3: index) { %dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %dim2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %dim3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref>{%dim0, 
%dim1, %dim2, %dim3} + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref>{%dim0, %dim1, %dim2, %dim3} memref.store %value, %subspan[%i0, %i1, %i2, %i3] : memref> return } @@ -132,21 +120,19 @@ func.func @store_subspan_with_all_dynamic_dim(%value: f32, %offset : index, %i0: // CHECK: %[[DIM2:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index // CHECK: %[[DIM3:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index // CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM0]], %[[DIM1]], %[[DIM2]], %[[DIM3]], %[[OFFSET]]] -// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[DIM3]], %[[I3]], %[[DIM2]], %[[I2]], %[[I0]], %[[DIM1]], %[[I1]]] // CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_subspan_with_mixed_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index, %i3: index) { %dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref>{%dim0, %dim1} + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref>{%dim0, %dim1} memref.store %value, %subspan[%i0, %i1, %i2, %i3] : memref> return } @@ -159,20 +145,18 @@ func.func @store_subspan_with_mixed_dynamic_dim(%value: f32, %offset : index, %i // CHECK: %[[DIM0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index // CHECK: %[[DIM2:.+]] = hal.interface.constant.load 
layout({{.+}}) ordinal(1) : index // CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM0]], %[[DIM2]], %[[OFFSET]]] -// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I3]], %[[DIM2]], %[[I2]], %[[I0]], %[[I1]]] // CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_subspan_with_flow_control(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) { %dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref>{%dim} + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref>{%dim} scf.for %i = %i0 to %i1 step %i2 { memref.store %value, %subspan[%i0, %i1, %i2] : memref> } @@ -186,7 +170,7 @@ func.func @store_subspan_with_flow_control(%value: f32, %offset : index, %i0: in // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index // CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM]], %[[OFFSET]]] -// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: scf.for // CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] : memref @@ -256,14 +240,12 @@ func.func @load_global_with_offset(%i0: index, %i1: index, %i2: index, %i3: inde // ----- 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_read_subspan_with_offset( %arg0 : index, %arg1: index, %arg2: index, %arg3: index) -> vector<4xf32> { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> %cst = arith.constant 0.0 : f32 %val = vector.transfer_read %subspan[%arg1, %arg2, %arg3], %cst {in_bounds = [true]} : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>, vector<4xf32> return %val: vector<4xf32> @@ -278,21 +260,19 @@ func.func @transfer_read_subspan_with_offset( // CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]]] -// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]] // CHECK: %[[VEC:.+]] = vector.transfer_read %[[MEMREF]][%[[INDEX]]] // CHECK: return %[[VEC]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_write_subspan_with_offset( %arg0 : index, %arg1: index, %arg2: index, %arg3: index, %arg4 : vector<4xf32>) { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> vector.transfer_write %arg4, %subspan[%arg1, %arg2, %arg3] {in_bounds = [true]} : 
vector<4xf32>, memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> return } @@ -307,21 +287,19 @@ func.func @transfer_write_subspan_with_offset( // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: vector<4xf32> // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]]] -// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]] // CHECK: vector.transfer_write %[[ARG4]], %[[MEMREF]][%[[INDEX]]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @load_store_subspan_with_zero_offset(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { - %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%arg0, %arg1} - %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%arg0, %arg1} + %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%arg0, %arg1} + %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%arg0, %arg1} %val = memref.load %subspan0[%arg2, %arg3] : memref memref.store %val, %subspan1[%arg2, %arg3] : memref return @@ -335,9 +313,9 @@ func.func @load_store_subspan_with_zero_offset(%arg0 : index, %arg1 : index, %ar // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[D0:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]], %[[ARG1]]] -// CHECK: %[[BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[D0]]} +// CHECK: %[[BINDING0:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[D0]]} // CHECK: %[[D1:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]], %[[ARG1]]] -// CHECK: %[[BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref{%[[D1]]} +// CHECK: %[[BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref{%[[D1]]} // CHECK: %[[OFFSET0:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] // CHECK: %[[VAL:.+]] = memref.load %[[BINDING0]][%[[OFFSET0]]] // CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] @@ -345,16 +323,14 @@ func.func @load_store_subspan_with_zero_offset(%arg0 : index, %arg1 : index, %ar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @load_store_rank_zero_subspan_with_zero_offset() { %zero = arith.constant 0 : index - %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%zero) : memref - %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%zero) : memref + %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%zero) : memref + %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%zero) : memref %val = memref.load %subspan0[] : memref memref.store %val, %subspan1[] : memref return @@ -362,20 +338,18 @@ func.func @load_store_rank_zero_subspan_with_zero_offset() { //CHECK-LABEL: func.func @load_store_rank_zero_subspan_with_zero_offset // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref -// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(0) offset(%[[C0]]) : memref +// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @load_store_rank_zero_subspan_with_offset(%offset : index) { - %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref> - %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref> + %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref> + %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref> %val = memref.load %subspan0[] : memref> memref.store %val, %subspan1[] : memref> return @@ -387,9 +361,9 @@ func.func @load_store_rank_zero_subspan_with_offset(%offset : index) { // CHECK-SAME: (%[[OFFSET:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE0:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE0]]} +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE0]]} // CHECK-DAG: %[[SIZE1:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref{%[[SIZE1]]} +// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref{%[[SIZE1]]} // CHECK: %[[INDEX0:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]] // CHECK: %[[LOAD:.+]] = memref.load %[[SPAN0]][%[[INDEX0]]] : memref // CHECK: %[[INDEX1:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]] @@ -397,13 +371,11 @@ func.func 
@load_store_rank_zero_subspan_with_offset(%offset : index) { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @collapse_shape(%offset : index, %i0 : index, %i1 : index) -> f32 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> %collapse = memref.collapse_shape %subspan[[0, 1], [2, 3]] : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> into memref<20x42xf32, strided<[42, 1], offset:?>> %value = memref.load %collapse[%i0, %i1] : memref<20x42xf32, strided<[42, 1], offset:?>> return %value : f32 @@ -415,19 +387,17 @@ func.func @collapse_shape(%offset : index, %i0 : index, %i1 : index) -> f32 { // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]] // CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @expand_shape(%offset : index, %i0: index, %i1: index, %i2: index, %i3: index) -> f32 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<20x42xf32, strided<[42, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<20x42xf32, strided<[42, 1], offset:?>> 
%expand = memref.expand_shape %subspan[[0, 1], [2, 3]] output_shape [4, 5, 6, 7] : memref<20x42xf32, strided<[42, 1], offset:?>> into memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> %value = memref.load %expand[%i0, %i1, %i2, %i3] : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> return %value : f32 @@ -439,19 +409,17 @@ func.func @expand_shape(%offset : index, %i0: index, %i1: index, %i2: index, %i3 // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index, %[[I3:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]], %[[I3]]] // CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @expand_shape2(%offset : index, %i0: index, %i1: index) -> f32 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>> %expand = memref.expand_shape %subspan [[0, 1]] output_shape [1, 128] : memref<128xf32, strided<[1], offset: ?>> into memref<1x128xf32, strided<[128, 1], offset: ?>> %value = memref.load %expand[%i0, %i1] : memref<1x128xf32, strided<[128, 1], offset: ?>> return %value : f32 @@ -463,7 +431,7 @@ func.func @expand_shape2(%offset : index, %i0: index, %i1: index) -> f32 { // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 
: index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]] // CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]] @@ -473,13 +441,11 @@ func.func @expand_shape2(%offset : index, %i0: index, %i1: index) -> f32 { // be able to do so (a memref cast is inserted to move between unknown and // known dim). -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @static_collapse_shape_to_1d_static(%offset : index, %i: index) { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> %collapse = memref.collapse_shape %subspan [[0, 1, 2]] : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> into memref<336xf32, strided<[1], offset: ?>> "unregistered.opaque"(%collapse) : (memref<336xf32, strided<[1], offset: ?>>) -> () } @@ -491,20 +457,18 @@ func.func @static_collapse_shape_to_1d_static(%offset : index, %i: index) { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[OFFSET:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]] // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[SUBSPAN]][%[[OFFSET]]] [336] [1] : memref to memref<336xf32, strided<[1], 
offset: ?>> // CHECK: "unregistered.opaque"(%[[SUBVIEW]]) // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @subview(%offset : index, %i0: index, %i1: index) -> f32 { %c0 = arith.constant 0 : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x128xf32, strided<[128, 1], offset: ?>> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x128xf32, strided<[128, 1], offset: ?>> %expand = memref.subview %subspan[%i0, %i1][16, 8][1, 1] : memref<32x128xf32, strided<[128, 1], offset: ?>> to memref<16x8xf32, strided<[128, 1], offset: ?>> %value = memref.load %expand[%c0, %c0] : memref<16x8xf32, strided<[128, 1], offset: ?>> return %value : f32 @@ -516,7 +480,7 @@ func.func @subview(%offset : index, %i0: index, %i1: index) -> f32 { // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]] // CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]] @@ -553,13 +517,11 @@ func.func @subgroup_mma_store(%i0: index, %i1: index, %val: !gpu.mma_matrix<16x1 // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @subgroup_mma_load_with_offset(%offset : index, %i0: index, %i1: index) -> !gpu.mma_matrix<16x16xf16, "AOp"> { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> + %subspan = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> %0 = gpu.subgroup_mma_load_matrix %subspan[%i0, %i1] {leadDimension = 32 : index} : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> -> !gpu.mma_matrix<16x16xf16, "AOp"> return %0 : !gpu.mma_matrix<16x16xf16, "AOp"> } @@ -570,20 +532,18 @@ func.func @subgroup_mma_load_with_offset(%offset : index, %i0: index, %i1: index // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index) // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP2]]()[%[[OFFSET]], %[[I0]], %[[I1]]] // CHECK: %[[LD:.+]] = gpu.subgroup_mma_load_matrix %[[SUBSPAN]][%[[INDEX]]] {leadDimension = 32 : index} // CHECK: return %[[LD]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @subgroup_mma_store_with_offset(%offset : index, %i0: index, %i1: index, %val: !gpu.mma_matrix<16x16xf16, "COp">) { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> gpu.subgroup_mma_store_matrix %val, %subspan[%i0, %i1] {leadDimension = 128 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16, strided<[32, 1], offset: ?>, 3> return } @@ -594,19 +554,17 @@ func.func @subgroup_mma_store_with_offset(%offset : index, %i0: index, %i1: inde // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, 
%[[I1:.+]]: index, %[[VAL:.+]]: !gpu.mma_matrix<16x16xf16, "COp"> // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP2]]()[%[[OFFSET]], %[[I0]], %[[I1]]] // CHECK: gpu.subgroup_mma_store_matrix %[[VAL]], %[[SUBSPAN]][%[[INDEX]]] {leadDimension = 128 : index} // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @load_uniform_buffer(%offset: index, %i0: index, %i1 : index, %i2: index) -> i32 { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> %val = memref.load %subspan[%i0, %i1, %i2] : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> return %val: i32 } @@ -615,7 +573,7 @@ func.func @load_uniform_buffer(%offset: index, %i0: index, %i1 : index, %i2: ind // CHECK-LABEL: func.func @load_uniform_buffer // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index) // CHECK: %[[C0:.+]] = arith.constant 0 : index -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref> +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref> // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: %[[LD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref> // CHECK: return 
%[[LD]] : i32 @@ -623,13 +581,11 @@ func.func @load_uniform_buffer(%offset: index, %i0: index, %i1 : index, %i2: ind // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @store_uniform_buffer(%value : i32, %offset: index, %i0: index, %i1 : index, %i2: index) { - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> memref.store %value, %subspan[%i0, %i1, %i2] : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type> return } @@ -640,21 +596,19 @@ func.func @store_uniform_buffer(%value : i32, %offset: index, %i0: index, %i1 : // CHECK-SAME: (%[[VAL:.+]]: i32, %[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index) // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]] -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref>{%[[SIZE]]} +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref>{%[[SIZE]]} // CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]] // CHECK: memref.store %[[VAL]], %[[SUBSPAN]][%[[INDEX]]] : memref> // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @reinterpret_cast_lowering_static_zero_offset() -> f32 { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%0, %1} + %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) : memref{%0, %1} %3 = memref.reinterpret_cast %2 to offset: [0], sizes: [], strides: [] : memref to memref %4 = memref.load %3[] : memref return %4 : f32 @@ -664,16 +618,14 @@ func.func @reinterpret_cast_lowering_static_zero_offset() -> f32 { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @reinterpret_cast_lowering_dynamic_zero_offset() -> f32 { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%0, %1} %3 = memref.reinterpret_cast %2 to offset: [%c0], sizes: [], strides: [] : memref to memref %4 = memref.load %3[] : memref return %4 : f32 diff --git a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir index b2529d63a1620..613948088ebaa 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-fold-affinemin-in-distributed-loops, canonicalize)))))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable public @generic_static { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -23,8 +21,8 @@ hal.executable public @generic_static { // CHECK:} -> tensor<32x32xf32> // CHECK: flow.dispatch.tensor.store {{.*}} sizes = [32, 32], 
strides = [1, 1] : tensor<32x32xf32> -> !flow.dispatch.tensor> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %2 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 4096)>()[%workgroup_id_y] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir b/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir index 2e1ab038567b2..594b8ee119a18 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-hoist-vector-extract-insert-slice))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @hoist_unrolled_vector_for_mma() { %c0 = arith.constant 0 : index @@ -13,11 +11,11 @@ func.func @hoist_unrolled_vector_for_mma() { %cst_0 = arith.constant dense<0.000000e+00> : vector<32x32xf32> %c64 = arith.constant 64 : index %c2048 = arith.constant 2048 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) 
offset(%c0) flags(ReadOnly) : memref<3456x2048xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3456x2048xf16> memref.assume_alignment %0, 64 : memref<3456x2048xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<2048x1024xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<2048x1024xf16> memref.assume_alignment %1, 64 : memref<2048x1024xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf32> memref.assume_alignment %2, 64 : memref<3456x1024xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %3 = gpu.thread_id x diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir index c96a6dbe4e080..fbe6b456489c8 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir @@ -1,22 +1,20 @@ // RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-codegen-iree-comprehensive-bufferize, canonicalize, cse, canonicalize))" --split-input-file | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul() { %c0 = arith.constant 0 : index %m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : 
index %n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%k, %n} - %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%m, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%k, %n} + %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%m, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : index %wg_size_y = hal.interface.workgroup.size[1] : index @@ -47,10 +45,10 @@ func.func @matmul() { // CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1] // CHECK-DAG: %[[WG_COUNT_Y:.+]] = hal.interface.workgroup.count[1] // CHECK-DAG: %[[WG_SIZE_Y:.+]] = hal.interface.workgroup.size[1] @@ -78,12 +76,10 @@ func.func @matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_fill() { %cst = arith.constant 0.0 : f32 @@ -94,9 +90,9 @@ func.func @matmul_fill() { %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %base_offset_i32 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) alignment(8) : i32 %base_offset = arith.index_castui %base_offset_i32 : i32 to index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%base_offset) : !flow.dispatch.tensor>{%k, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c1024) : !flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%base_offset) : !flow.dispatch.tensor>{%k, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c1024) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : 
index %wg_size_y = hal.interface.workgroup.size[1] : index @@ -131,11 +127,11 @@ func.func @matmul_fill() { // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[BASE_OFFSET_I32:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) // CHECK-DAG: %[[BASE_OFFSET:.+]] = arith.index_castui %[[BASE_OFFSET_I32]] -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(32) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(32) // CHECK-DAG: memref.assume_alignment %[[LHS]], 32 -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[BASE_OFFSET]]) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[BASE_OFFSET]]) // CHECK-DAG: memref.assume_alignment %[[RHS]], 8 -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c1024) +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c1024) // CHECK-DAG: memref.assume_alignment %[[RESULT]], 64 // CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1] // CHECK-DAG: %[[WG_COUNT_Y:.+]] = hal.interface.workgroup.count[1] @@ -164,11 +160,9 @@ func.func @matmul_fill() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @elementwise() { %c4 = arith.constant 4 : index @@ -177,8 +171,8 @@ func.func @elementwise() { %c512 = arith.constant 512 : index %c64 = arith.constant 64 : index %c10 = arith.constant 10 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c512) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) 
offset(%c64) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c512) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c64) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %2 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] @@ -213,8 +207,8 @@ func.func @elementwise() { // CHECK: func.func @elementwise() // CHECK-DAG: %[[CST_TENSOR:.+]] = arith.constant dense_resource<__elided__> : tensor<1x10xf32> // CHECK-DAG: %[[CST_BUF:.+]] = bufferization.to_memref %[[CST_TENSOR]] -// CHECK-DAG: %[[IN_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 128>, #hal.descriptor_type> -// CHECK-DAG: %[[OUT_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 16>, #hal.descriptor_type> +// CHECK-DAG: %[[IN_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 128>, #hal.descriptor_type> +// CHECK-DAG: %[[OUT_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 16>, #hal.descriptor_type> // CHECK: scf.for // CHECK-DAG: %[[SUB_IN1:.+]] = memref.subview %[[IN_BUF]] // CHECK-DAG: %[[SUB_OUT1:.+]] = memref.subview %[[OUT_BUF]] @@ -228,18 +222,16 @@ func.func @elementwise() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 2)> #map1 = affine_map<(d0) -> (d0)> func.func @rank_reduced_slice() { %c10 = arith.constant 10 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %3 = affine.apply #map0()[%workgroup_id_x] @@ -257,8 +249,8 @@ func.func @rank_reduced_slice() { return } // CHECK: func.func @rank_reduced_slice() -// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1x40xf32, #hal.descriptor_type> -// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<10xf32, #hal.descriptor_type> +// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1x40xf32, #hal.descriptor_type> +// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<10xf32, #hal.descriptor_type> // CHECK: scf.for %[[IV0:.+]] = // CHECK-DAG: %[[SRC_SUBVIEW:.+]] = memref.subview %[[SRC_BINDING]][0, %[[IV0]]] [1, 2] [1, 1] : memref<1x40xf32{{.+}}> to memref<2xf32 // CHECK-DAG: %[[DST_SUBVIEW:.+]] = memref.subview %[[DST_BINDING]][%[[IV0]]] [2] [1] : memref<10xf32{{.+}}> to memref<2xf32 @@ -271,11 +263,9 @@ func.func @rank_reduced_slice() { // Checks that there are no errors in early bufferized copy ops. The // bufferization pass should make it as it is. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @early_bufferized_copy_cst_ops() { %c0 = arith.constant 0 : index @@ -283,9 +273,9 @@ func.func @early_bufferized_copy_cst_ops() { %c2 = arith.constant 2 : index %cst = arith.constant dense<0> : tensor<2x3xi32> %0 = bufferization.to_memref %cst : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2x5xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2x5xi32> memref.assume_alignment %1, 64 : memref<2x5xi32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = memref.subview %1[%c0, %c2] [2, 3] [%c1, %c1] : memref<2x5xi32> to memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>> linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) outs(%3 : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) { ^bb0(%arg0: i32, %arg1: i32): @@ -299,12 +289,10 @@ func.func @early_bufferized_copy_cst_ops() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @tile_from_tensor_load_inplace() { %c2 = arith.constant 2 : index @@ -313,9 +301,9 @@ func.func 
@tile_from_tensor_load_inplace() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -331,9 +319,9 @@ func.func @tile_from_tensor_load_inplace() { } // CHECK-LABEL: func.func @tile_from_tensor_load_inplace() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] @@ -345,13 +333,11 @@ func.func 
@tile_from_tensor_load_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @tile_from_tensor_load_inplace_and_copy() { %c2 = arith.constant 2 : index @@ -360,10 +346,10 @@ func.func @tile_from_tensor_load_inplace_and_copy() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -380,10 +366,10 @@ func.func @tile_from_tensor_load_inplace_and_copy() { } // CHECK-LABEL: func.func @tile_from_tensor_load_inplace_and_copy() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] @@ -397,12 +383,10 @@ func.func @tile_from_tensor_load_inplace_and_copy() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @tile_from_pointwise_lhs_inplace() { @@ -412,9 +396,9 @@ func.func @tile_from_pointwise_lhs_inplace() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -436,9 +420,9 @@ func.func @tile_from_pointwise_lhs_inplace() { } // CHECK-LABEL: func.func @tile_from_pointwise_lhs_inplace() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] @@ -454,13 +438,11 @@ func.func @tile_from_pointwise_lhs_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @tile_from_pointwise_outs() { @@ -470,10 +452,10 @@ func.func @tile_from_pointwise_outs() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -494,10 +476,10 @@ func.func @tile_from_pointwise_outs() { return } // CHECK-LABEL: func.func @tile_from_pointwise_outs() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // 
CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] @@ -513,12 +495,10 @@ func.func @tile_from_pointwise_outs() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @tile_from_pointwise_outs_inplace() { @@ -529,9 +509,9 @@ func.func @tile_from_pointwise_outs_inplace() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -552,9 +532,9 @@ func.func @tile_from_pointwise_outs_inplace() { } // CHECK-LABEL: func.func @tile_from_pointwise_outs_inplace() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) 
set(0) binding(1) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] @@ -568,12 +548,10 @@ func.func @tile_from_pointwise_outs_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @tile_from_matmul_outs_inplace() { %c2 = arith.constant 2 : index @@ -582,9 +560,9 @@ func.func @tile_from_matmul_outs_inplace() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = 
hal.interface.workgroup.id[1] : index scf.for %arg0 = %workgroup_id_y to %c2 step %c2 { @@ -601,9 +579,9 @@ func.func @tile_from_matmul_outs_inplace() { } // CHECK-LABEL: func.func @tile_from_matmul_outs_inplace() -// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] @@ -616,12 +594,10 @@ func.func @tile_from_matmul_outs_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)> #map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)> @@ -633,9 +609,9 @@ func.func @bufferize_dynamic_inplace() { %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%4, 
%5} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%4, %5} %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -671,9 +647,9 @@ func.func @bufferize_dynamic_inplace() { // CHECK: %[[DIM3:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index // CHECK: %[[DIM4:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(4) : index // CHECK: %[[DIM5:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(5) : index -// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref>{%[[DIM0]], %[[DIM1]]} -// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref>{%[[DIM2]], %[[DIM3]]} -// CHECK: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref>{%[[DIM4]], %[[DIM5]]} +// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref>{%[[DIM0]], %[[DIM1]]} +// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref>{%[[DIM2]], %[[DIM3]]} +// CHECK: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref>{%[[DIM4]], %[[DIM5]]} // CHECK-DAG: %[[WGSIZE_X:.+]] = hal.interface.workgroup.size[0] // CHECK-DAG: %[[WGSIZE_Y:.+]] = hal.interface.workgroup.size[1] // CHECK: scf.for %[[IV0:.+]] = {{.+}} { @@ -691,39 +667,35 @@ func.func @bufferize_dynamic_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reshape_simple() { - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @reshape_simple() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]] // CHECK: linalg.generic {{.*}} ins(%[[RESHAPE]] {{.*}} outs(%[[RET0]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> module { func.func @reshape_fused_source() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 
4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %4 = tensor.expand_shape %3 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> @@ -737,8 +709,8 @@ module { } } // CHECK-LABEL: func.func @reshape_fused_source() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<12xi32, #hal.descriptor_type> -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<12xi32, #hal.descriptor_type> +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type> // CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32 @@ -746,19 +718,17 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @reshape_fused_source_and_copyout() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x4xi32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %5 = tensor.expand_shape %4 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> %6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<3x4xi32>) outs(%2 : tensor<3x4xi32>) { @@ -771,9 +741,9 @@ func.func @reshape_fused_source_and_copyout() { return } // CHECK-LABEL: func.func @reshape_fused_source_and_copyout() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<12xi32, #hal.descriptor_type> -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type> -// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<3x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<12xi32, #hal.descriptor_type> +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<3x4xi32, #hal.descriptor_type> // CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32 @@ -782,16 +752,14 @@ func.func @reshape_fused_source_and_copyout() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @reshape_fused_target() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor> -> tensor<12xi32> %3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x4xi32> @@ -805,8 +773,8 @@ func.func @reshape_fused_target() { return } // CHECK-LABEL: func.func @reshape_fused_target() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<3x4xi32, #hal.descriptor_type> -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<12xi32, #hal.descriptor_type> +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<3x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<12xi32, #hal.descriptor_type> // CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[RET0]] {{\[}}[0, 1]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]] : memref<3x4xi32 @@ -814,12 +782,10 @@ func.func @reshape_fused_target() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0)[s0] -> (-d0 + 1, s0)> #map1 = affine_map<(d0)[s0] -> (-d0 + 3, s0)> @@ -827,9 +793,9 @@ func.func @dot_general_lowering() { %cst = arith.constant 0.000000e+00 : f32 %c3 = arith.constant 3 : index %c1 = arith.constant 1 : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 1, 2], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x2xf32> %4 = tensor.collapse_shape %3 [[0, 1], [2]] : tensor<1x1x2xf32> into tensor<1x2xf32> %workgroup_size_x = hal.interface.workgroup.size[0] : index @@ -857,10 +823,10 @@ func.func @dot_general_lowering() { return } // CHECK-LABEL: func.func @dot_general_lowering() -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[RESHAPE_LHS:.+]] = memref.collapse_shape %[[LHS]] -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %[[IV0:.+]] = {{.+}} { // CHECK: scf.for %[[IV1:.+]] = {{.+}} { // CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[RESHAPE_LHS]][%[[IV0]], 0] @@ -874,37 +840,33 @@ func.func @dot_general_lowering() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding 
]> func.func @slice() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} %6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %7 = tensor.extract_slice %6[%0, %1] [%2, %3] [1, 1] : tensor to tensor flow.dispatch.tensor.store %7, %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%2, %3} return } // CHECK-LABEL: func.func @slice() -// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SUBVIEW:.+]] = memref.subview %[[ARG]] // CHECK: linalg.generic {{.*}} ins(%[[SUBVIEW]] {{.*}} outs(%[[RETURN]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @slice_rank_reducing() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -912,27 +874,25 @@ func.func @slice_rank_reducing() { %2 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%4, %4, %4} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%4, %4, %4} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} %7 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [%4, %4, %4], strides = [1, 1, 1] : !flow.dispatch.tensor>{%4, %4, %4} -> tensor %8 = tensor.extract_slice %7[%0, %0, %1] [%2, 1, %3] [1, 1, 1] : tensor to tensor flow.dispatch.tensor.store %8, %6, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%2, %3} return } // CHECK-LABEL: func.func @slice_rank_reducing() -// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SUBVIEW:.+]] = memref.subview %[[ARG]] // CHECK: linalg.generic {{.*}} ins(%[[SUBVIEW]] {{.*}} outs(%[[RETURN]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @slice_multiple_copy() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -942,9 +902,9 @@ func.func @slice_multiple_copy() { %4 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index %6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%6, %6, %6} - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%3, %4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%3, %5} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%6, %6, %6} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%3, %4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%3, %5} %10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0], sizes = [%6, %6, %6], strides = [1, 1, 1] : !flow.dispatch.tensor>{%6, %6, %6} -> tensor %11 = tensor.extract_slice %10[%0, %1, %2] [%3, %4, %5] [1, 1, 1] : tensor to tensor %12 = tensor.extract_slice %10[%0, %1, %2] [%3, 1, %5] [1, 1, 1] : tensor to tensor @@ -953,9 +913,9 @@ func.func @slice_multiple_copy() { return } // CHECK-LABEL: func.func @slice_multiple_copy() -// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[SIZE1:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index // CHECK-DAG: 
%[[SIZE2:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index // CHECK-DAG: %[[SIZE3:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index @@ -966,15 +926,13 @@ func.func @slice_multiple_copy() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @slice_in_place() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} %3 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor flow.dispatch.tensor.store %3, %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%0, %1} return @@ -984,39 +942,35 @@ func.func @slice_in_place() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @slice_whole_stride_dispatch_0() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(1) : !flow.dispatch.tensor>{%2, %3} %6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %7 = tensor.extract_slice %6[1, 0] [1, 4] [1, 1] : tensor to tensor<1x4xi32> flow.dispatch.tensor.store %7, %5, offsets = [0, 0], sizes = [1, 4], strides = [1, 1] : tensor<1x4xi32> -> !flow.dispatch.tensor>{%2, %3} return } // CHECK-LABEL: func.func @slice_whole_stride_dispatch_0() -// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SUB_IN_FIXED:.+]] = memref.subview %[[INPUT]][1, 0] [1, 4] [1, 1] // CHECK-DAG: %[[SUB_OUT_FIXED:.+]] = memref.subview %[[OUTPUT]][0, 0] [1, 4] [1, 1] // CHECK: linalg.generic {{.*}} ins(%[[SUB_IN_FIXED]] {{.*}} outs(%[[SUB_OUT_FIXED]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @subtensor_insert() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1025,9 +979,9 @@ func.func @subtensor_insert() { %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %8 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%4, %5} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%4, %5} %9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %10 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %11 = tensor.insert_slice %9 into %10[3, 4] [%0, %1] [1, 1] : tensor into tensor @@ -1035,9 +989,9 @@ func.func @subtensor_insert() { return } // CHECK-LABEL: func.func @subtensor_insert() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index // CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) : index // CHECK-DAG: %[[D2:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index @@ -1052,15 +1006,13 @@ func.func @subtensor_insert() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_extract() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 9], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x9xi32> %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %4 = tensor.extract %3[] : tensor @@ -1069,8 +1021,8 @@ func.func @tensor_extract() { return } // CHECK-LABEL: func.func @tensor_extract() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[LOAD:.+]] = memref.load %[[ARG0]] // CHECK: linalg.fill // CHECK-SAME: ins(%[[LOAD]] : @@ -1078,31 +1030,27 @@ func.func @tensor_extract() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @load_to_store() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3x4xi32> flow.dispatch.tensor.store %2, %0, offsets = 
[0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @load_to_store() -// CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<3x4xi32, #hal.descriptor_type> -// CHECK: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type> +// CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<3x4xi32, #hal.descriptor_type> +// CHECK: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type> // CHECK: linalg.generic {{.*}} ins(%[[IN]] {{.*}} outs(%[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0] -> (-d0 + 5, s0)> @@ -1111,8 +1059,8 @@ func.func @rhs_non_splat_constant() { %cst_0 = arith.constant 0.000000e+00 : f32 %c5 = arith.constant 5 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 5, 3, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x5x3x1xf32> %3 = tensor.collapse_shape %2 [[0, 1], [2, 3]] : tensor<1x5x3x1xf32> into tensor<5x3xf32> %workgroup_size_x = hal.interface.workgroup.size[0] : index @@ -1142,8 +1090,8 @@ func.func @rhs_non_splat_constant() { // CHECK-LABEL: func.func @rhs_non_splat_constant // CHECK-DAG: %[[CONSTANT:.+]] = arith.constant {{.+}} 
: tensor<3x5xf32> // CHECK-DAG: %[[RHS:.+]] = bufferization.to_memref %[[CONSTANT]] -// CHECK-DAG: %[[LHS_INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1x5x3x1xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<5x5xf32, #hal.descriptor_type> +// CHECK-DAG: %[[LHS_INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1x5x3x1xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<5x5xf32, #hal.descriptor_type> // CHECK: %[[LHS:.+]] = memref.collapse_shape %[[LHS_INPUT]] // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = @@ -1158,12 +1106,10 @@ func.func @rhs_non_splat_constant() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0, d1) -> (d0)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -1173,9 +1119,9 @@ func.func @gather() { %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%3, %4} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2} + %7 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%3, %4} %8 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%3, %4], strides = [1, 1] : !flow.dispatch.tensor>{%3, %4} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor>{%2} -> tensor @@ -1190,26 +1136,24 @@ func.func @gather() { return } // CHECK-LABEL: func.func @gather() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: linalg.generic // CHECK: %[[VAL:.+]] = memref.load %[[ARG0]] // CHECK: linalg.yield %[[VAL]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @pooling_nhwc_sum() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 2, 2, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x2x2x1xf32> %4 = bufferization.alloc_tensor() : tensor<2x3xf32> %5 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor @@ -1222,9 +1166,9 @@ func.func @pooling_nhwc_sum() { } // CHECK-LABEL: func.func @pooling_nhwc_sum // CHECK-DAG: %[[WINDOW:.+]] = memref.alloc() : memref<2x3xf32> -// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<1x4x6x1xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<1x2x2x1xf32, #hal.descriptor_type> +// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<1x4x6x1xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<1x2x2x1xf32, #hal.descriptor_type> // CHECK: %[[INIT_VAL:.+]] = memref.load %[[INIT]][] : memref // CHECK: linalg.fill // CHECK-SAME: ins(%[[INIT_VAL]] : @@ -1237,12 +1181,10 @@ func.func @pooling_nhwc_sum() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)> @@ -1255,10 +1197,10 @@ func.func @read_only_subtensor() { %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load 
layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} %8 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%4, %5} %10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index @@ -1291,9 +1233,9 @@ func.func @read_only_subtensor() { return } // CHECK-LABEL: func.func @read_only_subtensor -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref> -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref> +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> // CHECK: scf.for // CHECK: scf.for // CHECK-DAG: %[[SV1:.+]] = memref.subview %[[ARG0]] @@ -1305,19 +1247,17 @@ func.func 
@read_only_subtensor() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0) -> (d0)> func.func @reshape_read_only() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2} %5 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor>{%2} -> tensor %6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %7 = tensor.collapse_shape %6 [[0, 1]] : tensor into tensor @@ -1332,8 +1272,8 @@ func.func @reshape_read_only() { return } // CHECK-LABEL: func.func @reshape_read_only -// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[RESHAPE:.+]] = memref.collapse_shape %[[INPUT]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[RESHAPE]] : memref, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0, d1, d2, d3) -> (d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func.func @use_buffer_for_operand_when_output_tensor_not_used() { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x112x112x32xf32> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 255, 255, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x16xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 16, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x16x32xf32> @@ -1374,7 +1312,7 @@ func.func @use_buffer_for_operand_when_output_tensor_not_used() { // CHECK: func.func @use_buffer_for_operand_when_output_tensor_not_used() // CHECK-NOT: memref.alloc -// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: 
linalg.fill // CHECK-SAME: outs(%[[OUTPUT]] : // CHECK-NEXT: linalg.conv_2d_nhwc_hwcf @@ -1385,23 +1323,21 @@ func.func @use_buffer_for_operand_when_output_tensor_not_used() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d3)> func.func @dont_use_buffer_for_operand_when_output_tensor_used() { %cst = arith.constant 1.000000e+00 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x112x112x32xf32> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x16xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 16, 32], strides = [1, 1, 1, 1] : 
!flow.dispatch.tensor> -> tensor<3x3x16x32xf32> @@ -1421,7 +1357,7 @@ func.func @dont_use_buffer_for_operand_when_output_tensor_used() { } // CHECK-LABEL: func.func @dont_use_buffer_for_operand_when_output_tensor_used() // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc -// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: linalg.fill // CHECK-SAME: outs(%[[ALLOC]] : // CHECK-NEXT: linalg.conv_2d_nhwc_hwcf @@ -1434,11 +1370,9 @@ func.func @dont_use_buffer_for_operand_when_output_tensor_used() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<(d0) -> (-d0 + 4)> #map1 = affine_map<(d0) -> (d0)> @@ -1447,8 +1381,8 @@ func.func @bufferize_cst_output_tensor() { %c-2147483648_i32 = arith.constant -2147483648 : i32 %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [5], strides = [1] : !flow.dispatch.tensor> -> tensor<5xf32> %4 = linalg.fill ins(%c-2147483648_i32 : i32) outs(%2 : tensor) -> tensor @@ -1469,20 +1403,18 @@ func.func @bufferize_cst_output_tensor() { // CHECK-DAG: %[[CST1:.+]] = arith.constant -2147483648 : i32 // CHECK-DAG: %[[CST5:.+]] = arith.constant dense<[1, 2, 3, 4, 
5]> : tensor<5xi32> // CHECK: %[[CAST5:.+]] = bufferization.to_memref %[[CST5]] : memref<5xi32> -// CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<5xf32, #hal.descriptor_type> -// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref> +// CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<5xf32, #hal.descriptor_type> +// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref> // CHECK: linalg.fill ins(%[[CST1]] : i32) outs(%[[OUTPUT]] : memref) // CHECK: linalg.generic // CHECK-SAME: ins(%[[INPUT]], %[[CAST5]] : {{.*}}) outs(%[[OUTPUT]] : memref) // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 32)> func.func @cast_follwed_by_store() { @@ -1491,9 +1423,9 @@ func.func @cast_follwed_by_store() { %c64 = arith.constant 64 : index %c1 = arith.constant 1 : index %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -1520,9 +1452,9 @@ 
func.func @cast_follwed_by_store() { } // CHECK-LABEL: func.func @cast_follwed_by_store() // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4x32x1024xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4x1024x64xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<4x32x64xf32, #hal.descriptor_type> +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4x32x1024xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4x1024x64xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<4x32x64xf32, #hal.descriptor_type> // CHECK-DAG: %[[LHSV:.+]] = memref.subview %[[LHS]] // CHECK-DAG: %[[RHSV:.+]] = memref.subview %[[RHS]] // CHECK-DAG: %[[RESULTV:.+]] = memref.subview %[[RESULT]] @@ -1533,11 +1465,9 @@ func.func @cast_follwed_by_store() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @rank_reduced_subtensor_insert() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1545,8 +1475,8 @@ func.func @rank_reduced_subtensor_insert() { %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : 
!flow.dispatch.tensor>{%2, %3, %4} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3, %4} %7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %8 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0], sizes = [%2, %3, %4], strides = [1, 1, 1] : !flow.dispatch.tensor>{%2, %3, %4} -> tensor %9 = tensor.insert_slice %7 into %8[0, 0, 0] [1, %3, %4] [1, 1, 1] : tensor into tensor @@ -1554,19 +1484,17 @@ func.func @rank_reduced_subtensor_insert() { return } // CHECK-LABEL: func.func @rank_reduced_subtensor_insert() -// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[RET]] // CHECK: linalg.generic {{.*}} ins(%[[ARG]] {{.*}} outs(%[[SUBVIEW]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -1577,9 +1505,9 @@ func.func @bufferize_transfer_op_inplace() { %c0 = arith.constant 0 : index %c2 = arith.constant 2 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) 
set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [%c0, %c0], sizes = [2, 3], strides = [%c1, %c1] : !flow.dispatch.tensor> -> tensor<2x3xf32> %5 = flow.dispatch.tensor.load %1, offsets = [%c0, %c0], sizes = [3, 1], strides = [%c1, %c1] : !flow.dispatch.tensor> -> tensor<3x1xf32> %6 = flow.dispatch.tensor.load %3, offsets = [%c0, %c0], sizes = [2, 1], strides = [%c1, %c1] : !flow.dispatch.tensor> -> tensor<2x1xf32> @@ -1607,9 +1535,9 @@ func.func @bufferize_transfer_op_inplace() { } // CHECK-LABEL: func.func @bufferize_transfer_op_inplace() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]] // CHECK-DAG: %[[RET0V:.+]] = memref.subview %[[RET0]] // CHECK-COUNT-6: vector.transfer_read %[[ARG0]] @@ -1621,13 +1549,11 @@ func.func @bufferize_transfer_op_inplace() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<(d0)[s0, s1] 
-> (-d0 + s0, s1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -1641,10 +1567,10 @@ func.func @multi_result() { %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index %6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index %7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%4, %5} - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%4, %5} + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%6, %7} %12 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index %13 = hal.interface.constant.load layout(#pipeline_layout) ordinal(9) : index %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -1678,10 +1604,10 @@ func.func @multi_result() { return } // CHECK-LABEL: func.func @multi_result() -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// 
CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK-DAG: %[[ARG0V:.+]] = memref.subview %[[ARG0]] // CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]] // CHECK-DAG: %[[RET0V:.+]] = memref.subview %[[RET0]] @@ -1692,13 +1618,11 @@ func.func @multi_result() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 128)> #map1 = affine_map<(d0)[s0] -> (-d0 + s0, 128)> @@ -1710,10 +1634,10 @@ func.func @multi_result_reduce() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%0, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%2} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%2} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%0, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%2} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(3) : !flow.dispatch.tensor>{%2} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %7 = affine.apply #map0()[%workgroup_id_x] @@ -1743,10 +1667,10 @@ func.func @multi_result_reduce() { return } // CHECK-LABEL: func.func @multi_result_reduce -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK: scf.for // CHECK-DAG: %[[ARG0_SV:.+]] = memref.subview %[[ARG0]] // CHECK-DAG: %[[ARG1_SV:.+]] = memref.subview %[[ARG1]] @@ -1762,12 +1686,10 @@ func.func @multi_result_reduce() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0) -> (-d0 + 250, 64)> @@ -1784,9 +1706,9 @@ func.func @l1_tiled_matmul_no_fill_readwrite() { %c1 = arith.constant 1 : index %c250 = arith.constant 250 : index %c370 = arith.constant 370 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) 
set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -1834,9 +1756,9 @@ func.func @l1_tiled_matmul_no_fill_readwrite() { // CHECK-DAG: %[[K:.+]] = arith.constant 144 : index // CHECK-DAG: %[[L1_MN_SIZE:.+]] = arith.constant 32 : index // CHECK-DAG: %[[L1_K_SIZE:.+]] = arith.constant 24 : index -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<250x144xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<144x370xf32, #hal.descriptor_type> -// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<250x370xf32, #hal.descriptor_type> +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<250x144xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<144x370xf32, #hal.descriptor_type> +// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<250x370xf32, #hal.descriptor_type> // CHECK: scf.for %[[WORKGROUP_I:.+]] = %{{.*}} to %[[M]] step %{{.*}} { // CHECK: scf.for %[[WORKGROUP_J:.+]] = %{{.*}} to %[[N]] step %{{.*}} { // CHECK-DAG: %[[WORKGROUP_I_SIZE:.+]] = affine.min #{{.*}}(%[[WORKGROUP_I]]) @@ -1858,12 +1780,10 @@ func.func @l1_tiled_matmul_no_fill_readwrite() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - 
]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0) -> (-d0 + 250, 64)> @@ -1881,9 +1801,9 @@ func.func @l1_tiled_matmul() { %c1 = arith.constant 1 : index %c250 = arith.constant 250 : index %c370 = arith.constant 370 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -1932,9 +1852,9 @@ func.func @l1_tiled_matmul() { // CHECK-DAG: %[[K:.+]] = arith.constant 144 : index // CHECK-DAG: %[[L1_MN_SIZE:.+]] = arith.constant 32 : index // CHECK-DAG: %[[L1_K_SIZE:.+]] = arith.constant 24 : index -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<250x144xf32, #hal.descriptor_type> -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<144x370xf32, #hal.descriptor_type> -// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<250x370xf32, #hal.descriptor_type> +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<250x144xf32, #hal.descriptor_type> +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<144x370xf32, 
#hal.descriptor_type> +// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<250x370xf32, #hal.descriptor_type> // CHECK: scf.for %[[WORKGROUP_I:.+]] = %{{.*}} to %[[M]] step %{{.*}} { // CHECK: scf.for %[[WORKGROUP_J:.+]] = %{{.*}} to %[[N]] step %{{.*}} { // CHECK-DAG: %[[WORKGROUP_I_SIZE:.+]] = affine.min #{{.*}}(%[[WORKGROUP_I]]) @@ -1956,11 +1876,9 @@ func.func @l1_tiled_matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0, s1] -> (s1 * s0)> #map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)> @@ -1972,8 +1890,8 @@ func.func @tensor_insert_slice() { %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%4, %5} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%4, %5} %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -1998,8 +1916,8 @@ func.func @tensor_insert_slice() { } // CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK: func.func @tensor_insert_slice() -// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref> +// CHECK-DAG: %[[SRC:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref> // CHECK-DAG: %[[OFFSET_Y:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) // CHECK-DAG: %[[OFFSET_X:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) // CHECK: scf.for %[[IV0:.+]] = @@ -2012,12 +1930,10 @@ func.func @tensor_insert_slice() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0)[s0] -> (-d0 + s0, 64)> @@ -2026,9 +1942,9 @@ func.func @dynamic_update_slice() { %c0_i32 = arith.constant 0 : i32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%1, %0} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%1, %0} %5 = flow.dispatch.tensor.load %3, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %6 = tensor.extract %5[] : tensor %7 = arith.cmpi slt, %6, %c0_i32 : i32 @@ -2049,8 +1965,8 @@ func.func @dynamic_update_slice() { return } // CHECK-LABEL: func.func @dynamic_update_slice() -// CHECK-DAG: %[[SRC:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> +// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> // CHECK-DAG: %[[OFFSET_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK: scf.for %[[IV0:.+]] = @@ -2062,13 +1978,11 @@ func.func @dynamic_update_slice() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)> @@ -2083,10 +1997,10 @@ func.func @multi_level_tile_fuse() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + 
%5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -2146,10 +2060,10 @@ func.func @multi_level_tile_fuse() { // CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref>{%[[M]], %[[K]]} -// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref>{%[[K]], %[[N]]} -// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref>{%[[M]], %[[N]]} +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref>{%[[M]], %[[K]]} +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref>{%[[K]], %[[N]]} +// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref>{%[[M]], %[[N]]} // CHECK: scf.for // CHECK: scf.for // CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]] @@ -2174,13 +2088,11 @@ func.func @multi_level_tile_fuse() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 4)> #map1 = affine_map<(d0) -> (-d0 + 2, 4)> @@ -2194,10 +2106,10 @@ func.func @operand_fusion() { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -2230,10 +2142,10 @@ func.func @operand_fusion() { // CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) -// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref>{%[[M]], %[[K]]} -// CHECK-DAG: 
%[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref>{%[[K]], %[[N]]} -// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref>{%[[M]], %[[N]]} +// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref>{%[[M]], %[[K]]} +// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref>{%[[K]], %[[N]]} +// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref>{%[[M]], %[[N]]} // CHECK: scf.for // CHECK: scf.for // CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]] @@ -2250,11 +2162,9 @@ func.func @operand_fusion() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 4)> #map1 = affine_map<()[s0] -> (s0 * 2)> @@ -2270,9 +2180,9 @@ func.func @dot_general_nontrivial_batching_mutliple_parallel_dimension() { %cst_0 = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c64) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c64) : 
!flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -2312,26 +2222,24 @@ func.func @dot_general_nontrivial_batching_mutliple_parallel_dimension() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @no_op_subview() { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} %4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %5 = tensor.extract_slice %4[0, 0] [%0, %1] [1, 1] : tensor to tensor flow.dispatch.tensor.store %5, %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%0, %1} return } // CHECK-LABEL: func.func @no_op_subview() -// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) 
binding(0) +// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SRC_DUP:.+]] = memref.subview %[[SRC]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[SRC_DUP]] : @@ -2339,17 +2247,15 @@ func.func @no_op_subview() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @rank_reducing_no_op_subview() { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} %3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1] : !flow.dispatch.tensor>{%0} -> tensor<1x?xf32> %4 = tensor.extract_slice %3[0, 0] [1, %0] [1, 1] : tensor<1x?xf32> to tensor flow.dispatch.tensor.store %4, %2, offsets = [0], sizes = [%0], strides = [1] : tensor -> !flow.dispatch.tensor>{%0} @@ -2357,8 +2263,8 @@ func.func @rank_reducing_no_op_subview() { } // CHECK-LABEL: func.func @rank_reducing_no_op_subview() -// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][0, 0] [1, %{{.+}}] // 
CHECK: linalg.generic // CHECK-SAME: ins(%[[SUBVIEW]] : @@ -2382,18 +2288,16 @@ func.func @fft_tensor(%idx: index) -> (tensor<1024xf32>, tensor<1024xf32>) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @scan_1d_dim0_inclusive_sum() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor> -> tensor<6xf32> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor> -> tensor<6xf32> %5 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor @@ -2414,14 +2318,12 @@ func.func @scan_1d_dim0_inclusive_sum() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @sort1D() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<4xi32>) { ^bb0(%arg0: i32, %arg1: i32): @@ -2432,25 +2334,23 @@ func.func @sort1D() { return } // CHECK-LABEL: func.func @sort1D -// CHECK: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type> +// CHECK: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type> // CHECK: iree_linalg_ext.sort // CHECK-SAME: outs(%[[BUF]] : memref<4xi32{{.+}}>) // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @scatter_update_scalar_1D() { %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [8], strides = [1] : !flow.dispatch.tensor> -> 
tensor<8xi32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -2468,9 +2368,9 @@ func.func @scatter_update_scalar_1D() { return } // CHECK: func.func @scatter_update_scalar_1D -// CHECK-DAG: %[[UPDATE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type> -// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x1xi32, #hal.descriptor_type> -// CHECK-DAG: %[[ORIGINAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref<8xi32, #hal.descriptor_type> +// CHECK-DAG: %[[UPDATE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x1xi32, #hal.descriptor_type> +// CHECK-DAG: %[[ORIGINAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref<8xi32, #hal.descriptor_type> // CHECK: scf.for %[[I:.+]] = %{{.+}} to %{{.+}} step %{{.+}} // CHECK: iree_linalg_ext.scatter // CHECK-SAME: ins(%[[UPDATE]], %[[INDICES]] @@ -2478,17 +2378,15 @@ func.func @scatter_update_scalar_1D() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @topk() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> %input_values = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [200, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<200x8xf32> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %input_indices = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [200, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<200x8xi32> %out_values = bufferization.alloc_tensor() : tensor<200x3xf32> %out_indices = bufferization.alloc_tensor() : tensor<200x3xi32> @@ -2503,8 +2401,8 @@ func.func @topk() { return } // CHECK: func.func @topk -// CHECK-DAG: %[[INPUT_VALUES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<200x8xf32, #hal.descriptor_type> -// CHECK-DAG: %[[INPUT_INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<200x8xi32, #hal.descriptor_type> +// CHECK-DAG: %[[INPUT_VALUES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<200x8xf32, #hal.descriptor_type> +// CHECK-DAG: %[[INPUT_INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<200x8xi32, #hal.descriptor_type> // CHECK-DAG: %[[OUTPUT_VALUES:.+]] = memref.alloc() : memref<200x3xf32> // CHECK-DAG: %[[OUTPUT_INDICES:.+]] = memref.alloc() : memref<200x3xi32> // CHECK: iree_linalg_ext.topk @@ -2513,17 +2411,15 @@ func.func @topk() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @iree_linalg_ext_pack() { %c0 = arith.constant 0 : index %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x3x3xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %4 = iree_linalg_ext.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : (tensor<4x4xi32> tensor<2x2x3x3xi32>) -> tensor<2x2x3x3xi32> @@ -2532,24 +2428,22 @@ func.func @iree_linalg_ext_pack() { } // CHECK: func.func @iree_linalg_ext_pack // CHECK-DAG: %[[PAD:.+]] = arith.constant 0 : i32 -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type> // CHECK: iree_linalg_ext.pack %[[IN]] // CHECK-SAME: padding_value(%[[PAD]] : i32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @iree_linalg_ext_unpack() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> %4 = iree_linalg_ext.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : (tensor<2x2x2x2xi32> tensor<4x4xi32>) -> tensor<4x4xi32> @@ -2557,24 +2451,22 @@ func.func @iree_linalg_ext_unpack() { return } // CHECK: func.func @iree_linalg_ext_unpack -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> // CHECK: iree_linalg_ext.unpack %[[IN]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @iree_linalg_ext_unpack_fully_dynamic() { %c0 = arith.constant 0 : index %inner_d0 = util.unfoldable_constant 2 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x?x?xi32> %4 = iree_linalg_ext.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : (tensor<2x2x?x?xi32> tensor<4x4xi32>) -> tensor<4x4xi32> @@ -2589,17 +2481,15 @@ func.func @iree_linalg_ext_unpack_fully_dynamic() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_pack() { %c0 = arith.constant 0 : index %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x3x3xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %4 = tensor.pack %3 
padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32> @@ -2608,24 +2498,22 @@ func.func @tensor_pack() { } // CHECK: func.func @tensor_pack // CHECK-DAG: %[[PAD:.+]] = arith.constant 0 : i32 -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type> // CHECK: iree_linalg_ext.pack %[[IN]] // CHECK-SAME: padding_value(%[[PAD]] : i32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_unpack() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], 
strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> @@ -2633,24 +2521,22 @@ func.func @tensor_unpack() { return } // CHECK: func.func @tensor_unpack -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type> // CHECK: iree_linalg_ext.unpack %[[IN]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_unpack_fully_dynamic() { %c0 = arith.constant 0 : index %inner_d0 = util.unfoldable_constant 2 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, 
offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x?x?xi32> %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32> @@ -2665,20 +2551,18 @@ func.func @tensor_unpack_fully_dynamic() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @reduction_ew() { %c5120 = arith.constant 5120 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1001], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1001xf32> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1001], strides = [1] : !flow.dispatch.tensor> -> tensor<1001xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1001], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1001xf32> @@ -2700,29 +2584,27 @@ func.func @reduction_ew() { } // CHECK: func.func @reduction_ew -// CHECK: hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(0) alignment(64) offset(%c5120) : memref<1001xf32, strided<[1], offset: 1280>, #hal.descriptor_type> -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c5120) : memref<1x1001xf32, strided<[1001, 1], offset: 1280>, #hal.descriptor_type> -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<1x1001xf32, #hal.descriptor_type> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c5120) : memref<1001xf32, strided<[1], offset: 1280>, #hal.descriptor_type> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c5120) : memref<1x1001xf32, strided<[1001, 1], offset: 1280>, #hal.descriptor_type> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<1x1001xf32, #hal.descriptor_type> // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, uniform_buffer>, - #hal.descriptor_set.binding<2, uniform_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @uniform_storage_buffer() { %c0 = arith.constant 0 : index %m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%m, %k} - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%k, %n} - %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%m, %n} - %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : 
!flow.dispatch.tensor>{%m, %n} + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%m, %k} + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%k, %n} + %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%m, %n} + %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%m, %n} %wg_id_y = hal.interface.workgroup.id[1] : index %wg_count_y = hal.interface.workgroup.count[1] : index %wg_size_y = hal.interface.workgroup.size[1] : index @@ -2748,30 +2630,28 @@ func.func @uniform_storage_buffer() { } // CHECK-LABEL: func.func @uniform_storage_buffer() -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref> -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) : memref> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(3) : memref> // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, uniform_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @micro_kernel_op() { %d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %s0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : f32 %s1 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i64 - %arg0_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d0, %d1} - %arg1_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d0, %d1} - %arg2_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%d0, %d1} - %arg3_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%d0, %d1} + %arg0_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d0, %d1} + %arg1_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d0, %d1} + %arg2_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%d0, %d1} + %arg3_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%d0, %d1} %arg0 = flow.dispatch.tensor.load %arg0_binding, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1] : !flow.dispatch.tensor>{%d0, %d1} -> tensor %arg1 = flow.dispatch.tensor.load %arg1_binding, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1] @@ -2792,10 +2672,10 @@ func.func @micro_kernel_op() { // CHECK-LABEL: func @micro_kernel_op() // CHECK-DAG: %[[S0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) // CHECK-DAG: %[[S1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref> -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref> -// CHECK-DAG: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref> -// CHECK-DAG: %[[ARG3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref> +// CHECK-DAG: %[[ARG0:.+]] 
= hal.interface.binding.subspan layout({{.+}}) binding(0) : memref> +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref> +// CHECK-DAG: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref> +// CHECK-DAG: %[[ARG3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref> // CHECK: iree_codegen.ukernel.generic "foo" // CHECK-SAME: ins(%[[ARG0]] : // CHECK-SAME: outs(%[[ARG1]], %[[ARG2]] : @@ -2804,17 +2684,15 @@ func.func @micro_kernel_op() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @sub_byte_bufferize_with_offset() { %c64 = arith.constant 64 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %2 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] %3 = flow.dispatch.tensor.load %1, offsets = [%2], sizes = [64], strides = [1] : !flow.dispatch.tensor> -> tensor<64xf32> @@ -2830,7 +2708,7 @@ func.func @sub_byte_bufferize_with_offset() { } // CHECK-LABEL: func.func @sub_byte_bufferize_with_offset() // CHECK: %[[C64:.+]] = arith.constant 64 : index -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: memref<64xi4, strided<[1], offset: 128> // ----- diff 
--git a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir index ae710180c9958..5b47372dd8d51 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir @@ -64,14 +64,12 @@ func.func @resolve_subview_rankreducing_not_at_the_end_memref(%arg0: memref<8x16 // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_zero_offset_memref() -> (memref, index, index, index, index, index) { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x384xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x384xf32> %base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref<512x384xf32> -> memref, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref, index, index, index, index, index } @@ -80,19 +78,17 @@ func.func @resolve_binding_subspan_zero_offset_memref() -> (memref, index, // CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<196608xf32> +// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<196608xf32> // CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: [] // CHECK: return %[[BASE_PTR]], %[[C0]], %[[C512]], %[[C384]], %[[C384]], %[[C1]] // ----- -#pipeline_layout = 
#hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_offset_index_memref(%arg0 : index) -> (memref, index, index, index, index, index) { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex, strided<[384, 1], offset:?>> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex, strided<[384, 1], offset:?>> %base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref<512x384xindex, strided<[384, 1], offset:?>> -> memref, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref, index, index, index, index, index } @@ -106,20 +102,18 @@ func.func @resolve_binding_subspan_offset_index_memref(%arg0 : index) -> (memref // CHECK: %[[SIZEOF:.+]] = util.sizeof index // CHECK: %[[OFFSET:.+]] = affine.apply #[[MAP0]]()[%arg0, %[[SIZEOF]]] // CHECK: %[[SUBSPAN_SIZE:.+]] = affine.apply #[[MAP1]]()[%arg0, %[[SIZEOF]]] -// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref{%[[SUBSPAN_SIZE]]} +// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref{%[[SUBSPAN_SIZE]]} // CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: [] // CHECK: return %[[BASE_PTR]], %[[OFFSET]], %[[C512]], %[[C384]], %[[C384]], %[[C1]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_dyn_dims_memref(%arg0 : index, %arg1 : index) -> (memref, index, index, index, index, index) { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref{%arg0, %arg1} + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref{%arg0, %arg1} %base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref -> memref, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref, index, index, index, index, index } @@ -128,7 +122,7 @@ func.func @resolve_binding_subspan_dyn_dims_memref(%arg0 : index, %arg1 : index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%arg0, %arg1] -// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref{%[[SIZE]]} +// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref{%[[SIZE]]} // CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: [] // CHECK: return %[[BASE_PTR]], %[[C0]], %arg0, %arg1, %arg1, %[[C1]] @@ -186,15 +180,13 @@ func.func @resolve_global_memref() -> (memref, index, index, index, index, // Tests for the part of the pass that converts iree_codegen to memref. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @external_func_entry_point() -> (memref, index) { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> %base_buffer, %offset, %sizes:3, %strides:3 = iree_codegen.extract_strided_metadata %2 : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> -> memref, index, index, index, index, index, index, index return %base_buffer, %offset : memref, index } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir b/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir index 83c7dc7f3b358..3ca8c276107e5 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir @@ -175,35 +175,31 @@ func.func @batch_matmul_fill_dynamic(%arg0 : tensor, %arg1 : tensor, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding_lhs = #iree_encoding.encoding, matmul_narrow_M = 1 : index, matmul_narrow_N = 1 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]> func.func @drop_encoding_for_hal_flow_ops_static() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> %3 = iree_encoding.set_encoding %2 : tensor<1x1xf32> -> tensor<1x1xf32, #encoding_lhs> flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : tensor<1x1xf32, #encoding_lhs> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @drop_encoding_for_hal_flow_ops_static -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : !flow.dispatch.tensor> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : !flow.dispatch.tensor> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : !flow.dispatch.tensor> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK: flow.dispatch.tensor.store %[[LOAD]], %[[OUT]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #encoding_lhs = #iree_encoding.encoding, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]> func.func @drop_encoding_for_hal_flow_ops_dynamic() { @@ -225,15 +221,15 @@ func.func @drop_encoding_for_hal_flow_ops_dynamic() { %13 = arith.index_castui %12 : i64 to index %14 = flow.dispatch.workload.ordinal %8, 0 : index %15 = 
flow.dispatch.workload.ordinal %13, 1 : index - %16 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%14, %15} - %17 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%14, %15} + %16 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%14, %15} + %17 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%14, %15} %18 = flow.dispatch.tensor.load %16, offsets = [0, 0], sizes = [%14, %15], strides = [1, 1] : !flow.dispatch.tensor>{%14, %15} -> tensor %19 = iree_encoding.set_encoding %18 : tensor -> tensor flow.dispatch.tensor.store %19, %17, offsets = [0, 0], sizes = [%14, %15], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%14, %15} return } // CHECK-LABEL: func.func @drop_encoding_for_hal_flow_ops_dynamic -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : !flow.dispatch.tensor> -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : !flow.dispatch.tensor> +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : !flow.dispatch.tensor> +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK: flow.dispatch.tensor.store %[[LOAD]], %[[OUT]] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir b/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir index 8e80747233c0c..6bc67b850d01f 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir +++ 
b/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir @@ -3,20 +3,18 @@ #config = #iree_codegen.lowering_config #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {target_triple = "x86_64-xyz-xyz"}> #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #compilation = #iree_codegen.compilation_info module { func.func @preset_config() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x512xf32> %5 = tensor.empty() : tensor<128x512xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir index 1255453008579..5b072cb6e8d31 100644 --- 
a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir @@ -172,12 +172,10 @@ func.func @negative_fold_insert_slice_into_transfer_write_dynamic(%v: vector<4x7 // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 64)> @@ -192,7 +190,7 @@ func.func @batch_matmul_with_padding_strategy(%arg0: tensor<1x?x1280xf16>, %arg1 %c1 = arith.constant 1 : index %cst_0 = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_z = hal.interface.workgroup.id[2] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %1 = affine.apply #map()[%workgroup_id_y] @@ -229,19 +227,17 @@ func.func @batch_matmul_with_padding_strategy(%arg0: tensor<1x?x1280xf16>, %arg1 // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @_batch_matmul_narrow_n_2_dispatch_4_unpack_i32() attributes {translation_info = #iree_codegen.translation_info} { %c0_i32 = arith.constant 0 : i32 %c2 = arith.constant 2 : index %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index scf.for %arg0 = %workgroup_id_x to %c2 step %workgroup_count_x { diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir index 50a28e395244a..18c1677695681 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir @@ -1,6 +1,8 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-codegen-reconcile-translation-info)))" %s --verify-diagnostics | FileCheck %s -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @err_multiple_entry_point { // expected-error @+1 {{reconciliation for multiple export ops unsupported}} hal.executable.variant public @reconcile_workgroup_size target(#hal.executable.target<"", "", {}>) { @@ -11,7 +13,9 @@ hal.executable private @err_multiple_entry_point { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @reconcile_workgroup_size { hal.executable.variant public @reconcile_workgroup_size target(#hal.executable.target<"", "", {}>) { hal.executable.export public @entry_point layout(#pipeline_layout) @@ -31,7 +35,9 @@ hal.executable private @reconcile_workgroup_size { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @single_translation_info { 
hal.executable.variant public @single_translation_info target(#hal.executable.target<"", "", {}>) { hal.executable.export public @entry_point layout(#pipeline_layout) @@ -51,7 +57,9 @@ hal.executable private @single_translation_info { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @err_mistmatched_workgroup_size { hal.executable.variant public @err_mismatched_workgroup_size target(#hal.executable.target<"", "", {}>) { // expected-error @+1 {{failed to reconcile workgroup sizes}} @@ -69,7 +77,9 @@ hal.executable private @err_mistmatched_workgroup_size { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @err_mistmatched_workgroup_size2 { hal.executable.variant public @err_mismatched_workgroup_size2 target(#hal.executable.target<"", "", {}>) { // expected-error @+1 {{failed to reconcile workgroup sizes}} @@ -87,7 +97,9 @@ hal.executable private @err_mistmatched_workgroup_size2 { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @reconcile_subgroup_size { hal.executable.variant public @reconcile_subgroup_size target(#hal.executable.target<"", "", {}>) { hal.executable.export public @entry_point layout(#pipeline_layout) @@ -107,7 +119,9 @@ hal.executable private @reconcile_subgroup_size { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @err_reconcile_subgroup_size { hal.executable.variant public @err_reconcile_subgroup_size target(#hal.executable.target<"", "", {}>) { hal.executable.export public @entry_point layout(#pipeline_layout) @@ -127,7 +141,9 @@ hal.executable private @err_reconcile_subgroup_size { // ----- -#pipeline_layout = #hal.pipeline.layout]>]> +#pipeline_layout = #hal.pipeline.layout +]> hal.executable private @llvm_func_attrs { hal.executable.variant public 
@llvm_func_attrs target(#hal.executable.target<"", "", {}>) { hal.executable.export public @entry_point layout(#pipeline_layout) diff --git a/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir b/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir index 6607225f0ad17..0ec9ceb99d28b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir @@ -19,13 +19,11 @@ func.func @alloc_keep(%arg0: index, %arg1: index) -> memref { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @cleanup_only_assume_alignment_uses() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<42xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<42xf32> memref.assume_alignment %0, 64 : memref<42xf32> return } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir index 4fa8f0a4413b3..dc7a35671952c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-remove-single-iteration-loop)))))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #translation_info = #iree_codegen.translation_info // CHECK-LABEL: func.func @dispatch_0() @@ -48,11 +46,9 @@ hal.executable private @dispatch_0 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding ]> // CHECK-LABEL: func.func @workgroup_tile_loop() @@ -85,11 +81,9 @@ hal.executable private @workgroup_tile_loop { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @workgroup_tile_loop_negative() @@ -122,11 +116,9 @@ hal.executable private @workgroup_tile_loop_negative { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @both_workgroup_and_workitem() @@ -187,7 +179,11 @@ hal.executable private @both_workgroup_and_workitem { // ----- -#pipeline_layout = #hal.pipeline.layout, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> #translation = #iree_codegen.translation_info #map0 = affine_map<()[s0] -> (s0 ceildiv 4)> #map1 = affine_map<()[s0] -> (s0 * 4)> @@ -206,11 +202,11 @@ hal.executable private @simple_mul { %cst = arith.constant 0.000000e+00 : f32 %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4xf32> memref.assume_alignment %0, 64 : memref<4xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<4xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<4xf32> memref.assume_alignment %1, 64 : memref<4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<4xf32> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<4xf32> memref.assume_alignment %2, 64 : memref<4xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir index 89ac36091ca13..408885d729ddf 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir @@ -2,13 +2,11 @@ // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups{max-workgroup-parallel-dims=1}, canonicalize)), cse)))' --split-input-file %s | FileCheck %s -check-prefix=CHECKW // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups{distribution-method=2})), canonicalize, cse)))' --split-input-file %s | FileCheck %s -check-prefix=NO-LOOP #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -31,13 +29,13 @@ hal.executable private @matmul_tensors { %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 
: index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor @@ -72,10 +70,10 @@ hal.executable private @matmul_tensors { // CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) // CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) // CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) -// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[INIT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[INIT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK-DAG: %[[WG_ID_X:.+]] = 
hal.interface.workgroup.id[0] // CHECK-DAG: %[[WG_COUNT_X:.+]] = hal.interface.workgroup.count[0] // CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1] @@ -100,12 +98,10 @@ hal.executable private @matmul_tensors { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -128,11 +124,11 @@ hal.executable private @add { %cl_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -175,12 +171,10 @@ hal.executable private @add { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> 
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -205,11 +199,11 @@ hal.executable private @add4D { %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index %3 = flow.dispatch.workload.ordinal %cl_3, 3 : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor @@ -254,12 +248,10 @@ hal.executable private @add4D { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -284,11 +276,11 @@ hal.executable private @add_distribute4D { %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index %3 = flow.dispatch.workload.ordinal %cl_3, 3 : index - %4 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor @@ -337,9 +329,9 @@ hal.executable private @add_distribute4D { // CHECK-DAG: %[[D1:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(1) : index // CHECK-DAG: %[[D2:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index // CHECK-DAG: %[[D3:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index -// CHECK-DAG: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} -// CHECK-DAG: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} -// CHECK-DAG: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} +// CHECK-DAG: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} +// CHECK-DAG: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} +// CHECK-DAG: %[[D6:.*]] = 
hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(32) : !flow.dispatch.tensor>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]} // CHECK: %[[WORKGROUP_ID_X:.*]] = hal.interface.workgroup.id[0] : index // CHECK: %[[WORKGROUP_COUNT_X:.*]] = hal.interface.workgroup.count[0] : index // CHECK: %[[WORKGROUP_ID_Y:.*]] = hal.interface.workgroup.id[1] : index @@ -374,12 +366,10 @@ hal.executable private @add_distribute4D { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -404,11 +394,11 @@ hal.executable private @add_distribute4D_zero_tile_size { %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index %3 = flow.dispatch.workload.ordinal %cl_3, 3 : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor @@ -449,12 +439,10 @@ 
hal.executable private @add_distribute4D_zero_tile_size { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -479,11 +467,11 @@ hal.executable private @batch_matmul_tensors { %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index %3 = flow.dispatch.workload.ordinal %cl_3, 3 : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %3, %2} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [%0, %1, %3], strides = [1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %3} -> tensor @@ -523,12 +511,10 @@ hal.executable private @batch_matmul_tensors { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", 
"system-elf-x86_64"> #translation = #iree_codegen.translation_info @@ -542,11 +528,11 @@ hal.executable private @preset_config_matmul_tensors { builtin.module { func.func @preset_config() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> @@ -587,11 +573,9 @@ hal.executable private @preset_config_matmul_tensors { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64"> #translation = #iree_codegen.translation_info @@ -624,8 +608,8 @@ hal.executable public @copy_op { %dest_offset_x = flow.dispatch.workload.ordinal %cl_7, 7: index %slice_size_y = flow.dispatch.workload.ordinal %cl_8, 8: index %slice_size_x = flow.dispatch.workload.ordinal %cl_9, 9: index - %source = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%source_size_y, %source_size_x} - %dest = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%dest_size_y, %dest_size_x} + %source = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%source_size_y, %source_size_x} + %dest = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%dest_size_y, %dest_size_x} %source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref to memref> %dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref to memref> linalg.generic { @@ -663,8 +647,8 @@ hal.executable public @copy_op { // CHECK-DAG: %[[DEST_OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(7) : index // CHECK-DAG: %[[SLICE_SIZE_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(8) : index // CHECK-DAG: %[[SLICE_SIZE_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(9) : index -// CHECK-DAG: %[[SOURCE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[DEST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[SOURCE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[DEST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[SOURCE:.+]] = memref.subview %[[SOURCE_BINDING]][%[[SOURCE_OFFSET_Y]], %[[SOURCE_OFFSET_X]]] // CHECK-DAG: %[[DEST:.+]] = memref.subview %[[DEST_BINDING]][%[[DEST_OFFSET_Y]], %[[DEST_OFFSET_X]]] // CHECK-DAG: %[[WG_ID_X:.+]] = hal.interface.workgroup.id[0] @@ -688,11 +672,9 @@ hal.executable public @copy_op { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64"> #translation = #iree_codegen.translation_info @@ -708,9 +690,9 @@ hal.executable private @static_1d_fft_stage2 { %c2 = arith.constant 2 : index %cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32> %cst_0 = 
arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> @@ -743,11 +725,9 @@ hal.executable private @static_1d_fft_stage2 { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64"> #translation = #iree_codegen.translation_info @@ -765,8 +745,8 @@ hal.executable private @static_3d_fft_stage3 { %cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32> %0 = bufferization.to_memref %cst_0 : memref<4xf32> %1 = bufferization.to_memref %cst : memref<4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32> iree_linalg_ext.fft {lowering_config = #config} ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>) return @@ -794,12 +774,10 @@ hal.executable private @static_3d_fft_stage3 { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64"> #map0 = affine_map<(d0, d1) -> (d0, d1)> @@ -823,11 +801,11 @@ hal.executable private @outs_fusion { %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %6 = tensor.empty(%0, %1) : tensor %7 = linalg.generic { @@ -879,12 +857,10 @@ hal.executable private @outs_fusion { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -918,11 +894,11 @@ hal.executable private @conv { %6 = flow.dispatch.workload.ordinal %cl_6, 6 : index %7 = flow.dispatch.workload.ordinal %cl_7, 7 : index %8 = flow.dispatch.workload.ordinal %cl_8, 8 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %9 
= hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%4, %5, %3, %6} - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %7, %8, %6} %12 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor @@ -969,12 +945,10 @@ hal.executable private @conv { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -991,11 +965,11 @@ hal.executable private @conv_static { builtin.module { func.func @conv_static() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 96], 
strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x161x161x96xf32> @@ -1043,11 +1017,9 @@ hal.executable private @conv_static { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1065,9 +1037,9 @@ hal.executable private @generic_static { } builtin.module { func.func @generic_static() attributes {translation_info = #translation} { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [96, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<96x16xf32> @@ -1110,12 +1082,10 @@ hal.executable private @generic_static { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", { data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", @@ -1132,11 +1102,11 @@ hal.executable private @matmul_static { builtin.module { func.func @matmul_static() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<196x240xf32> @@ -1165,12 +1135,10 @@ hal.executable private @matmul_static { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", { data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", @@ -1187,11 +1155,11 @@ hal.executable private @restrict_num_workgroups { builtin.module { func.func @restrict_num_workgroups() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> 
tensor<1x11x11x576xf32> @@ -1219,12 +1187,10 @@ hal.executable private @restrict_num_workgroups { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1292,12 +1258,10 @@ hal.executable private @reduction { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1318,11 +1282,11 @@ hal.executable private @gemm_unit_N { %cl_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) + %4 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} %5 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%1, 1], strides = [1, 1] : !flow.dispatch.tensor>{%1} -> tensor @@ -1364,12 +1328,10 @@ hal.executable private @gemm_unit_N { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1387,11 +1349,11 @@ hal.executable private @gemm_unit_M_unit_N { func.func @gemm_unit_M_unit_N() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1] : !flow.dispatch.tensor>{%0} -> tensor<1x?xf32> @@ -1423,11 +1385,9 @@ hal.executable private @gemm_unit_M_unit_N { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1452,9 +1412,9 @@ hal.executable private @generic_unit_dims { %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index %3 = flow.dispatch.workload.ordinal %cl_3, 3 : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%0, %1, %2, %3} %6 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [1, %0, 1, 1, %1, %2, 1, %3], strides = [1, 1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor<1x?x1x1x?x?x1x?xf32> @@ -1497,11 +1457,9 @@ hal.executable private @generic_unit_dims { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1521,9 +1479,9 @@ hal.executable private @reduce_to_scalar { func.func @reduce_to_scalar() attributes {translation_info = #translation} { %cl_0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [%0], strides = [1] : !flow.dispatch.tensor>{%0} -> tensor @@ -1557,11 +1515,9 @@ hal.executable private @reduce_to_scalar { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1578,9 +1534,9 @@ hal.executable private @scalar { } builtin.module { func.func @scalar() attributes {translation_info = #translation} { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor @@ -1614,11 +1570,9 @@ hal.executable private @scalar { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1635,9 +1589,9 @@ hal.executable private @rank_reduced_slice { } 
builtin.module { func.func @rank_reduced_slice() attributes {translation_info = #translation} { - %in_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %in_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %in = flow.dispatch.tensor.load %in_binding, offsets = [3, 10], sizes = [1, 10], strides = [2, 1] : !flow.dispatch.tensor> -> tensor<10xf32> @@ -1667,9 +1621,9 @@ hal.executable private @rank_reduced_slice { // CHECK: hal.return %[[C5]], %[[C1]], %[[C1]] // CHECK: func.func @rank_reduced_slice() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: : !flow.dispatch.tensor> -// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: : !flow.dispatch.tensor> // CHECK: scf.for %[[IV0:.+]] = // CHECK: %[[OFFSET:.+]] = affine.apply #[[MAP]]()[%[[IV0]]] @@ -1681,13 +1635,11 @@ hal.executable private @rank_reduced_slice { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -1709,13 +1661,13 @@ hal.executable private @matmul_interchange { %0 = flow.dispatch.workload.ordinal %cl_0, 0 : index %1 = flow.dispatch.workload.ordinal %cl_1, 1 : index %2 = flow.dispatch.workload.ordinal %cl_2, 2 : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor @@ -1753,11 +1705,9 @@ hal.executable private @matmul_interchange { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @no_compute { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { @@ -1784,9 +1734,9 @@ hal.executable private @no_compute { %7 = flow.dispatch.workload.ordinal %2, 2 : index %8 = flow.dispatch.workload.ordinal %3, 3 : index %9 = flow.dispatch.workload.ordinal %4, 4 : index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref{%5, %6, %7} + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : 
memref{%5, %6, %7} memref.assume_alignment %10, 64 : memref - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1x?x?xf32>{%8, %9} + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1x?x?xf32>{%8, %9} memref.assume_alignment %11, 64 : memref<1x?x?xf32> return } @@ -1800,13 +1750,11 @@ hal.executable private @no_compute { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @tile_multiuse_producer { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf_x86_64", {}>) { @@ -1820,13 +1768,13 @@ hal.executable private @tile_multiuse_producer { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %s0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %s0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %s1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %s1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %s2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) + %s2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) 
offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x128xf32> @@ -1877,10 +1825,10 @@ hal.executable private @tile_multiuse_producer { } } // CHECK-LABEL: func @tile_multiuse_producer() -// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) -// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) -// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) -// CHECK-DAG: %[[RESULT_BINDING2:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) +// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) +// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) +// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) +// CHECK-DAG: %[[RESULT_BINDING2:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) // CHECK: scf.for %[[IV0:.+]] = // CHECK: scf.for %[[IV1:.+]] = // CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]], offsets = [%[[IV0]], %[[IV1]], 0] @@ -1902,13 +1850,11 @@ hal.executable private @tile_multiuse_producer { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @no_tile { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { @@ -1921,10 +1867,10 @@ hal.executable private @no_tile { func.func @no_tile() attributes {translation_info = 
#iree_codegen.translation_info} { %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c64) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c64) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor> -> tensor<10xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor> -> tensor<10xi32> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [3], strides = [1] : !flow.dispatch.tensor> -> tensor<3xf32> @@ -1947,11 +1893,9 @@ hal.executable private @no_tile { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @pack_lowering { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { @@ -1964,9 +1908,9 @@ hal.executable private @pack_lowering { func.func @gemm_lhs_pack() attributes {translation_info = 
#iree_codegen.translation_info} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [100, 250], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<100x250xf32> @@ -1990,11 +1934,9 @@ hal.executable private @pack_lowering { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @pack_lowering { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { @@ -2008,9 +1950,9 @@ hal.executable private @pack_lowering { %c0 = arith.constant 0 : index %c114688 = arith.constant 114688 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c114688) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c114688) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> @@ -2033,18 +1975,13 @@ hal.executable private @pack_lowering { // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @clone_index_computations { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { - hal.executable.export public @clone_index_computations ordinal(0) layout( - #hal.pipeline.layout, <1, storage_buffer>]>]>) - { + hal.executable.export public @clone_index_computations ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3 : index, %arg4 : index): %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4 hal.return %x, %y, %z : index, index, index @@ -2065,11 +2002,11 @@ hal.executable private @clone_index_computations { %5 = flow.dispatch.workload.ordinal %1, 1 : index %6 = flow.dispatch.workload.ordinal %2, 2 : index %7 = flow.dispatch.workload.ordinal %3, 3 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} %9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%6] %10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%7] - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%9, %10} %12 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor @@ -2102,11 +2039,11 @@ hal.executable private @clone_index_computations { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private 
@dynamic_unpack { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { @@ -2131,8 +2068,8 @@ hal.executable private @dynamic_unpack { %5 = flow.dispatch.workload.ordinal %1, 1 : index %6 = flow.dispatch.workload.ordinal %2, 2 : index %7 = flow.dispatch.workload.ordinal %3, 3 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 @@ -2151,18 +2088,15 @@ hal.executable private @dynamic_unpack { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @dynamic_unpack_dynamic_tile { hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) { - hal.executable.export public @dynamic_unpack_dynamic_tile ordinal(0) layout( - #hal.pipeline.layout, <1, storage_buffer>]>]>) - { + hal.executable.export public @dynamic_unpack_dynamic_tile ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index): %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4 hal.return 
%x, %y, %z : index, index, index @@ -2185,8 +2119,8 @@ hal.executable private @dynamic_unpack_dynamic_tile { %5 = flow.dispatch.workload.ordinal %1, 1 : index %6 = flow.dispatch.workload.ordinal %2, 2 : index %7 = flow.dispatch.workload.ordinal %3, 3 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5, %c32, %c16} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5, %c32, %c16} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, %c32, %c16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5, %c32, %c16} -> tensor %11 = tensor.empty(%6, %7) : tensor %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%c32, %c16] into %11 @@ -2205,11 +2139,9 @@ hal.executable private @dynamic_unpack_dynamic_tile { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @unpack_elem { hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {}>) { @@ -2221,8 +2153,8 @@ hal.executable private @unpack_elem { builtin.module { func.func @unpack_elem() attributes {translation_info = #iree_codegen.translation_info} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [16, 48, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x48x8x8xf32> %3 = tensor.empty() : tensor<128x384xf32> %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<16x48x8x8xf32> -> tensor<128x384xf32> @@ -2246,11 +2178,9 @@ hal.executable private @unpack_elem { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> @@ -2277,10 +2207,10 @@ hal.executable private @dynamic_unpack_fusion { %0:2 = iree_codegen.query_tile_sizes tensor<12544x16xi32, #iree_encoding.encoding> -> index, index %1 = affine.apply affine_map<()[s0] -> (12544 ceildiv s0)>()[%0#0] %2 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#1] - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c200960) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1003776) flags(ReadOnly) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1053952) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c200960) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, 
%0#0, %0#1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1003776) flags(ReadOnly) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1053952) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %10 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [%1, %2, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} -> tensor %11 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [12544], strides = [1] : !flow.dispatch.tensor> -> tensor<12544xi32> %12 = flow.dispatch.tensor.load %5, offsets = [0], sizes = [16], strides = [1] : !flow.dispatch.tensor> -> tensor<16xi32> @@ -2311,15 +2241,13 @@ hal.executable private @dynamic_unpack_fusion { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer>, - #hal.descriptor_set.binding<5, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @elem_pack { hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>) { @@ -2338,15 +2266,15 @@ hal.executable private @elem_pack { %c1572864 = arith.constant 1572864 : index %c2359296 = arith.constant 2359296 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1339392) 
flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c786432) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c823296) flags(ReadOnly) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c825344) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c1572864) : !flow.dispatch.tensor> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) alignment(64) offset(%c2359296) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1339392) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c786432) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c823296) flags(ReadOnly) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) 
offset(%c825344) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c1572864) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) alignment(64) offset(%c2359296) : !flow.dispatch.tensor> %9 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 2, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x2x512xf32> %10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %11 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> @@ -2385,16 +2313,17 @@ hal.executable private @elem_pack { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @scatter { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { hal.executable.export public @scatter ordinal(0) - layout(#hal.pipeline.layout, <1, storage_buffer>]>]>) + layout(#hal.pipeline.layout, + #hal.pipeline.binding + ]>) { ^bb0(%arg0: !hal.device): %x, %y, %z = flow.dispatch.workgroup_count_from_slice @@ -2406,9 +2335,9 @@ hal.executable private @scatter { %c251668480 = arith.constant 251668480 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c228075520) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c251668480) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c228075520) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c251668480) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [5898240], strides = [1] : !flow.dispatch.tensor> -> tensor<5898240xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [5898240, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<5898240x4xi32> %5 = tensor.empty() : tensor<1x640x48x48xf32> @@ -2429,11 +2358,9 @@ hal.executable private @scatter { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @collapse_workgroups_dispatch_dispatch_0 { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -2445,8 +2372,8 @@ hal.executable private @collapse_workgroups_dispatch_dispatch_0 { builtin.module { func.func @collapse_workgroups_dispatch_dispatch_0_generic_1024x128x16x64() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load 
%0, offsets = [0, 0, 0, 0], sizes = [1024, 16, 128, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x16x128x64xf32> %3 = tensor.empty() : tensor<1024x128x16x64xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<1024x16x128x64xf32>) outs(%3 : tensor<1024x128x16x64xf32>) attrs = {lowering_config = #iree_codegen.lowering_config} { @@ -2472,13 +2399,11 @@ hal.executable private @collapse_workgroups_dispatch_dispatch_0 { // ----- #config = #iree_codegen.lowering_config -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", { data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", @@ -2498,13 +2423,13 @@ hal.executable private @matmul_tensors { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : 
!flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor @@ -2539,13 +2464,11 @@ hal.executable private @matmul_tensors { #config = #iree_codegen.lowering_config #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> #map = affine_map<()[s0] -> (s0 ceildiv 64)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #translation = #iree_codegen.translation_info module { @@ -2563,10 +2486,10 @@ module { %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -2584,9 +2507,9 @@ module { } // CHECK-LABEL: func.func @matmul_already_distributed -// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // CHECK-NOT: scf.for // CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [%workgroup_id_y, 0] // CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, %workgroup_id_x] @@ -2597,11 +2520,9 @@ module { // Check that the distribution avoids distributing unit-trip count loops. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @avoid_unit_range_distribute { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -2638,9 +2559,9 @@ hal.executable private @avoid_unit_range_distribute { %20 = arith.index_castui %19 : i64 to index %21 = flow.dispatch.workload.ordinal %15, 0 : index %22 = flow.dispatch.workload.ordinal %20, 1 : index - %23 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%21, %22} - %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor>{%22} - %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%22} + %23 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%21, %22} + %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor>{%22} + %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%22} %26 = flow.dispatch.tensor.load %23, offsets = [0, 0, 0, 0, 0], sizes = [32, %21, %22, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor>{%21, %22} -> tensor<32x?x?x16x16xf16> %27 = flow.dispatch.tensor.load %24, offsets = [0, 0, 0, 0, 0], sizes = [32, %22, 8, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor>{%22} -> tensor<32x?x8x16x16xf16> %28 = tensor.empty(%22) : tensor<32x?x16x8x16xf16> @@ -2674,12 +2595,10 @@ hal.executable private @avoid_unit_range_distribute { // Check that the distribution avoids distributing unit-trip count loops. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @set_size_to_tilesize_when_divisible { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -2715,10 +2634,10 @@ hal.executable private @set_size_to_tilesize_when_divisible { %19 = arith.ori %16, %18 : i64 %20 = arith.index_castui %19 : i64 to index %21 = flow.dispatch.workload.ordinal %20, 1 : index - %22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> %23 = flow.dispatch.workload.ordinal %21, 2 : index - %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%21} - %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%10) : !flow.dispatch.tensor>{%23} + %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%21} + %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%10) : !flow.dispatch.tensor>{%23} %26 = flow.dispatch.workload.ordinal %15, 0 : index %27 = flow.dispatch.tensor.load %24, offsets = [0, 0, 0, 0], sizes = [%21, 16, 32, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%21} -> tensor %28 = flow.dispatch.tensor.load %22, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xf16> @@ -2751,12 +2670,10 @@ hal.executable private @set_size_to_tilesize_when_divisible { // ----- #config = #iree_codegen.lowering_config 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64"> #translation = #iree_codegen.translation_info @@ -2770,11 +2687,11 @@ hal.executable private @reshape_matmul_tensors { builtin.module { func.func @reshape_matmul() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 2, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x2x256xf32> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir index 982c6ab3f8dcc..55b6ffdfd3778 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-type-propagation))" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @generic_op_illegal_operand() { %d = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = arith.trunci %2 : tensor to tensor %4 = tensor.empty(%d) : tensor @@ -25,8 +23,8 @@ func.func @generic_op_illegal_operand() { return } // CHECK-LABEL: func.func @generic_op_illegal_operand() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic @@ -40,16 +38,14 @@ func.func @generic_op_illegal_operand() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @generic_op_illegal_operand_i7() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : 
!flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = arith.trunci %2 : tensor to tensor %4 = tensor.empty(%d) : tensor @@ -65,8 +61,8 @@ func.func @generic_op_illegal_operand_i7() { return } // CHECK-LABEL: func.func @generic_op_illegal_operand_i7() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic @@ -80,16 +76,14 @@ func.func @generic_op_illegal_operand_i7() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @generic_op_illegal_operand_i33() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = arith.trunci %2 : tensor to tensor %4 = tensor.empty(%d) : tensor @@ -105,8 +99,8 @@ func.func @generic_op_illegal_operand_i33() { 
return } // CHECK-LABEL: func.func @generic_op_illegal_operand_i33() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic @@ -120,16 +114,14 @@ func.func @generic_op_illegal_operand_i33() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @generic_op_illegal_result() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = tensor.empty(%d) : tensor %4 = linalg.generic { @@ -145,8 +137,8 @@ func.func @generic_op_illegal_result() { return } // CHECK-LABEL: func.func @generic_op_illegal_result() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic @@ -160,18 +152,16 @@ func.func @generic_op_illegal_result() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_extract() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = tensor.extract_slice %2[%offset] [%size] [1] : tensor to tensor %4 = arith.trunci %3 : tensor to tensor @@ -180,28 +170,26 @@ func.func @tensor_extract() { return } // CHECK-LABEL: func.func @tensor_extract() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK: %[[EXTRACT:.+]] = tensor.extract_slice %[[INTENSOR]] // CHECK: flow.dispatch.tensor.store %[[EXTRACT]], %[[OUT]] // 
----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @tensor_insert() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%d} %3 = flow.dispatch.tensor.load %0, offsets = [%offset], sizes=[%size], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %5 = arith.trunci %3 : tensor to tensor @@ -212,9 +200,9 @@ func.func @tensor_insert() { return } // CHECK-LABEL: func.func @tensor_insert() -// CHECK-DAG: %[[IN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[IN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[IN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[IN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[OUT:.+]] = 
hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[IN1TENSOR:.+]] = flow.dispatch.tensor.load %[[IN1]] // CHECK-DAG: %[[IN2TENSOR:.+]] = flow.dispatch.tensor.load %[[IN2]] // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[IN1TENSOR]] into %[[IN2TENSOR]] @@ -222,18 +210,16 @@ func.func @tensor_insert() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @for_loop() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %lb = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %step = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %3 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %4 = arith.trunci %2 : tensor to tensor @@ -249,8 +235,8 @@ func.func @for_loop() { return } // CHECK-LABEL: func.func @for_loop() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]] // CHECK-DAG: %[[OUTTENSOR:.+]] = 
flow.dispatch.tensor.load %[[OUT]] // CHECK: %[[FOR:.+]] = scf.for @@ -262,14 +248,12 @@ func.func @for_loop() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @fill_op() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} %1 = tensor.empty(%d) : tensor %false = arith.constant false %2 = linalg.fill ins(%false : i1) outs(%1 : tensor) -> tensor @@ -278,7 +262,7 @@ func.func @fill_op() { return } // CHECK-LABEL: func.func @fill_op() -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-DAG: %[[INIT:.+]] = tensor.empty // CHECK-DAG: %[[FALSE:.+]] = arith.constant false // CHECK-DAG: %[[EXT_SCALAR:.+]] = arith.extui %[[FALSE]] @@ -289,16 +273,14 @@ func.func @fill_op() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> #map = affine_map<(d0) -> (d0)> func.func @constant_op() { - %a = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %b = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %c = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> + %a = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %b = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %c = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> %at = flow.dispatch.tensor.load %a, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %bt 
= flow.dispatch.tensor.load %b, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %select = arith.constant dense<[true, false, true, false]> : tensor<4xi1> @@ -326,16 +308,14 @@ func.func @constant_op() { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> #map = affine_map<(d0) -> (d0)> func.func @constant_splat_op() { - %a = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %b = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %c = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> + %a = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %b = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %c = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> %at = flow.dispatch.tensor.load %a, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %bt = flow.dispatch.tensor.load %b, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor> -> tensor<4xi32> %select = arith.constant dense : tensor<4xi1> @@ -357,18 +337,16 @@ func.func @constant_splat_op() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @tensor_extract() { %c0 = arith.constant 0 : index %c13 = arith.constant 13 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [14], strides = [1] : !flow.dispatch.tensor> -> tensor<14xi8> @@ -389,7 +367,7 @@ func.func @tensor_extract() { return } // CHECK-LABEL: func @tensor_extract() -// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: !flow.dispatch.tensor> // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[BINDING]] // CHECK: %[[EXTRACTED:.+]] = tensor.extract %[[LOAD]] @@ -425,18 +403,16 @@ func.func @named_op(%arg0 : tensor, %arg1 : tensor) -> tensor, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @scatter() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [8], strides = [1] : !flow.dispatch.tensor> -> tensor<8xi8> %4 = arith.trunci %3 : tensor<8xi8> to tensor<8xi1> %5 = 
flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x1xi32> @@ -453,9 +429,9 @@ func.func @scatter() { } // CHECK-LABEL: func.func @scatter() -// CHECK-DAG: %[[UPDATES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[UPDATES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[UPDATES_TENSOR:.+]] = flow.dispatch.tensor.load %[[UPDATES]] // CHECK-DAG: %[[INDICES_TENSOR:.+]] = flow.dispatch.tensor.load %[[INDICES]] // CHECK-DAG: %[[OUT_TENSOR:.+]] = flow.dispatch.tensor.load %[[OUT]] @@ -472,16 +448,14 @@ func.func @scatter() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @sort() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi8> %3 = arith.trunci %2 : tensor<1xi8> to tensor<1xi1> %4 = flow.dispatch.tensor.load %1, 
offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32> @@ -496,8 +470,8 @@ func.func @sort() { // CHECK-LABEL: func.func @sort() // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[A_TENSOR:.+]] = flow.dispatch.tensor.load %[[A]] // CHECK-DAG: %[[B_TENSOR:.+]] = flow.dispatch.tensor.load %[[B]] // CHECK: %[[SORT:.+]]:2 = iree_linalg_ext.sort dimension(0) @@ -511,16 +485,14 @@ func.func @sort() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @sort_secondary() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi8> %4 = arith.trunci %3 : tensor<1xi8> to tensor<1xi1> @@ -536,8 +508,8 @@ func.func @sort_secondary() { // CHECK-LABEL: func.func @sort_secondary() // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : 
index -// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[A_TENSOR:.+]] = flow.dispatch.tensor.load %[[A]] // CHECK-DAG: %[[B_TENSOR:.+]] = flow.dispatch.tensor.load %[[B]] // CHECK: %[[SORT:.+]]:2 = iree_linalg_ext.sort dimension(0) @@ -549,16 +521,14 @@ func.func @sort_secondary() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @branch_op() { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i8 %4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %5 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor diff --git a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir index 367a1fcad4954..d09593312cb18 100644 --- 
a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-type-propagation))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @generic_op_i4() { %d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%d} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%d} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%d} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%d} %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor>{%d} -> tensor %4 = tensor.empty(%d) : tensor %5 = linalg.generic { @@ -25,8 +23,8 @@ func.func @generic_op_i4() { } // CHECK-LABEL: func.func @generic_op_i4() -// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]{{.+}} -> tensor // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp index 1143c67025c00..dcab0b713fc1c 
100644 --- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp +++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp @@ -121,8 +121,7 @@ findOrCreateSubspanBuffer(RewriterBase &rewriter, if (!bufferMemrefType) continue; - if (bufferSubspanOp.getSet() != subspanOp.getSet() || - bufferSubspanOp.getBinding() != subspanOp.getBinding() || + if (bufferSubspanOp.getBinding() != subspanOp.getBinding() || bufferSubspanOp.getDescriptorType() != subspanOp.getDescriptorType() || bufferSubspanOp.getByteOffset() != subspanOp.getByteOffset() || !llvm::equal(bufferSubspanOp.getDynamicDims(), @@ -139,7 +138,7 @@ findOrCreateSubspanBuffer(RewriterBase &rewriter, // Just change the result type of the InterfaceBindingSubspanOp. Value buffer = rewriter.create( subspanOp->getLoc(), memRefType, subspanOp.getLayout(), - subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(), + subspanOp.getBinding(), subspanOp.getByteOffset(), subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); rewriter.create( diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp index 21847b4185360..e006e1d5f1101 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp @@ -309,9 +309,10 @@ struct ConvertHALInterfaceBindingSubspanOp subspanOp, "failed to convert interface.binding.subspan result to memref type"); } - auto memRefDesc = abi.loadBinding( - subspanOp, subspanOp.getFlatBindingIndex(), operands.getByteOffset(), - memRefType, operands.getDynamicDims(), rewriter); + auto memRefDesc = + abi.loadBinding(subspanOp, subspanOp.getBinding().getSExtValue(), + operands.getByteOffset(), memRefType, + operands.getDynamicDims(), rewriter); rewriter.replaceOp(subspanOp, {memRefDesc}); return success(); } diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp index 6c9af2971008e..154ab37ecc03c 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp @@ -277,13 +277,13 @@ LLVM::DIDerivedTypeAttr ExecutableLibraryDI::getDispatchStateV0T() { getMemberOf("workgroup_size_x", getUint32T(), &offsetInBits), getMemberOf("workgroup_size_y", getUint32T(), &offsetInBits), getMemberOf("workgroup_size_z", getUint16T(), &offsetInBits), - getMemberOf("push_constant_count", getUint16T(), &offsetInBits), + getMemberOf("constant_count", getUint16T(), &offsetInBits), getMemberOf("workgroup_count_x", getUint32T(), &offsetInBits), getMemberOf("workgroup_count_y", getUint32T(), &offsetInBits), getMemberOf("workgroup_count_z", getUint16T(), &offsetInBits), getMemberOf("max_concurrency", getUint8T(), &offsetInBits), getMemberOf("binding_count", getUint8T(), &offsetInBits), - getMemberOf("push_constants", + getMemberOf("constants", getPtrOf(getConstOf(getArrayOf(getUint32T(), 64))), &offsetInBits), getMemberOf( @@ -412,7 +412,7 @@ HALDispatchABI::getDispatchStateType(MLIRContext *context, fieldTypes.push_back(uint32Type); fieldTypes.push_back(uint16Type); - // uint16_t push_constant_count; + // uint16_t constant_count; fieldTypes.push_back(uint16Type); // uint32_t workgroup_count_x; @@ -428,7 +428,7 @@ HALDispatchABI::getDispatchStateType(MLIRContext *context, // uint8_t binding_count; fieldTypes.push_back(uint8Type); - // const uint32_t * push_constants; + // const uint32_t * constants; // void *const * binding_ptrs; // const size_t * binding_lengths; fieldTypes.push_back(opaquePtrType); @@ -698,11 +698,11 @@ Value HALDispatchABI::loadWorkgroupLocalMemoryPtr(Operation *forOp, Value HALDispatchABI::loadPushConstantCount(Operation *forOp, OpBuilder &builder) { auto countValue = - loadFieldValue(forOp, DispatchStateField::push_constant_count, 
builder); + loadFieldValue(forOp, DispatchStateField::constant_count, builder); auto resultValue = castValueToType( forOp->getLoc(), countValue, typeConverter->convertType(builder.getIndexType()), builder); - return buildValueDI(forOp, resultValue, "push_constant_count", di.getSizeT(), + return buildValueDI(forOp, resultValue, "constant_count", di.getSizeT(), builder); } @@ -710,7 +710,7 @@ Value HALDispatchABI::loadPushConstant(Operation *forOp, int64_t offset, Type resultType, OpBuilder &builder) { auto loc = forOp->getLoc(); auto constantsPtrValue = - loadFieldValue(forOp, DispatchStateField::push_constants, builder); + loadFieldValue(forOp, DispatchStateField::constants, builder); auto pushConstantType = IntegerType::get(context, 32); Value constantPtrValue = builder.create( loc, constantsPtrValue.getType(), pushConstantType, constantsPtrValue, @@ -719,8 +719,7 @@ Value HALDispatchABI::loadPushConstant(Operation *forOp, int64_t offset, builder.create(loc, pushConstantType, constantPtrValue); auto resultValue = castValueToType(loc, constantValue, resultType, builder); return buildValueDI(forOp, resultValue, - StringRef("push_constant[") + std::to_string(offset) + - "]", + StringRef("constant[") + std::to_string(offset) + "]", di.getBasicType(resultType), builder); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h index d87b063a79ab7..ab341cd19c0cb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h @@ -156,13 +156,13 @@ class HALDispatchABI { /*uint32_t*/ workgroup_size_x, /*uint32_t*/ workgroup_size_y, /*uint16_t*/ workgroup_size_z, - /*uint16_t*/ push_constant_count, + /*uint16_t*/ constant_count, /*uint32_t*/ workgroup_count_x, /*uint32_t*/ workgroup_count_y, /*uint16_t*/ workgroup_count_z, /*uint8_t*/ max_concurrency, /*uint8_t*/ binding_count, - /*intptr_t*/ push_constants, + /*intptr_t*/ 
constants, /*intptr_t*/ binding_ptrs, /*intptr_t*/ binding_lengths, }; diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir index 327ef623bd015..839e38bc7e05a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir @@ -5,11 +5,9 @@ data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android29"}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @mmt4d_kernel_dispatch() attributes {hal.executable.target = #target} { %c0_i8 = arith.constant 0 : i8 @@ -19,11 +17,11 @@ func.func @mmt4d_kernel_dispatch() attributes {hal.executable.target = #target} %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index %c64 = arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x2x8x4xi8> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x2x8x4xi8> memref.assume_alignment %0, 64 : memref<1x2x8x4xi8> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) : memref<1x2x8x4xi8> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) : memref<1x2x8x4xi8> memref.assume_alignment %1, 64 : memref<1x2x8x4xi8> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xi32> 
memref.assume_alignment %2, 64 : memref<1x1x8x8xi32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir index 814138ac52e33..38dd757706344 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir @@ -2,12 +2,10 @@ // RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-llvmcpu-mmt4d-vector-lowering{vector-contract-custom-kernels=false}))" --split-input-file | FileCheck %s -check-prefix=CHECK-KERNEL-OFF // RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-llvmcpu-mmt4d-vector-lowering{vector-contract-custom-kernels=true}))" --split-input-file | FileCheck %s -check-prefix=CHECK-KERNEL-ON -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> @@ -24,9 +22,9 @@ module { %cst_0 = arith.constant 0.000000e+00 : f32 %c384 = arith.constant 384 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -71,9 +69,9 @@ module { // CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index // CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index // CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index -// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor> -// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : !flow.dispatch.tensor> -// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : !flow.dispatch.tensor> +// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor> +// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : !flow.dispatch.tensor> +// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : !flow.dispatch.tensor> // CHECK: %[[DST_TILE_INIT:.+]] = tensor.empty() // CHECK: scf.for %[[I_IDX:.+]] = {{.*}} to %[[C384]] step %{{[0-9]*}} { // CHECK: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]], {{.*}} -> tensor<64x512xf32> @@ -95,15 +93,13 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer>, - #hal.descriptor_set.binding<5, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> @@ -125,12 +121,12 @@ module { %c1835008 = arith.constant 1835008 : index %c0 = arith.constant 0 : index %c64 = 
arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%c1835008) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) offset(%c1835008) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) : !flow.dispatch.tensor> %6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x512xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir index 20c59336353b8..4b3902c0a1638 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file 
--pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-llvm))))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", { cpu_features = "+m,+a,+f,+d,+c", @@ -27,8 +25,8 @@ hal.executable private @apply_scale_no_vector_feature { %cst = arith.constant dense<19689> : vector<2xi32> %cst_0 = arith.constant dense<15> : vector<2xi8> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32> %2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32> %3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32> vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32> @@ -48,11 +46,9 @@ hal.executable private @apply_scale_no_vector_feature { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", { cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+v", @@ -75,8 +71,8 @@ hal.executable private @apply_scale_v { %cst = arith.constant dense<19689> : vector<2xi32> %cst_0 = arith.constant dense<15> : vector<2xi8> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32> %2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32> %3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32> vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32> @@ -94,11 +90,9 @@ hal.executable private @apply_scale_v { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", { cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve64x", @@ -121,8 +115,8 @@ hal.executable private @apply_scale_zve64x { %cst = arith.constant dense<19689> : vector<2xi32> %cst_0 = arith.constant dense<15> : vector<2xi8> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32> %2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32> %3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32> vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32> @@ -140,11 +134,9 @@ 
hal.executable private @apply_scale_zve64x { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", { cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve32x", @@ -167,8 +159,8 @@ hal.executable private @apply_scale_zve32x { %cst = arith.constant dense<19689> : vector<2xi32> %cst_0 = arith.constant dense<15> : vector<2xi8> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32> %2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32> %3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32> vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32> @@ -193,11 +185,9 @@ hal.executable private @apply_scale_zve32x { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", { cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve32f", @@ -220,8 +210,8 @@ hal.executable private @apply_scale_zve32f { %cst = arith.constant dense<19689> : vector<2xi32> %cst_0 = arith.constant dense<15> : vector<2xi8> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32> - 
%1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32> %2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32> %3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32> vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir index 6268c4fb6494d..f9189dbb409cc 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir @@ -43,11 +43,9 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @interleave_and_bitcast_lowering() { %cst = arith.constant dense<4> : vector<4x2xi8> @@ -58,8 +56,8 @@ func.func @interleave_and_bitcast_lowering() { %c3 = arith.constant 3 : index %c4096 = arith.constant 4096 : index %c8192 = arith.constant 8192 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c4096) flags(ReadOnly) : memref<128xi8, strided<[1], offset: 4096>> - %out_buffer = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c8192) : memref<256x64xi4, strided<[64, 1], offset: 8192>> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c4096) flags(ReadOnly) : memref<128xi8, strided<[1], offset: 4096>> + %out_buffer = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) 
offset(%c8192) : memref<256x64xi4, strided<[64, 1], offset: 8192>> %2 = vector.load %0[%c0] : memref<128xi8, strided<[1], offset: 4096>>, vector<2xi8> %3 = vector.bitcast %2 : vector<2xi8> to vector<4xi4> %4 = vector.insert %3, %cst_0 [3] : vector<4xi4> into vector<4x4xi4> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir index 8675111e4cf6e..bda5e45d2d241 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --iree-convert-to-llvm --split-input-file %s | FileCheck %s --dump-input=always -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: llvm.func @binding_ptrs( @@ -19,7 +17,7 @@ func.func @binding_ptrs() { // CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr %c72 = arith.constant 72 : index %c128 = arith.constant 128 : index - %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%c72) : memref>{%c128} + %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%c72) : memref>{%c128} // CHECK: %[[OFFSET_PTR0:.+]] = llvm.getelementptr %[[BASE_PTR]][18] // CHECK: %[[OFFSET_D0:.+]] = llvm.mul %[[C5]], %[[C2]] @@ -40,11 +38,9 @@ llvm.func @sink(%arg0: f32) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: llvm.func @binding_ptrs_dynamic( @@ -80,7 +76,7 @@ func.func @binding_ptrs_dynamic() { // CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10] // CHECK: %[[ARRAY_PTR:.+]] = llvm.getelementptr %[[BINDING_PTRS]][1] : (!llvm.ptr) -> !llvm.ptr, 
!llvm.ptr // CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr - %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref>{%dim0, %dim1, %dim2} + %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref>{%dim0, %dim1, %dim2} // CHECK: %[[BASE_BIT_OFFSET:.+]] = llvm.mul %[[OFFSET_ZEXT]], %[[C8]] // CHECK: %[[BASE_OFFSET:.+]] = llvm.udiv %[[BASE_BIT_OFFSET]], %[[C32]] @@ -108,11 +104,9 @@ llvm.func @sink(%arg0: f32) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: llvm.func @binding_ptrs_sub_byte_dynamic( @@ -131,7 +125,7 @@ func.func @binding_ptrs_sub_byte_dynamic() { // CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10] // CHECK: %[[ARRAY_PTR:.+]] = llvm.getelementptr %[[BINDING_PTRS]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.ptr // CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr - %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref>{%dim0} + %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref>{%dim0} // CHECK: %[[BASE_BIT_OFFSET:.+]] = llvm.mul %[[OFFSET_ZEXT]], %[[C8]] // CHECK: %[[BASE_OFFSET:.+]] = llvm.udiv %[[BASE_BIT_OFFSET]], %[[C4]] diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir index cccef04c46d45..9be75c968822a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir @@ -1,9 +1,7 @@ // RUN: iree-opt --iree-convert-to-llvm --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = 
#hal.pipeline.layout ]> // CHECK-LABEL: llvm.func @constant_values diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir index 7601ed03628c8..db939594a5f7f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir @@ -1,20 +1,18 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --verify-diagnostics --split-input-file %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> #translation = #iree_codegen.translation_info func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{expected four tiling levels, got 0}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -22,21 +20,19 @@ func.func @illegal() attributes 
{hal.executable.target = #executable_target_embe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{native_vector_size must be empty}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -44,12 +40,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -57,9 +51,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe module { func.func @illegal() attributes 
{hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{expected only parallel dims to be set in the second tiling level, got 2-th tile size set}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -68,21 +62,19 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{only reduction dims to be set in the third tiling level, got 1-th tile size set}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -90,21 +82,19 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{expected [0, 2) to be set exactly once in interchange #0}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -112,21 +102,19 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe // ----- -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<36x9x9x512xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x3x512x512xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<36x7x7x512xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<36x9x9x512xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x3x512x512xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<36x7x7x512xf32> // expected-error @+1 {{can't decompose the conv op}} linalg.conv_2d_nhwc_hwcf {lowering_config = #config} ins(%0, %1 : memref<36x9x9x512xf32>, memref<3x3x512x512xf32>) outs(%2 : memref<36x7x7x512xf32>) return @@ -134,12 +122,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -147,9 +133,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_embe module { func.func @illegal() attributes {hal.executable.target = 
#executable_target_embedded_elf_x86_64_, translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32> // expected-error @+1 {{can't decompose the conv op}} linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, lowering_config = #config, strides = dense<1> : tensor<2xi64>} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>) return diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir index 10aa93a1a2443..a44a37000a171 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-peel))" -split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @peel_static_matmul() { %c16 = arith.constant 16 : index @@ -16,9 +14,9 @@ func.func @peel_static_matmul() { %c512 = arith.constant 512 : index %c128 = arith.constant 128 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir index f9eae566f85b9..7769fc6f42f25 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --iree-codegen-linalg-to-llvm-pipeline=enable-arm-sme --split-input-file %s | FileCheck %s // RUN: iree-opt --iree-codegen-linalg-to-llvm-pipeline=enable-arm-sme --iree-llvmcpu-force-arm-streaming --split-input-file %s | FileCheck %s -check-prefixes=FORCE-ARM-STREAMING -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module { module { @@ -16,7 +14,7 @@ module { %c1 = arith.constant 1 : index %cst = arith.constant 0.000000e+00 : f32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<1xf32> %3 = linalg.fill {lowering_config = #iree_codegen.lowering_config} ins(%cst : f32) outs(%2 : tensor<1xf32>) -> tensor<1xf32> @@ -39,12 +37,10 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module { module { @@ -54,7 +50,7 @@ module { %c1 = arith.constant 1 : index %cst = arith.constant 0.000000e+00 : f32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<1xf32> %3 = linalg.fill {lowering_config = #iree_codegen.lowering_config} ins(%cst : f32) outs(%2 : tensor<1xf32>) -> tensor<1xf32> @@ -78,12 +74,10 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module { module { @@ -93,7 +87,7 @@ module { %c1 = arith.constant 1 : index %cst = arith.constant 0.000000e+00 : f32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<100x100xf32> %3 = linalg.fill {lowering_config = 
#iree_codegen.lowering_config} ins(%cst : f32) outs(%2 : tensor<100x100xf32>) -> tensor<100x100xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir index e26a427d7bea1..faca2839fd2d5 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1)> @@ -15,9 +13,9 @@ module { %c0 = arith.constant 0 : index %cst = arith.constant 3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %5 = tensor.empty() : tensor<24x512x16x1xf32> @@ -47,12 +45,10 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1)> @@ -62,9 +58,9 @@ module { %c0 = arith.constant 0 : index %cst = arith.constant 3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<384x512xf32> @@ -93,19 +89,17 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> module { func.func @unaligned_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [383, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<383x512xf32> %3 = tensor.empty() : tensor<24x512x16x1xf32> %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir index b619559ffe3cf..71c88e4ab734a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> func.func @pad_conv_2d_nchw_fchw_1x320x64x64x320x3x3() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { @@ -23,12 +21,12 @@ func.func @pad_conv_2d_nchw_fchw_1x320x64x64x320x3x3() attributes {hal.executabl %7 = arith.index_castui %2 {stream.alignment = 256 : index, stream.values = [10507520 : index, 21488640 : index]} : i32 to index %8 = arith.index_castui %3 {stream.alignment = 256 : index, stream.values = [10508800 : index, 21489920 : index]} : i32 to index %9 = arith.index_castui %4 {stream.alignment = 128 : index, stream.values = [10486400 : index, 10487680 : index]} : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5243520) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : 
!flow.dispatch.tensor> - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5243520) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> %16 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x320x64x64xf32> %17 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<320x320x3x3xf32> %18 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [1, 320], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x320xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir index ab8b261fbe300..899c422c1c84f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --pass-pipeline="builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @pad_only_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c634816 = arith.constant 634816 : index %c3846080 = arith.constant 3846080 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x112x112x64xf32> %padded = tensor.pad %2 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: 
index): @@ -47,12 +45,10 @@ func.func @pad_only_dispatch() attributes {hal.executable.target = #executable_t // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> @@ -64,10 +60,10 @@ func.func @pad_with_producer_dispatch() attributes {hal.executable.target = #exe %c0 = arith.constant 0 : index %cst = arith.constant 1.001000e-05 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c802816) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c72545728) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c72676800) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c802816) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c72545728) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c72676800) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 56, 56, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x56x56x256xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1, 1, 256, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x256x128xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> @@ -128,20 +124,18 @@ func.func @pad_with_producer_dispatch() attributes {hal.executable.target = #exe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @pad_consumer_fusion_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x14x14x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 256, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x256x256xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x14x14x256xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir index e8602dc6995aa..d3c411e9677d0 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir @@ -1,20 +1,18 @@ // RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-llvmcpu-lower-executable-target))' -split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {native_vector_size = 64}> func.func @no_peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x64xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [64, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x512xf32> %5 = tensor.empty() : tensor<128x512xf32> @@ -34,21 +32,19 @@ func.func @no_peel_static_matmul() attributes {hal.executable.target = #executab // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {native_vector_size = 64}> func.func @peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + 
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 49], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x49xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [49, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<49x512xf32> %5 = tensor.empty() : tensor<128x512xf32> @@ -80,12 +76,10 @@ func.func @peel_static_matmul() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -98,9 +92,9 @@ func.func @peel_dynamic_matmul() attributes {hal.executable.target = #executable %3 = arith.index_cast %0 : i32 to index %4 = arith.index_cast %1 : i32 to index %5 = arith.index_cast %2 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%4, %3} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%3, %5} - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%4, %5} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%4, %3} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%3, %5} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%4, %5} %9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%4, %3], strides = [1, 1] : !flow.dispatch.tensor>{%4, %3} -> tensor %10 = flow.dispatch.tensor.load %7, offsets = 
[0, 0], sizes = [%3, %5], strides = [1, 1] : !flow.dispatch.tensor>{%3, %5} -> tensor %11 = tensor.empty(%4, %5) : tensor @@ -140,12 +134,10 @@ func.func @peel_dynamic_matmul() attributes {hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -159,9 +151,9 @@ module { %3 = arith.index_cast %0 : i32 to index %4 = arith.index_cast %1 : i32 to index %5 = arith.index_cast %2 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%4, %3} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%3, %5} - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%4, %5} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%4, %3} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%3, %5} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%4, %5} %9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%4, %3], strides = [1, 1] : !flow.dispatch.tensor>{%4, %3} -> tensor %10 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%3, %5], strides = [1, 1] : !flow.dispatch.tensor>{%3, %5} -> tensor %11 = tensor.empty(%4, %5) : tensor diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir index 3d4f9c71253fa..639f81337d6a4 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir +++ 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=false --split-input-file %s | FileCheck %s // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=true --split-input-file %s | FileCheck %s --check-prefix=REORDERCHECK -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -15,8 +13,8 @@ func.func @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_support %c0 = arith.constant 0 : index %cst = arith.constant dense<0> : tensor<1024x512xi32> %c1_i32 = arith.constant 1 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x512x256xi32> %3 = tensor.empty() : tensor<1024x512xi32> %4 = 
linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024x512xi32>) { @@ -48,11 +46,9 @@ func.func @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_support // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -62,8 +58,8 @@ func.func @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_float_s %c0 = arith.constant 0 : index %cst = arith.constant dense<0.000000e+00> : tensor<1024x512xf32> %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x512x256xf32> %3 = tensor.empty() : tensor<1024x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xf32>) outs(%cst : tensor<1024x512xf32>) { @@ -98,11 +94,9 @@ func.func @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_float_s 
// ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -112,8 +106,8 @@ func.func @split_reduction_innermost_reduction_next_dynamic_supported() attribut %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} %4 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [1024, %1, 256], strides = [1, 1, 1] : !flow.dispatch.tensor>{%1} -> tensor<1024x?x256xi32> %5 = tensor.empty(%1) : tensor<1024x?xi32> %6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<1024x?xi32>) -> tensor<1024x?xi32> @@ -140,11 +134,9 @@ func.func @split_reduction_innermost_reduction_next_dynamic_supported() attribut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -152,8 +144,8 @@ func.func @split_reduction_innermost_reduction_next_dynamic_supported() attribut func.func @split_reduction_innermost_reduction_next_imperfect_tiling_supported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant dense<0> : tensor<1024x513xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 513, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x513x256xi32> %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x513x256xi32>) outs(%cst : tensor<1024x513xi32>) { ^bb0(%in: i32, %out: i32): @@ -178,11 +170,9 @@ func.func @split_reduction_innermost_reduction_next_imperfect_tiling_supported() // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, 
d1, d2)> @@ -192,8 +182,8 @@ func.func @split_reduction_innermost_dynamic_reduction_unsupported() attributes %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} %4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0], sizes = [1024, 512, %1], strides = [1, 1, 1] : !flow.dispatch.tensor>{%1} -> tensor<1024x512x?xi32> %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%4 : tensor<1024x512x?xi32>) outs(%cst : tensor<1024x512xi32>) { ^bb0(%in: i32, %out: i32): @@ -209,11 +199,9 @@ func.func @split_reduction_innermost_dynamic_reduction_unsupported() attributes // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -221,8 +209,8 @@ func.func @split_reduction_innermost_dynamic_reduction_unsupported() attributes func.func @split_reduction_innermost_imperfect_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = 
arith.constant 0 : index %cst = arith.constant dense<0> : tensor<1024x512xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 257], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x512x257xi32> %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x257xi32>) outs(%cst : tensor<1024x512xi32>) { ^bb0(%in: i32, %out: i32): @@ -238,11 +226,9 @@ func.func @split_reduction_innermost_imperfect_reduction_unsupported() attribute // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d2, d1)> @@ -250,8 +236,8 @@ func.func @split_reduction_innermost_imperfect_reduction_unsupported() attribute func.func @split_reduction_not_innermost_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant dense<0> : tensor<1024x256xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x512x256xi32> %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024x256xi32>) { ^bb0(%in: i32, %out: i32): @@ -268,11 +254,9 @@ func.func @split_reduction_not_innermost_reduction_unsupported() attributes {hal // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -280,8 +264,8 @@ func.func @split_reduction_not_innermost_reduction_unsupported() attributes {hal func.func @split_reduction_double_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant dense<0> : tensor<1024xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1024x512x256xi32> %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024xi32>) { ^bb0(%in: i32, %out: i32): diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir index f646cee2d08b8..4d91d677d45f3 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir @@ -6,11 +6,9 @@ // and the conversion to destination passing style. Running CSE // before hoists the fill and the empty out of the loop causing // issues with the conversion. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -23,8 +21,8 @@ func.func @check_no_cse() attributes {hal.executable.target = #executable_target %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 10752 : index]} : i32 to index %3 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [10752 : index, 21504 : index]} : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%2) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%2) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%3) : !flow.dispatch.tensor> %6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [7, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<7x384xf32> %7 = tensor.empty() : tensor<7xf32> %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<7xf32>) -> tensor<7xf32> @@ -51,12 +49,10 @@ func.func @check_no_cse() attributes {hal.executable.target = #executable_target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> 
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -70,10 +66,10 @@ func.func @peel_partially_unaligned_matmul() attributes {hal.executable.target = %5 = arith.index_castui %1 {stream.alignment = 64 : index, stream.values = [576704 : index, 1763072 : index]} : i32 to index %6 = arith.index_castui %2 {stream.alignment = 64 : index, stream.values = [908480 : index, 2094848 : index]} : i32 to index %7 = arith.index_castui %3 {stream.alignment = 128 : index, stream.values = [2304 : index, 134016 : index]} : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor> - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor> + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> %12 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [1, 576], strides = [1, 1] 
: !flow.dispatch.tensor> -> tensor<1x576xf32> %13 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [576, 144], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<576x144xf32> %14 = flow.dispatch.tensor.load %10, offsets = [0, 0], sizes = [1, 144], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x144xf32> @@ -103,12 +99,10 @@ func.func @peel_partially_unaligned_matmul() attributes {hal.executable.target = // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @batch_matmul_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { @@ -126,9 +120,9 @@ func.func @batch_matmul_dynamic() attributes {hal.executable.target = #executabl %9 = arith.index_cast %3 : i32 to index %10 = arith.index_cast %4 : i32 to index %11 = arith.index_cast %5 : i32 to index - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7, %9} - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%10, %11, %8} - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7, %8} + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7, %9} + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%10, %11, 
%8} + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7, %8} %15 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0], sizes = [%6, %7, %9], strides = [1, 1, 1] : !flow.dispatch.tensor>{%6, %7, %9} -> tensor %16 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [%10, %11, %8], strides = [1, 1, 1] : !flow.dispatch.tensor>{%10, %11, %8} -> tensor %17 = tensor.empty(%6, %7, %8) : tensor @@ -142,20 +136,18 @@ func.func @batch_matmul_dynamic() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0 * 1536 + d1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> func.func @check_buffer_ops_vectorization() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x1024xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x1024xi32> memref.assume_alignment %0, 64 : memref<128x1024xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x1536xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128x1536xi32> memref.assume_alignment %1, 64 : memref<128x1536xi32> %subview = memref.subview %1[0, 0] [128, 1024] [1, 1] : memref<128x1536xi32> to memref<128x1024xi32, #map> 
linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<128x1024xi32>) outs(%subview : memref<128x1024xi32, #map>) { @@ -171,12 +163,10 @@ func.func @check_buffer_ops_vectorization() attributes {hal.executable.target = // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2, d3) -> (d3)> @@ -188,9 +178,9 @@ func.func @vectorize_fill_conv2d_generic() attributes {hal.executable.target = # %cst_2 = arith.constant 0.166666672 : f32 %cst_3 = arith.constant dense<0.000000e+00> : tensor<16xf32> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> 
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x16xf32> %5 = tensor.empty() : tensor<1x112x112x16xf32> @@ -221,15 +211,13 @@ func.func @vectorize_fill_conv2d_generic() attributes {hal.executable.target = # // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer>, - #hal.descriptor_set.binding<5, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -238,12 +226,12 @@ func.func @multi_result() attributes {hal.executable.target = #executable_target %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e-03 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) alignment(64) offset(%c0) : !flow.dispatch.tensor> %6 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x128xf32> %7 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %8 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [256], strides = [1] : !flow.dispatch.tensor> -> tensor<256xf32> @@ -268,19 +256,17 @@ func.func @multi_result() attributes {hal.executable.target = #executable_target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 
: index, target_triple = "x86_64-none-elf", ukernels = "mmt4d"}> func.func @ukernel_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 4, 8, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x4x8x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [16, 4, 16, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4x16x32xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 16, 8, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x16x8x16xf32> @@ -301,12 +287,10 @@ func.func @ukernel_dispatch() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", 
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf", ukernels = "all"}> #map = affine_map<()[s0, s1, s2] -> (s0 - s1 * (s0 ceildiv s2), s0 ceildiv s2)> @@ -317,9 +301,9 @@ func.func @dispatch() attributes {hal.executable.target = #executable_target_emb %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_castui %0 : i32 to index %3 = arith.index_castui %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%2} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%2} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %7 = affine.min #map()[%2, %workgroup_id_x, %workgroup_count_x] @@ -332,11 +316,11 @@ func.func @dispatch() attributes {hal.executable.target = #executable_target_emb return } // CHECK: func @dispatch -// CHECK: %[[INPUT0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[INPUT0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK-SAME: memref> -// CHECK: %[[INPUT1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: 
%[[INPUT1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-SAME: memref> -// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-SAME: memref> // CHECK-DAG: %[[OFFSET:.+]] = affine.apply // CHECK-DAG: %[[SIZE:.+]] = affine.min @@ -349,12 +333,10 @@ func.func @dispatch() attributes {hal.executable.target = #executable_target_emb // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #config1 = #iree_codegen.lowering_config @@ -364,9 +346,9 @@ func.func @unsupported_ukernel_fallback_to_vectorization() attributes {hal.execu %c1024 = arith.constant 1024 : index %c132096 = arith.constant 132096 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1024) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c132096) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1024) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c132096) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 256, 1, 1], strides = [1, 1, 1, 1] : 
!flow.dispatch.tensor> -> tensor<1x256x1x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 256, 128, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x256x128x1xi8> %5 = tensor.empty() : tensor<1x4x1x128xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir index 97ef7fc30bd02..77b4078da2c1b 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1, d0)> @@ -12,8 +10,8 @@ func.func @transpose_10_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) 
alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) { @@ -37,11 +35,9 @@ func.func @transpose_10_8x8_pattern() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d2, d1)> @@ -49,8 +45,8 @@ func.func @transpose_10_8x8_pattern() attributes {hal.executable.target = #execu func.func @transpose_021_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : 
!flow.dispatch.tensor> -> tensor<64x96x128xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 128, 96], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x128x96xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<64x128x96xf32>) { @@ -74,11 +70,9 @@ func.func @transpose_021_8x8_pattern() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d1, d2, d0)> @@ -86,8 +80,8 @@ func.func @transpose_021_8x8_pattern() attributes {hal.executable.target = #exec func.func @transpose_201_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x96x128xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 64, 
96], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x64x96xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<128x64x96xf32>) { @@ -111,11 +105,9 @@ func.func @transpose_201_8x8_pattern() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d2, d1, d0)> @@ -123,8 +115,8 @@ func.func @transpose_201_8x8_pattern() attributes {hal.executable.target = #exec func.func @transpose_210_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x96x128xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 96, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x96x64xf32> %4 = linalg.generic {indexing_maps = [#map, 
#map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<128x96x64xf32>) { @@ -148,11 +140,9 @@ func.func @transpose_210_8x8_pattern() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d2, d0, d1)> @@ -160,8 +150,8 @@ func.func @transpose_210_8x8_pattern() attributes {hal.executable.target = #exec func.func @transpose_120_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x96x128xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [96, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<96x128x64xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : 
tensor<96x128x64xf32>) { @@ -185,11 +175,9 @@ func.func @transpose_120_8x8_pattern() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d1, d0, d2)> @@ -197,8 +185,8 @@ func.func @transpose_120_8x8_pattern() attributes {hal.executable.target = #exec func.func @transpose_102() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x96x128xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [96, 64, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<96x64x128xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<96x64x128xf32>) { @@ -215,11 +203,9 @@ func.func @transpose_102() attributes {hal.executable.target = #executable_targe // 
----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1, d0)> @@ -227,8 +213,8 @@ func.func @transpose_102() attributes {hal.executable.target = #executable_targe func.func @test_no_avx2_feature() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir index c93ab703ac8af..005492f617fc7 100644 --- 
a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' -split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -15,9 +13,9 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 
0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %9 = tensor.empty(%2, %3) : tensor @@ -43,11 +41,9 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -58,8 +54,8 @@ func.func @mask_dynamic_reduction() attributes {hal.executable.target = #executa %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2} %6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %7 = tensor.empty(%2) : tensor %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor) -> tensor @@ -78,12 +74,10 @@ func.func @mask_dynamic_reduction() attributes {hal.executable.target = #executa // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_32_ = 
#hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 32 : index, target_triple = "riscv32-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -93,9 +87,9 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %9 = tensor.empty(%2, %3) : tensor @@ -121,12 +115,10 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, 
target_triple = "aarch64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -136,9 +128,9 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %9 = tensor.empty(%2, %3) : tensor @@ -159,13 +151,11 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> 
func.func @mask_matmul_sve() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -174,10 +164,10 @@ func.func @mask_matmul_sve() attributes {hal.executable.target = #executable_tar %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -193,12 +183,10 @@ func.func @mask_matmul_sve() attributes {hal.executable.target = #executable_tar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -208,9 +196,9 @@ func.func @mask_dynamic_generic_add() attributes {hal.executable.target = #execu %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%2, %3} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor>{%2, %3} -> tensor %9 = tensor.empty(%2, %3) : tensor diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir index 1e2e60deb2cf8..f6782a5f04e75 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir +++ 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "system-elf-riscv_64", {cpu = "generic-rv64", cpu_features = "+m,+a,+f,+d,+v", data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", native_vector_size = 64 : index, target_triple = "riscv64"}> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> @@ -19,8 +17,8 @@ func.func @main_dispatch_77_generic_1x257x257x21() attributes {hal.executable.ta %c32_i32 = arith.constant 32 : i32 %cst_2 = arith.constant 1.000000e+00 : f32 %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1115136) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1115136) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 21], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x33x33x21xf32> %3 = tensor.empty() : tensor<1x257x257x21xf32> %4 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%3 : tensor<1x257x257x21xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir index 410d80149ecb7..1903cabd690a0 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @matmul_tensors_default() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -15,10 +13,10 @@ func.func @matmul_tensors_default() attributes {hal.executable.target = #executa %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -35,13 +33,11 @@ func.func @matmul_tensors_default() attributes {hal.executable.target = #executa // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @i4_i4_i32_matmul() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -50,10 +46,10 @@ func.func @i4_i4_i32_matmul() attributes {hal.executable.target = #executable_ta %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -71,12 +67,10 @@ func.func @i4_i4_i32_matmul() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @batch_matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -85,9 +79,9 @@ func.func @batch_matmul_tensors() attributes {hal.executable.target = #executabl %1 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %3, %2} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %3, %2} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2} %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [%0, %1, %3], strides = [1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [%0, %3, %2], strides = [1, 1, 1] : !flow.dispatch.tensor>{%0, %3, %2} -> tensor %9 = tensor.empty(%0, %1, %2) : tensor @@ -105,19 +99,17 @@ func.func @batch_matmul_tensors() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @matmul_static() attributes {hal.executable.target = 
#executable_target_system_elf_arm_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<196x240xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [240, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<240x40xf32> %5 = tensor.empty() : tensor<196x40xf32> @@ -135,21 +127,19 @@ func.func @matmul_static() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @conv_static() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c607520 = arith.constant 607520 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 51, 41, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x51x41x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 512, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x512x512xf32> %5 = tensor.empty() : tensor<1x25x20x512xf32> @@ -166,19 +156,17 @@ func.func @conv_static() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @restrict_num_workgroups() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) 
set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x11x11x576xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<5x5x576xf32> %5 = tensor.empty() : tensor<1x7x7x576xf32> @@ -196,20 +184,18 @@ func.func @restrict_num_workgroups() attributes {hal.executable.target = #execut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @matmul_aarch_i8_i8_i32_static() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x384xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1536], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x1536xi8> %5 = tensor.empty() : tensor<128x1536xi32> @@ -227,12 +213,10 @@ func.func @matmul_aarch_i8_i8_i32_static() attributes {hal.executable.target = # // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @matmul_aarch_i8_i8_i32_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { @@ -240,9 +224,9 @@ func.func @matmul_aarch_i8_i8_i32_dynamic() attributes {hal.executable.target = %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : 
!flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} %6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -259,18 +243,16 @@ func.func @matmul_aarch_i8_i8_i32_dynamic() attributes {hal.executable.target = // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @pack() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<4x48x8x1xf32> %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32> @@ -286,11 +268,9 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { @@ -304,8 +284,8 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %5 = arith.index_castui %1 : i32 to index %6 = arith.index_castui %2 : i32 to index %7 = arith.index_castui %3 : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] 
inner_tiles = [32, 16] into %11 : tensor -> tensor @@ -321,21 +301,19 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @mmt4d_384x384x512_4x1x4_dispatch_0() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index %c96 = arith.constant 96 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [96, 384, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<96x384x4x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [128, 384, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x384x4x1xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [96, 384, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<96x128x4x4xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir index 11dc420413d1d..eb84c70bb80bb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir @@ -1,16 +1,14 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --iree-llvmcpu-enable-scalable-vectorization=true --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @transpose_f32() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf32> %3 = tensor.empty() : tensor<32x32xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, 
affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf32>) outs(%3 : tensor<32x32xf32>) { @@ -30,17 +28,15 @@ func.func @transpose_f32() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @transpose_f64() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf64> %3 = tensor.empty() : tensor<32x32xf64> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf64>) outs(%3 : tensor<32x32xf64>) { @@ -60,17 +56,15 @@ func.func @transpose_f64() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @transpose_unsupported_not_rank_2() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 4, 8], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x4x8xf32> %3 = tensor.empty() : tensor<2x8x4xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<2x4x8xf32>) outs(%3 : tensor<2x8x4xf32>) { @@ -90,17 +84,15 @@ func.func @transpose_unsupported_not_rank_2() attributes {hal.executable.target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = 
"aarch64-none-elf"}> func.func @transpose_unsupported_not_simple_transpose() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf32> %3 = tensor.empty() : tensor<32x32xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf32>) outs(%3 : tensor<32x32xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir index 139f1967e043c..757a039ed1197 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir @@ -3,13 +3,11 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' \ // RUN: --iree-llvmcpu-enable-scalable-vectorization=true --split-input-file --iree-llvmcpu-disable-arm-sme-tiling %s | FileCheck %s --check-prefixes=CHECK,DISABLE-ARM-SME -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, 
storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -18,10 +16,10 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor 
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -39,19 +37,17 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 14], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x14xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [14, 7], strides = [1, 1] : !flow.dispatch.tensor> -> 
tensor<14x7xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [15, 7], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x7xf32> @@ -69,19 +65,17 @@ func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> 
tensor<1x1xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> @@ -99,13 +93,11 @@ func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -114,10 +106,10 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor @@ -142,12 +134,10 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {cpu = "", cpu_features = "+v9a,+sve", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", link_embedded = false, native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android34"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -165,11 +155,11 @@ func.func @matmul_with_fill() attributes {hal.executable.target = #executable_ta %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> %15 = flow.dispatch.tensor.load %10, offsets = [0, 0], sizes = [1024, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x256xi8> %16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xi8> %17 = flow.dispatch.tensor.load %12, offsets = [0], sizes = [1024], strides = [1] : !flow.dispatch.tensor> -> tensor<1024xf32> @@ -201,19 +191,17 @@ func.func @matmul_with_fill() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {cpu = "", cpu_features = "+v9a,+sve", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", link_embedded = false, native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android34"}> 
func.func @depthwise_conv() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x57x57x72xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x72xf32> %5 = tensor.empty() : tensor<1x28x28x72xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir index 02ad9c9ab076a..ec9241b3c3637 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir @@ -2,13 +2,11 @@ // RUN: --iree-llvmcpu-enable-scalable-vectorization=true --iree-llvmcpu-vector-pproc-strategy=peel \ // RUN: --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { @@ -17,10 +15,10 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor>{%0, %1} %7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor>{%0, %2} -> tensor %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor>{%2, %1} -> tensor %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : 
!flow.dispatch.tensor>{%0, %1} -> tensor @@ -38,19 +36,17 @@ func.func @matmul_tensors() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 14], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x14xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [14, 7], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<14x7xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [15, 7], 
strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x7xf32> @@ -68,19 +64,17 @@ func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1], 
strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> @@ -99,19 +93,17 @@ func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}> func.func @depthwise_conv() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %input = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 1, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x4x4xf32> %filter = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 4, 4], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x4x4xf32> %5 = tensor.empty() : tensor<1x1x1x4xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir index 
fe4c802cbb3a0..3042c00ee55c5 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --iree-llvmcpu-disable-distribution --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @matmul_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x128xf32> %5 = tensor.empty() : tensor<384x128xf32> 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir index 07769efdd97fa..02095fcec42d6 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_32_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {cpu_features = "+m,+f", data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 16 : index, target_triple = "riscv32-none-elf"}> func.func @matmul_riscv() attributes {hal.executable.target = #executable_target_embedded_elf_riscv_32_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 
128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x128xf32> %5 = tensor.empty() : tensor<384x128xf32> @@ -32,19 +30,17 @@ func.func @matmul_riscv() attributes {hal.executable.target = #executable_target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_riscv_32_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {cpu_features = "+m,+f", data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 16 : index, target_triple = "riscv32-none-elf"}> func.func @thin_depthwise_conv_static() attributes {hal.executable.target = #executable_target_embedded_elf_riscv_32_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x57x57x72xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x72xf32> %5 = tensor.empty() : tensor<1x28x28x72xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir index 2e21413fbb8fa..e67e7afb8e975 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @matvec_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 
384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x384xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor> -> tensor<384xf32> %5 = tensor.empty() : tensor<128xf32> @@ -32,12 +30,10 @@ func.func @matvec_static() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @matvec_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { @@ -49,9 +45,9 @@ func.func @matvec_dynamic() attributes {hal.executable.target = #executable_targ %3 = arith.index_cast %0 : i32 to index %4 = arith.index_cast %1 : i32 to index %5 = arith.index_cast %2 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %4} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%5} - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %4} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%5} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3} 
%9 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %10 = arith.index_cast %9 : i32 to index %11 = flow.dispatch.tensor.load %8, offsets = [0], sizes = [%10], strides = [1] : !flow.dispatch.tensor>{%3} -> tensor @@ -72,20 +68,18 @@ func.func @matvec_dynamic() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @dot_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor> -> tensor<384xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = 
[384], strides = [1] : !flow.dispatch.tensor> -> tensor<384xf32> %5 = tensor.empty() : tensor @@ -104,12 +98,10 @@ func.func @dot_static() attributes {hal.executable.target = #executable_target_e // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @dot_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { @@ -119,10 +111,10 @@ func.func @dot_dynamic() attributes {hal.executable.target = #executable_target_ %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_cast %0 : i32 to index %3 = arith.index_cast %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %4, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3} %8 = 
flow.dispatch.tensor.load %6, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor>{%2} -> tensor %9 = flow.dispatch.tensor.load %7, offsets = [0], sizes = [%3], strides = [1] : !flow.dispatch.tensor>{%3} -> tensor %10 = linalg.fill ins(%cst : f32) outs(%5 : tensor) -> tensor @@ -140,12 +132,10 @@ func.func @dot_dynamic() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -154,9 +144,9 @@ func.func @dynamic_add() attributes {hal.executable.target = #executable_target_ %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : 
!flow.dispatch.tensor>{%0, %1} -> tensor %6 = flow.dispatch.tensor.load %3, offsets = [0], sizes = [%1], strides = [1] : !flow.dispatch.tensor>{%1} -> tensor %7 = tensor.empty(%0, %1) : tensor @@ -177,12 +167,10 @@ func.func @dynamic_add() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> @@ -191,9 +179,9 @@ func.func @add4D() attributes {hal.executable.target = #executable_target_embedd %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor>{%0, %1, %2, %3} %7 = 
flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor %8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor %9 = tensor.empty(%0, %1, %2, %3) : tensor @@ -215,18 +203,16 @@ func.func @add4D() attributes {hal.executable.target = #executable_target_embedd // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func.func @add_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 16, 32, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x16x32x128xf32> %3 = tensor.empty() : tensor<64x16x32x128xf32> %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x16x32x128xf32>) outs(%3 : tensor<64x16x32x128xf32>) { 
@@ -247,12 +233,10 @@ func.func @add_static() attributes {hal.executable.target = #executable_target_e // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -262,9 +246,9 @@ func.func @preset_config_matmul_tensors() attributes { translation_info = #translation } { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x512xf32> %5 = tensor.empty() : tensor<128x512xf32> @@ -283,19 +267,17 @@ func.func @preset_config_matmul_tensors() attributes { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @matmul_partially_peel() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16641, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16641x16xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x8xf32> %5 = tensor.empty() : tensor<16641x8xf32> @@ -314,11 +296,9 @@ func.func @matmul_partially_peel() attributes {hal.executable.target = #executab // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -329,8 +309,8 @@ func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_tar %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%0, %1} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%2, %3} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%0, %1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%2, %3} %subview = memref.subview %7[%4, %5] [%0, %1] [1, 1] : memref to memref> linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%6 : memref) outs(%subview : memref>) { ^bb0(%in: i32, %out: i32): @@ -348,11 +328,9 @@ func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_tar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { @@ -360,8 +338,8 @@ func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executabl %c2 = arith.constant 2 : index %cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32> %cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> 
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32> @@ -379,11 +357,9 @@ func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @static_3d_fft_stage3() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { @@ -392,8 +368,8 @@ func.func @static_3d_fft_stage3() attributes {hal.executable.target = #executabl %cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32> %0 = bufferization.to_memref %cst_0 : memref<4xf32> %1 = bufferization.to_memref %cst : memref<4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32> iree_linalg_ext.fft ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, 
memref<64x128x32xf32>) return } @@ -407,12 +383,10 @@ func.func @static_3d_fft_stage3() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -424,9 +398,9 @@ func.func @outs_fusion_fn() attributes {hal.executable.target = #executable_targ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %2} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%2, %1} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %2} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%2, %1} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %1} %6 = tensor.empty(%0, %1) : tensor %7 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%6 : tensor) { ^bb0(%out: f32): @@ -456,12 +430,10 @@ func.func @outs_fusion_fn() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @conv_dynamic() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { @@ -474,9 +446,9 @@ func.func @conv_dynamic() attributes {hal.executable.target = #executable_target %6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index %7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index %8 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%4, %5, %3, %6} - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor>{%0, %7, %8, %6} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%4, %5, %3, %6} + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor>{%0, %7, %8, %6} %12 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor %13 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [%4, %5, %3, %6], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5, %3, %6} -> tensor %14 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [%0, 
%7, %8, %6], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %7, %8, %6} -> tensor @@ -494,20 +466,18 @@ func.func @conv_dynamic() attributes {hal.executable.target = #executable_target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c607520 = arith.constant 607520 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x16xf32> %5 = tensor.empty() : tensor<1x112x112x16xf32> @@ -525,20 +495,18 @@ 
func.func @conv_static() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @conv_nchw_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 128, 30, 30], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x128x30x30xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [128, 128, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x128x3x3xf32> %5 = tensor.empty() : tensor<1x128x28x28xf32> @@ -556,19 +524,17 @@ func.func @conv_nchw_static() attributes 
{hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @depthwise_conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x161x161x240xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x240xf32> %5 = tensor.empty() : tensor<1x80x80x240xf32> @@ -587,19 +553,17 @@ func.func @depthwise_conv_static() attributes {hal.executable.target = #executab // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu"}> func.func @thin_depthwise_conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x57x57x72xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x72xf32> %5 = tensor.empty() : tensor<1x28x28x72xf32> @@ -618,19 +582,17 @@ func.func @thin_depthwise_conv_static() attributes {hal.executable.target = #exe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = 
"+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vnni,+adx,+clflushopt,+clwb,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+pku,+prfchw,+rdrnd,+rdseed,+sahf,+x87,+xsave,+xsavec,+xsaveopt,+xsaves", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf", ukernels = false}> func.func @pooling_nchw_max() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c3846080 = arith.constant 3846080 : index %c0 = arith.constant 0 : index %cst = arith.constant -3.40282347E+38 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c3846080) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c3846080) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 64, 114, 114], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x64x114x114xf32> %3 = tensor.empty() : tensor<1x64x56x56xf32> %4 = tensor.empty() : tensor<3x3xf32> @@ -649,18 +611,16 @@ func.func @pooling_nchw_max() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 
64 : index, target_triple = "x86_64-pc-linux-gnu"}> #map = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1) -> (d0, d1)> func.func @generic_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [96, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<96x16xf32> %3 = tensor.empty() : tensor<16x96xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<96x16xf32>) outs(%3 : tensor<16x96xf32>) { @@ -679,19 +639,17 @@ func.func @generic_static() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @matmul_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x128xf32> %5 = tensor.empty() : tensor<384x128xf32> @@ -745,20 +703,18 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @matmul_i8_i8_i32_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x384xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1536], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x1536xi8> %5 = tensor.empty() : tensor<128x1536xi32> @@ -777,21 +733,19 @@ func.func @matmul_i8_i8_i32_static() attributes {hal.executable.target = #execut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @gemm_unit_N() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0, %1} + 
%3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} %5 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%1, 1], strides = [1, 1] : !flow.dispatch.tensor>{%1} -> tensor %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor>{%0, %1} -> tensor %7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, 1], strides = [1, 1] : !flow.dispatch.tensor>{%0} -> tensor @@ -809,20 +763,18 @@ func.func @gemm_unit_N() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @gemm_unit_M_unit_N() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : 
!flow.dispatch.tensor>{%0} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor>{%0} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1] : !flow.dispatch.tensor>{%0} -> tensor<1x?xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, 1], strides = [1, 1] : !flow.dispatch.tensor>{%0} -> tensor %6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1xf32> @@ -840,22 +792,20 @@ func.func @gemm_unit_M_unit_N() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @matmul_odd() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [33, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<33x16xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 49], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x49xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [33, 49], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<33x49xf32> @@ -875,11 +825,9 @@ func.func @matmul_odd() attributes {hal.executable.target = #executable_target_e // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)> @@ -889,8 +837,8 @@ func.func @generic_unit_dims_dynamic() attributes {hal.executable.target = #exec %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %4 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0, %1, %2, %3} + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor>{%0, %1, %2, %3} %6 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [1, %0, 1, 1, %1, %2, 1, %3], strides = [1, 1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor>{%0, %1, %2, %3} -> tensor<1x?x1x1x?x?x1x?xf32> %7 = tensor.empty(%0, %1, %2, %3) : tensor<1x?x1x1x?x?x1x?xf32> %8 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<1x?x1x1x?x?x1x?xf32>) outs(%7 : tensor<1x?x1x1x?x?x1x?xf32>) { @@ -910,11 +858,9 @@ func.func @generic_unit_dims_dynamic() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0) -> (d0)> @@ -922,8 +868,8 @@ func.func @generic_unit_dims_dynamic() attributes {hal.executable.target = #exec func.func @reduce_to_scalar_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> %3 = tensor.empty() : tensor %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor) -> tensor @@ -945,11 +891,9 @@ func.func @reduce_to_scalar_static() attributes {hal.executable.target = #execut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<(d0) -> (d0)> @@ -957,8 +901,8 @@ func.func @reduce_to_scalar_static() attributes {hal.executable.target = #execut func.func @reduce_to_scalar_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor>{%0} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor>{%0} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [%0], strides = [1] : !flow.dispatch.tensor>{%0} -> tensor %4 = 
flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%3 : tensor) outs(%4 : tensor) { @@ -978,18 +922,16 @@ func.func @reduce_to_scalar_dynamic() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> #map = affine_map<() -> ()> func.func @scalar() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %3 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = []} ins(%2 : tensor) outs(%3 : tensor) { @@ -1006,11 +948,9 @@ func.func @scalar() attributes {hal.executable.target = #executable_target_embed // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = 
#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1, d0)> @@ -1018,8 +958,8 @@ func.func @scalar() attributes {hal.executable.target = #executable_target_embed func.func @transpose_8x8() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) { @@ -1035,11 +975,9 @@ func.func @transpose_8x8() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2,+avx512f", data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1, d0)> @@ -1047,8 +985,8 @@ func.func @transpose_8x8() attributes {hal.executable.target = #executable_targe func.func @transpose_16x16() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) { @@ -1064,12 +1002,10 @@ func.func @transpose_16x16() attributes {hal.executable.target = #executable_tar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 
16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -1079,9 +1015,9 @@ func.func @multi_root() attributes {hal.executable.target = #executable_target_e %c6144 = arith.constant 6144 : index %c792576 = arith.constant 792576 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c792576) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c792576) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x128xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [12, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<12x128xf32> %5 = tensor.empty() : tensor<12x128xf32> @@ -1116,18 +1052,16 @@ func.func @multi_root() attributes {hal.executable.target = #executable_target_e // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func 
@pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<2x48x16x1xf32> %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32> @@ -1144,18 +1078,16 @@ func.func @pack() attributes {hal.executable.target = #executable_target_embedde // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @pack_f16() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 
= hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf16> %3 = tensor.empty() : tensor<2x48x16x1xf16> %pack = tensor.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16> @@ -1172,17 +1104,15 @@ func.func @pack_f16() attributes {hal.executable.target = #executable_target_emb // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @pack_many_elements() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1200, 500000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1200x500000xf32> %3 = tensor.empty() : 
tensor<31250x1200x16x1xf32> %pack = tensor.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> @@ -1199,12 +1129,10 @@ func.func @pack_many_elements() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1)> @@ -1213,9 +1141,9 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i %c0 = arith.constant 0 : index %cst = arith.constant 3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], 
strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<24x512x16x1xf32> @@ -1247,18 +1175,16 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @elem_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x384xf32> %3 = tensor.empty() : tensor<128x384xf32> %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<128x384xf32>) outs(%3 : tensor<128x384xf32>) { @@ -1284,11 +1210,9 @@ func.func @elem_pack() attributes {hal.executable.target = #executable_target_em // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}> #map = affine_map<(d0, d1) -> (d1, d0)> @@ -1297,8 +1221,8 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ %c1579008 = arith.constant 1579008 : index %c3147776 = arith.constant 3147776 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1579008) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3147776) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1579008) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3147776) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [30522, 768], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<30522x768xf32> %3 = tensor.empty() : tensor<768x30522xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<30522x768xf32>) outs(%3 : tensor<768x30522xf32>) { @@ -1323,14 +1247,12 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout 
= #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -1341,11 +1263,11 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu %cst = arith.constant -0.000000e+00 : f32 %cst_0 = arith.constant 1.024000e+03 : f32 %cst_1 = arith.constant 9.99999996E-13 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + 
%4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x1024xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor> -> tensor<384xf32> %7 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [1024], strides = [1] : !flow.dispatch.tensor> -> tensor<1024xf32> @@ -1396,12 +1318,10 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -1409,9 +1329,9 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu func.func @reduction_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [384, 1024, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<384x1024x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x1024xf32> %5 = tensor.empty() : tensor<1024x24x16x1xf32> @@ -1445,18 +1365,16 @@ func.func @reduction_pack() attributes {hal.executable.target = #executable_targ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @unpack_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c41943040 = arith.constant 41943040 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c41943040) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c41943040) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = 
flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 256, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x256x16x16xf32> %3 = tensor.empty() : tensor<1024x4096xf32> %unpack = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32> @@ -1473,12 +1391,10 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d0)> @@ -1486,9 +1402,9 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe #map2 = affine_map<(d0, d1) -> (d0, d1)> func.func @unpack_elem() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [48, 64, 8, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<48x64x8x2xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> %5 = tensor.empty() : tensor<128x384xf32> @@ -1512,13 +1428,11 @@ func.func @unpack_elem() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> #map = affine_map<(d0, d1) -> (d1)> @@ -1529,10 +1443,10 @@ func.func @quant_model() attributes {hal.executable.target = #executable_target_ %c-128_i32 = arith.constant -128 : i32 %c127_i32 = arith.constant 127 : i32 %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2304, 24], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2304x24xi8> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [24, 144], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<24x144xi8> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [144], strides = [1] : !flow.dispatch.tensor> -> tensor<144xi32> @@ -1560,19 +1474,17 @@ func.func @quant_model() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = false}> func.func @test() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64 %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %extracted = tensor.extract %2[] : tensor %3 = arith.muli %extracted, %c6364136223846793005_i64 : i64 @@ -1588,12 +1500,10 @@ func.func @test() attributes {hal.executable.target = #executable_target_embedde // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "cascadelake", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu", ukernels = false}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -1601,9 +1511,9 @@ func.func @test() attributes {hal.executable.target = #executable_target_embedde func.func @non_trivial_program() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) 
alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [128, 1, 128, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x1x128x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x1xf32> %5 = tensor.empty() : tensor<1x1xf32> @@ -1631,12 +1541,10 @@ func.func @non_trivial_program() attributes {hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = true}> func.func @batch_mmt4d() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { @@ -1657,9 +1565,9 @@ func.func @batch_mmt4d() attributes {hal.executable.target = #executable_target_ %11 = arith.shli %10, %c32_i64 : i64 %12 = arith.ori %9, %11 : i64 %13 = arith.index_castui %12 : i64 to index - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %16 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%13) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %16 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%13) : !flow.dispatch.tensor> %17 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 32, 8, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x10x32x8x1xf32> %18 = flow.dispatch.tensor.load %15, offsets = [0, 0, 0, 0, 0], sizes = [128, 80, 32, 4, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32x4x1xf32> %19 = tensor.empty() : tensor<128x10x80x8x4xf32> @@ -1676,20 +1584,18 @@ func.func @batch_mmt4d() attributes {hal.executable.target = #executable_target_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> func.func @mmt4d_with_large_reduction() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) 
offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [7, 18176, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<7x18176x16x1xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [284, 18176, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<284x18176x16x1xf32> %5 = tensor.empty() : tensor<7x284x16x16xf32> @@ -1706,19 +1612,17 @@ func.func @mmt4d_with_large_reduction() attributes {hal.executable.target = #exe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @pad_only() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c634816 = arith.constant 634816 : index %c3846080 = arith.constant 3846080 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) 
alignment(64) offset(%c3846080) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x112x112x64xf32> %padded = tensor.pad %2 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): @@ -1738,11 +1642,9 @@ func.func @pad_only() attributes {hal.executable.target = #executable_target_emb // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { cpu = "generic", cpu_features = "", @@ -1750,8 +1652,8 @@ func.func @pad_only() attributes {hal.executable.target = #executable_target_emb native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @winograd_output_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = 
[1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x8x2x6x6x128xf16> %3 = tensor.empty() : tensor<2x36x36x128xf16> %4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16> @@ -1767,11 +1669,9 @@ func.func @winograd_output_transform() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { cpu = "generic", cpu_features = "", @@ -1779,8 +1679,8 @@ func.func @winograd_output_transform() attributes {hal.executable.target = #exec native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @winograd_input_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x34x34x128xf16> %3 = tensor.empty() : tensor<8x8x2x6x6x128xf16> %4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> 
tensor<8x8x2x6x6x128xf16> @@ -1796,11 +1696,9 @@ func.func @winograd_input_transform() attributes {hal.executable.target = #execu // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { cpu = "generic", cpu_features = "", @@ -1808,8 +1706,8 @@ func.func @winograd_input_transform() attributes {hal.executable.target = #execu native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @winograd_filter_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x64x128xf32> %3 = tensor.empty() : tensor<8x8x64x128xf32> %4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32> @@ -1825,13 +1723,11 @@ func.func @winograd_filter_transform() attributes {hal.executable.target = #exec // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, 
storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { cpu = "generic", cpu_features = "", @@ -1840,10 +1736,10 @@ func.func @winograd_filter_transform() attributes {hal.executable.target = #exec func.func @attention() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index %scale = arith.constant 0.125 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %6 = 
flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> @@ -1866,13 +1762,11 @@ func.func @attention() attributes {hal.executable.target = #executable_target_em // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", { cpu = "generic", cpu_features = "", @@ -1880,10 +1774,10 @@ func.func @attention() attributes {hal.executable.target = #executable_target_em native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> func.func @elementwise_output_transposed() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) 
flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [768], strides = [1] : !flow.dispatch.tensor> -> tensor<768xi64> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xi64> @@ -1916,9 +1810,9 @@ func.func @elementwise_output_transposed() attributes {hal.executable.target = # module { func.func @test_mod_vectorizing_strategy_peeling() attributes {hal.executable.target = #executable_target_system_elf_x86_64_}{ %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#hal.pipeline.layout, #hal.pipeline.binding, #hal.pipeline.binding]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#hal.pipeline.layout, #hal.pipeline.binding, #hal.pipeline.binding]>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#hal.pipeline.layout, #hal.pipeline.binding, #hal.pipeline.binding]>) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], 
sizes = [6], strides = [1] : !flow.dispatch.tensor> -> tensor<6xi32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor> -> tensor<6xi32> %5 = tensor.empty() : tensor<6xi32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir index 284dd8ce4f622..44c02928b1205 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir @@ -101,12 +101,10 @@ func.func @multi_config(%arg0 : tensor, %arg1 : tensor, %arg2 // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @shared_out_operand() { %cst = arith.constant 0.000000e+00 : f32 @@ -117,10 +115,10 @@ func.func @shared_out_operand() { %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_castui %0 {stream.alignment = 1024 : index, stream.values = [205824 : index, 795648 : index, 1385472 : index, 1975296 : index, 2565120 : index, 3154944 : index, 3744768 : index]} : i32 to index %3 = arith.index_castui %1 {stream.alignment = 1024 : index, stream.values = [0 : index, 3072 : index, 6144 : index, 9216 : index, 12288 : index, 15360 : index, 18432 : index]} : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> - %7 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c600576) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c600576) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [391, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<391x384xf32> %9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [384, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x384xf32> %10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor> -> tensor<384xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir index 2c8388bf28b7c..a2944d9d125c5 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt %s --iree-transform-dialect-interpreter --transform-dialect-drop-schedule | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> func.func @pad_matmul_static_dispatch_0() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<500x1020xf32> %5 = tensor.empty() : tensor<250x1020xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir index 71adee33b8901..6f6ab10e3ebcf 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-llvmcpu-vector-lowering-pipeline))" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_391x384x384_f32() { %cst = arith.constant 0.000000e+00 : f32 @@ -19,13 +17,13 @@ func.func @matmul_391x384x384_f32() { %cst_0 = arith.constant dense<0.000000e+00> : vector<8x32xf32> %cst_1 = arith.constant dense<6.000000e+00> : vector<8x32xf32> %alloca = memref.alloca() {alignment = 64 : i64} : memref<8x32xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32> memref.assume_alignment %0, 64 : memref<391x384xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32> memref.assume_alignment %1, 64 : memref<384x384xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32> memref.assume_alignment %2, 64 : memref<384xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<391x384xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<391x384xf32> memref.assume_alignment %3, 64 : memref<391x384xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -83,13 +81,11 @@ func.func 
@matmul_391x384x384_f32() { // Check that vector.loads whose elements are extracted and // consumed in a scalar fashion are scalarized. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_scalar_loads() { %cst = arith.constant 0.000000e+00 : f32 @@ -102,13 +98,13 @@ func.func @matmul_scalar_loads() { %cst_0 = arith.constant dense<0.000000e+00> : vector<8x32xf32> %cst_1 = arith.constant dense<6.000000e+00> : vector<8x32xf32> %alloca = memref.alloca() {alignment = 64 : i64} : memref<8x32xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32> memref.assume_alignment %0, 64 : memref<391x384xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32> memref.assume_alignment %1, 64 : memref<384x384xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32> memref.assume_alignment %2, 64 : memref<384xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<391x384xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : 
memref<391x384xf32> memref.assume_alignment %3, 64 : memref<391x384xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -146,16 +142,14 @@ func.func @matmul_scalar_loads() { // Make sure we don't transpose a mask but create a transposed mask instead. -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transpose_mask() { %a = arith.constant 4 : index %b = arith.constant 8 : index %c0 = arith.constant 0 : index - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4x2xi1> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4x2xi1> %mask = vector.create_mask %a, %b : vector<2x4xi1> %transpose_mask = vector.transpose %mask, [1, 0] : vector<2x4xi1> to vector<4x2xi1> vector.transfer_write %transpose_mask, %3[%c0, %c0] {in_bounds = [true, true]} : vector<4x2xi1>, memref<4x2xi1> @@ -174,12 +168,10 @@ func.func @transpose_mask() { // Make sure that the gather patterns get rid of vector.gather over strided // memref. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @gather_strided_memref() { %cst = arith.constant dense<0.000000e+00> : vector<4xf32> @@ -187,11 +179,11 @@ func.func @gather_strided_memref() { %c0_i32 = arith.constant 0 : i32 %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<2592000x3xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<2592000x3xf32, #hal.descriptor_type> memref.assume_alignment %0, 64 : memref<2592000x3xf32, #hal.descriptor_type> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<518400xi32, #hal.descriptor_type> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<518400xi32, #hal.descriptor_type> memref.assume_alignment %1, 64 : memref<518400xi32, #hal.descriptor_type> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<518400xf32, #hal.descriptor_type> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<518400xf32, #hal.descriptor_type> memref.assume_alignment %2, 64 : memref<518400xf32, #hal.descriptor_type> %subview = memref.subview %0[0, 0] [2592000, 1] [1, 1] : memref<2592000x3xf32, #hal.descriptor_type> to memref<2592000xf32, strided<[3]>, #hal.descriptor_type> %workgroup_id_x = hal.interface.workgroup.id[0] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir 
b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir index 2e9b3c44c923a..6d5e65aedefdd 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir @@ -34,12 +34,10 @@ // CHECK-NEXT: %[[INSERT_SLICE:.*]] = tensor.insert_slice %[[OUT_WRITE]] into %[[OUT_SLICE]]{{.*}} : tensor<8x?xf32> into tensor<8x?xf32> // CHECK-NEXT: tensor.insert_slice %[[INSERT_SLICE]] into %[[OUT_TENSOR_1]]{{.*}} : tensor<8x?xf32> into tensor<1024x1024xf32> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @pipeline() { %c1 = arith.constant 1 : index @@ -47,9 +45,9 @@ func.func @pipeline() { %c16 = arith.constant 16 : index %c8 = arith.constant 8 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> 
tensor<1024x1024xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir index 5d7e055a2034c..9aa61c29a37c3 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-verify-linalg-transform-legality))" %s --verify-diagnostics -split-input-file -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_123x456xf32_times_456x789xf32_into_123x789xf32_dispatch_0() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) 
alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [123, 4, 114], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<123x4x114xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 114, 789], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x114x789xf32> %5 = tensor.empty() : tensor<4x123x789xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp index a1112454fcb90..e5f1149f43dfc 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp @@ -202,8 +202,6 @@ class TestLLVMGPULegalizeOpPass final } }; -using SetBinding = std::pair; - /// Convention with the HAL side to pass kernel arguments. /// The bindings are ordered based on binding set and binding index then /// compressed and mapped to dense set of arguments. @@ -211,21 +209,16 @@ using SetBinding = std::pair; /// InterfaceBindingOp and kernel argument index. 
/// For instance if the kernel has (set, bindings) A(0, 1), B(1, 5), C(0, 6) it /// will return the mapping [A, 0], [C, 1], [B, 2] -static llvm::SmallDenseMap +static llvm::SmallDenseMap getKernelArgMapping(Operation *funcOp) { - llvm::SetVector usedBindingSet; + llvm::SetVector usedBindingSet; funcOp->walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) { - usedBindingSet.insert( - SetBinding(subspanOp.getSet(), subspanOp.getBinding())); + usedBindingSet.insert(subspanOp.getBinding()); }); auto sparseBindings = usedBindingSet.takeVector(); std::sort(sparseBindings.begin(), sparseBindings.end(), - [](SetBinding lhs, SetBinding rhs) { - if (lhs.first == rhs.first) - return lhs.second.ult(rhs.second); - return lhs.first.ult(rhs.first); - }); - llvm::SmallDenseMap mapBindingArgIndex; + [](APInt lhs, APInt rhs) { return lhs.ult(rhs); }); + llvm::SmallDenseMap mapBindingArgIndex; for (auto [index, binding] : llvm::enumerate(sparseBindings)) { mapBindingArgIndex[binding] = index; } @@ -263,8 +256,7 @@ class ConvertFunc : public ConvertToLLVMPattern { } else { llvmType = LLVM::LLVMPointerType::get(rewriter.getContext()); } - llvmInputTypes[argMapping[SetBinding(subspanOp.getSet(), - subspanOp.getBinding())]] = llvmType; + llvmInputTypes[argMapping[subspanOp.getBinding()]] = llvmType; }); // As a convention with HAL, push constants are appended as kernel arguments // after all the binding inputs. @@ -353,8 +345,8 @@ class ConvertIREEBindingSubspanOp : public ConvertToLLVMPattern { operands, op->getAttrDictionary()); MemRefType memrefType = llvm::dyn_cast(subspanOp.getResult().getType()); - mlir::BlockArgument llvmBufferArg = llvmFuncOp.getArgument( - argMapping[SetBinding(subspanOp.getSet(), subspanOp.getBinding())]); + mlir::BlockArgument llvmBufferArg = + llvmFuncOp.getArgument(argMapping[subspanOp.getBinding()]); // As a convention with HAL all the kernel argument pointers are 16Bytes // aligned. 
llvmFuncOp.setArgAttr(llvmBufferArg.getArgNumber(), diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td index 0d361fecaddef..ac3e7eef75136 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td @@ -165,7 +165,7 @@ def VectorToWarpExecuteOnLane0Op : Op + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32> %1 = gpu.thread_id x %2 = arith.cmpi ult, %1, %c1 : index scf.if %2 { @@ -186,7 +186,7 @@ def VectorToWarpExecuteOnLane0Op : Op : vector<128xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32> %1 = gpu.thread_id x %2 = arith.cmpi ult, %1, %c32 : index // Single-warp guard filters out threads 32-63. @@ -266,7 +266,7 @@ def VectorWarpDistributionOp : Op : vector<128xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32> %1 = gpu.thread_id x %2 = arith.cmpi ult, %1, %c32 : index // Single-warp guard filters out threads 32-63. @@ -290,7 +290,7 @@ def VectorWarpDistributionOp : Op : vector<128xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32> %1 = gpu.thread_id x %2 = arith.cmpi ult, %1, %c32 : index // Single-warp guard filters out threads 32-63. 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir index b7ca495bb962d..8cb1b2e537ba3 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir @@ -16,12 +16,10 @@ // OPT-IN: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info, // OPT-IN-SAME: no_reduce_shared_memory_bank_conflicts -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @main_0_dispatch_0 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -54,9 +52,9 @@ hal.executable public @main_0_dispatch_0 { }>} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> @@ -92,12 +90,10 @@ hal.executable public @main_0_dispatch_0 { // OPT-IN: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info, // OPT-IN-SAME: reorder_workgroups = "transpose" -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @main_0_dispatch_0 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -131,9 +127,9 @@ hal.executable public @main_0_dispatch_0 { }>} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> @@ -164,12 +160,10 @@ hal.executable public @main_0_dispatch_0 { // OPT-OUT: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info, // OPT-OUT-SAME: reorder_workgroups = "none" -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @main_0_dispatch_0 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -192,9 +186,9 @@ hal.executable public @main_0_dispatch_0 { }>} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir index 3450851071963..710168c8635ca 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir @@ -13,12 +13,10 @@ // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 4 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d4)> #map1 = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d4)> @@ -26,9 +24,9 @@ func.func @expanded_matmul_transpose_b() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 64, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x64x2048xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 64, 2048], 
strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x64x2048xf16> %5 = tensor.empty() : tensor<2x10x64x64xf16> @@ -54,19 +52,17 @@ func.func @expanded_matmul_transpose_b() { // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_nhwc() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 258, 514, 768], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x258x514x768xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 768, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x768x256xf16> %5 = tensor.empty() : tensor<2x256x512x256xf32> @@ -81,12 +77,10 @@ func.func @conv_nhwc() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %5 = tensor.empty() : tensor<256x256xf32> @@ -123,19 +117,17 @@ func.func @matmul_256x256x256() attributes {hal.executable.target = #executable_ // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @mfma_matmul_1024x1024x1024() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf16> %5 = tensor.empty() : tensor<1024x1024xf32> @@ -156,12 +148,10 @@ func.func @mfma_matmul_1024x1024x1024() { // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d5, d2 + d6, d3 + d7, d8)> @@ -171,9 +161,9 @@ func.func @mfma_matmul_1024x1024x1024() { func.func @conv_nchwc() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [2, 20, 34, 34, 64], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x20x34x34x64xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 20, 3, 3, 160, 64], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x20x3x3x160x64xf16> %5 = tensor.empty() : tensor<2x8x32x32x160xf32> @@ -207,19 +197,17 @@ func.func @conv_nchwc() { // WMMA-SAME: intrinsic = #iree_gpu.mma_layout // WMMA-SAME: subgroup_m_count = 2, subgroup_n_count = 2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @wmma_matmul_1024x1024x1024() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf16> %5 = tensor.empty() : tensor<1024x1024xf32> @@ -240,19 +228,17 @@ func.func @wmma_matmul_1024x1024x1024() { // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 1 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @unaligned_mk_batch_matmul() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x968x1281xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 
0, 0], sizes = [64, 1281, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x1281x1281xf16> %5 = tensor.empty() : tensor<64x968x1281xf16> @@ -273,19 +259,17 @@ func.func @unaligned_mk_batch_matmul() { // CHECK-SAME: intrinsic = #iree_gpu.mma_layout // CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 4 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @unaligned_m_batch_matmul_64x72x1280x1280() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 72, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x72x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 1280, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x1280x1280xf16> %5 = tensor.empty() : tensor<64x72x1280xf16> @@ -300,19 +284,17 @@ func.func @unaligned_m_batch_matmul_64x72x1280x1280() { // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @narrow_n_batch_matmul_64x968x4x320_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x968x320xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 320, 4], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x320x4xf16> %5 = tensor.empty() : tensor<64x968x4xf16> @@ -327,12 +309,10 @@ func.func @narrow_n_batch_matmul_64x968x4x320_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_dynamic_dim() { %c0 = arith.constant 0 : index @@ -345,10 +325,10 @@ func.func @matmul_dynamic_dim() { %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : 
i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%8} %11 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [%8, 256], strides = [1, 1] : !flow.dispatch.tensor>{%8} -> tensor %12 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %13 = tensor.empty(%8) : tensor @@ -369,21 +349,19 @@ func.func @matmul_dynamic_dim() { // CHECK-LABEL: func.func @attention_20x4096x64x4096x64() -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @attention_20x4096x64x4096x64() { %cst = arith.constant 1.250000e-01 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) 
set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir index 5c0ba99d8603d..41253b97f066d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir @@ -2,7 +2,10 @@ #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> -#pipeline_layout = #hal.pipeline.layout, <1, storage_buffer>]>]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding 
+]> hal.executable @scalar_dispatch { hal.executable.variant public @rocm_hsaco_fb target(#executable_target_rocm_hsaco_fb) { @@ -16,8 +19,8 @@ hal.executable @scalar_dispatch { %c0 = arith.constant 0 : index %c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64 %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %extracted = tensor.extract %2[] : tensor %3 = arith.muli %extracted, %c6364136223846793005_i64 : i64 @@ -32,8 +35,8 @@ hal.executable @scalar_dispatch { // CHECK-LABEL: func.func @scalar_dispatch() // CHECK-SAME: translation_info = #iree_codegen.translation_info -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: memref.load %[[SPAN0]][] : memref> // CHECK: arith.muli {{.+}} : i64 // CHECK: arith.addi {{.+}} : i64 diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir index a9f320c906035..94e8a0b79bf04 100644 --- 
a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 \ // RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{workgroup = [64, 64, 0], reduction = [0, 0, 4], thread = [8, 4]}> hal.executable public @main { @@ -21,9 +19,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 
0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> @@ -42,9 +40,9 @@ hal.executable public @main { // analysis should be able to simplify the below to just two barriers. // CHECK-LABEL: func @matmul_transpose_b -// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: memref.alloc() : memref<64x8xf16, #gpu.address_space> // CHECK-DAG: memref.alloc() : memref<64x8xf16, #gpu.address_space> // CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c1280 step %c4 {{.*}} -> (vector<8x4xf32>) @@ -65,12 +63,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{workgroup = [64, 64, 0], reduction = [0, 0, 2], subgroup = [2, 2], mma_kind = #iree_gpu.mma_layout}> hal.executable public @main { @@ -85,9 +81,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : 
!flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> @@ -103,9 +99,9 @@ hal.executable public @main { } // CHECK-LABEL: func @matmul_transpose_b_mfma -// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space> // CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space> // CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c80 step %c2 {{.*}} -> (vector<2x2x4x1xf32>) @@ -129,12 +125,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer, ReadOnly>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{workgroup = [1, 64, 64, 0], reduction = [0, 0, 0, 2], subgroup = [1, 2, 2], mma_kind = #iree_gpu.mma_layout}> hal.executable private @main { @@ -148,9 +142,9 @@ hal.executable private @main { func.func @conv_igemm_im2col() attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x34x34x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 1280, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x1280x1280xf16> %5 = tensor.empty() : tensor<2x16x16x1280xf32> @@ -187,9 +181,9 @@ hal.executable private @main { } // CHECK-LABEL: func @conv_igemm_im2col -// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[B2:.+]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: memref.alloc() : memref<1x64x36xf16, #gpu.address_space> // CHECK-DAG: memref.alloc() : memref<32x68xf16, #gpu.address_space> // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index @@ -217,12 +211,10 @@ hal.executable private @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{ workgroup = [64, 64, 0], @@ -241,9 +233,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<2048x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280xf16> %5 = tensor.empty() : tensor<2048x10240xf32> @@ -259,9 +251,9 @@ hal.executable public @main { } // CHECK-LABEL: func @matmul_transpose_b_wmma -// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space> // CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space> // CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c80 step %c2 {{.*}} -> (vector<2x2x8x1x1xf32>) @@ -285,12 +277,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{ workgroup = [64, 64, 0], @@ -313,9 +303,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280x!eltype> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280x!eltype> %5 = tensor.empty() : tensor<2048x10240x!aeltype> @@ -339,12 +329,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{ workgroup = [64, 64, 0], @@ -367,9 +355,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280x!eltype> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280x!eltype> %5 = tensor.empty() : tensor<2048x10240x!aeltype> @@ -393,12 +381,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{ workgroup = [64, 64, 0], @@ -421,9 +407,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 
0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280x!eltype> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280x!eltype> %5 = tensor.empty() : tensor<2048x10240x!aeltype> @@ -447,12 +433,10 @@ hal.executable public @main { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_gpu.lowering_config<{ workgroup = [64, 64, 0], @@ -475,9 +459,9 @@ hal.executable public @main { attributes {translation_info = #iree_codegen.translation_info} { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1280x!eltype> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<10240x1280x!eltype> %5 = tensor.empty() : 
tensor<2048x10240x!aeltype> @@ -509,19 +493,15 @@ hal.executable public @main { #translation_info = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout< - push_constants = 0, - sets = [ - <0, bindings = [ - <0, storage_buffer, "ReadOnly|Indirect">, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer, Indirect> - ], flags = Indirect> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> hal.executable public @main { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { - hal.executable.export public @conv_nchw_fused ordinal(0) layout(#pipeline_layout) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} { + hal.executable.export public @conv_nchw_fused ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %x, %y, %z = flow.dispatch.workgroup_count_from_slice hal.return %x, %y, %z : index, index, index @@ -531,9 +511,9 @@ hal.executable public @main { %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant dense<1.0> : tensor<1x64xf32> %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) 
offset(%c0) flags(Indirect) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 64, 58, 58], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x64x58x58xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [64, 64, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x64x3x3xf32> %5 = tensor.empty() : tensor<1x64x56x56xf32> @@ -573,19 +553,15 @@ hal.executable public @main { #translation_info = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout< - push_constants = 0, - sets = [ - <0, bindings = [ - <0, storage_buffer, ReadOnly>, - <1, storage_buffer, "ReadOnly|Indirect">, - <2, storage_buffer, Indirect> - ], flags = Indirect> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> hal.executable public @main { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { - hal.executable.export public @skinny_matmul_config ordinal(0) layout(#pipeline_layout) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} { + hal.executable.export public @skinny_matmul_config ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %x, %y, %z = flow.dispatch.workgroup_count_from_slice hal.return %x, %y, %z : index, index, index @@ -597,10 +573,10 @@ hal.executable public @main { %c111444672 = arith.constant 111444672 : index %c4014080 = arith.constant 4014080 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c102227904) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c4014080) flags("ReadOnly|Indirect") : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c111444672) 
flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c102227904) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c4014080) flags("ReadOnly|Indirect") : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c111444672) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 3136], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x3136xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir index d8f96739cbe3a..1b7c8183815e1 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir @@ -12,12 +12,10 @@ // to be migrated to the rocdl heuristics, but for now is just physically // located here. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_f16_f32 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -30,9 +28,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_f16_f32() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %5 = tensor.empty() : tensor<256x256xf32> @@ -63,12 +61,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_f16_f16 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -81,9 +77,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_f16_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %5 = tensor.empty() : tensor<256x256xf16> @@ -112,12 +108,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @expanded_matmul_transpose_b_executable { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -130,11 +124,11 @@ 
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @expanded_matmul_transpose_b() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 64, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x64x2048xf16> @@ -184,12 +178,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // Basic f8, f8 -> f32 matmul. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_f8_f32 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -202,9 +194,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_f8_f32() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf8E4M3FNUZ> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf8E4M3FNUZ> %5 = tensor.empty() : tensor<256x256xf32> @@ -235,12 +227,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // Basic i8, i8 -> i32 matmul. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_i8_i32 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -253,9 +243,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_i8_i32() { %cst = arith.constant 0 : i32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xi8> %5 = tensor.empty() : tensor<256x256xi32> @@ -286,12 +276,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // Basic i8, i8 -> i32 matmul_transpose_b. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_transpose_b_256x256x256_i8_i32 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -304,9 +292,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_transpose_b_256x256x256_i8_i32() { %cst = arith.constant 0 : i32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xi8> %5 = tensor.empty() : tensor<256x256xi32> @@ -335,12 +323,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @conv_nhwc_dispatch_0 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -353,9 +339,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @conv_nhwc() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 258, 514, 768], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x258x514x768xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 768, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x768x256xf16> %5 = tensor.empty() : tensor<2x256x512x256xf32> @@ -377,12 +363,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> #map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)> @@ -403,9 +387,9 @@ hal.executable public 
@main_dispatch_expanded_matmul { %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %2 = arith.index_castui %0 : i32 to index %3 = arith.index_castui %1 : i32 to index - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%3) : !flow.dispatch.tensor> %7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [2, 1024, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x1024x1280xf16> %8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [20, 64, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x64x1280xf16> %9 = tensor.empty() : tensor<2x1024x20x64xf16> @@ -446,12 +430,10 @@ hal.executable public @main_dispatch_expanded_matmul { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_f16_f32 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -464,9 +446,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_f16_f32() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : 
index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %5 = tensor.empty() : tensor<256x256xf32> @@ -498,12 +480,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matmul_256x256x256_f16_f16 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -516,9 +496,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @matmul_256x256x256_f16_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x256xf16> %5 = tensor.empty() : tensor<256x256xf16> @@ -550,12 +530,10 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @unaligned_mk_batch_matmul_64x978x1281x1281_f16_f16 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -568,9 +546,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { func.func @unaligned_nk_batch_matmul() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x968x1281xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 1281, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x1281x1281xf16> %5 = tensor.empty() : tensor<64x968x1281xf16> @@ -595,9 +573,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // CHECK-DAG: %[[RHS_SHARED_SUB:.+]] = memref.subview %[[RHS_SHARED]][0, 0, 0] [1, 16, 16] [1, 1, 1] // CHECK-DAG: %[[LHS_SHARED:.+]] = memref.alloc() : memref<1x16x20xf16, #gpu.address_space> // CHECK-DAG: %[[LHS_SHARED_SUB:.+]] = memref.subview %[[LHS_SHARED]][0, 0, 0] [1, 16, 16] [1, 1, 1] -// CHECK-DAG: %[[LHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x968x1281xf16, #hal.descriptor_type> -// CHECK-DAG: %[[RHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x1281x1281xf16, #hal.descriptor_type> -// CHECK-DAG: %[[OUT_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref<64x968x1281xf16, #hal.descriptor_type> +// CHECK-DAG: %[[LHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x968x1281xf16, #hal.descriptor_type> +// CHECK-DAG: %[[RHS_GLOBAL:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x1281x1281xf16, #hal.descriptor_type> +// CHECK-DAG: %[[OUT_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref<64x968x1281xf16, #hal.descriptor_type> // CHECK-DAG: %[[LHS_GLOBAL_SUB:.+]] = memref.subview %[[LHS_GLOBAL]] // CHECK-DAG: %[[RHS_GLOBAL_SUB:.+]] = memref.subview %[[RHS_GLOBAL]] // CHECK: %[[LHS_LOAD:.+]] = vector.transfer_read %[[LHS_GLOBAL_SUB]]{{.+}} {in_bounds = [true, false, false]} @@ -634,11 +612,9 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { // NOTE: This test is not exhaustive of all possible ways the above condition is breaking, // but rather is an example of a matmul shape from a model that broke our compilation heuristic. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable public @contract_schedule_considering_read_layout { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -656,9 +632,9 @@ hal.executable public @contract_schedule_considering_read_layout { %3 = arith.index_castui %0 : i32 to index %4 = arith.index_castui %1 : i32 to index %5 = arith.index_castui %2 : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) 
flags(ReadOnly) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> %9 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0], sizes = [2, 160, 1536], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x160x1536xf16> %10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0], sizes = [2, 1536, 1536], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x1536x1536xf16> %11 = tensor.empty() : tensor<2x160x1536xf16> @@ -690,13 +666,11 @@ hal.executable public @contract_schedule_considering_read_layout { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @attention_20x4096x64x4096x64 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -709,10 +683,10 @@ hal.executable private @attention_20x4096x64x4096x64 { func.func @attention_20x4096x64x4096x64() { %cst = arith.constant 1.250000e-01 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<20x4096x64xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir index 846b30e0ef9f1..4128e4f66700d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-rocdl-configuration-pipeline), iree-codegen-linalg-to-rocdl-pipeline2)))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @warp_reduction { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -17,8 +15,8 @@ hal.executable private @warp_reduction { func.func @warp_reduction() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x512xf32> %3 = tensor.empty() : tensor<2xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32> @@ -42,12 +40,10 @@ hal.executable private @warp_reduction { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @main_dispatch_517 { hal.executable.variant public @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -62,9 +58,9 @@ hal.executable public @main_dispatch_517 { %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index %c394240 = arith.constant 394240 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) 
: !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1280xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1280x1280xf32> %5 = tensor.empty() : tensor<1x1280xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir index e029174936e8d..649cbd9b65c00 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir @@ -2,21 +2,19 @@ // RUN: --iree-gpu-test-target=sm_60 | \ // RUN: FileCheck --check-prefix=CHECK %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @_attention_dispatch_0() { %c0 = arith.constant 0 : index %cst = arith.constant 1.250000e-01 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) 
flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<192x1024x64xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<192x1024x64xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<192x1024x64xf16> @@ -52,16 +50,16 @@ func.func @_attention_dispatch_0() { // CHECK-DAG: %[[C1024:.+]] = arith.constant 1024 : index // CHECK-DAG: %[[CST_5:.+]] = arith.constant 0.000000e+00 : f32 // CHECK-dAG: %[[CST_6:.+]] = arith.constant dense<1.802980e-01> : vector<128x64xf16> -// CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) +// CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) // CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D0]], 64 : memref<192x1024x64xf16, #hal.descriptor_type> -// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) +// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) // CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D1]], 64 : memref<192x1024x64xf16, #hal.descriptor_type> -// CHECK: %[[D2:.+]] = hal.interface.binding.subspan 
layout({{.+}}) set(0) binding(2) alignment(64) +// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) // CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D2]], 64 : memref<192x1024x64xf16, #hal.descriptor_type> -// CHECK: %[[D3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) alignment(64) +// CHECK: %[[D3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) alignment(64) // CHECK-SAME: offset(%[[C0]]) : memref<192x1024x64xf16, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D3]], 64 : memref<192x1024x64xf16, #hal.descriptor_type> // CHECK: %[[WORKGROUP_ID_X:.+]] = hal.interface.workgroup.id[0] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir index 7e69786e5a7c6..13a84ec704f0b 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir @@ -2,21 +2,19 @@ // RUN: --iree-gpu-test-target=gfx908 | \ // RUN: FileCheck --check-prefix=CHECK %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @attention_dispatch_0_attention_16x16384x128xf16() { %c0 = arith.constant 0 : index %scale = arith.constant 0.08838834764 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x16384x128xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x16384x128xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x16384x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir index 2782383d65c78..1a60072f8ed51 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s --check-prefix=CDNA3 -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dynamic_batch_matvec() { %c32_i64 = arith.constant 32 : i64 @@ -21,11 +19,11 @@ func.func @dynamic_batch_matvec() { %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> %11 = flow.dispatch.workload.ordinal %8, 0 : index %12 = flow.dispatch.workload.ordinal %9, 1 : index - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor>{%11} - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor>{%12} + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor>{%11} + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor>{%12} %15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [32, 1, %11], strides = [1, 1, 1] : !flow.dispatch.tensor>{%11} -> tensor<32x1x?xf16> %16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0], sizes = [32, %12, 128], strides = [1, 1, 1] : !flow.dispatch.tensor>{%12} -> tensor<32x?x128xf16> %17 = tensor.empty() : tensor<32x1x128xf16> @@ -46,12 +44,10 @@ func.func @dynamic_batch_matvec() { // This test uses special heuristics that needs to check the backend in the #hal.executable.target. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> #map = affine_map<(d0, d1, d2) -> (d0, d2)> @@ -60,9 +56,9 @@ func.func @dynamic_batch_matvec() { func.func @vmt1() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<1x32000xf16> @@ -88,12 +84,10 @@ func.func @vmt1() attributes {hal.executable.target = #executable_target_rocm_hs // This test uses special heuristics that needs to check the backend in the #hal.executable.target. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> #map = affine_map<(d0, d1, d2) -> (d0, d2)> @@ -102,9 +96,9 @@ func.func @vmt1() attributes {hal.executable.target = #executable_target_rocm_hs func.func @vmt2() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<1x32000xf16> @@ -128,14 +122,12 @@ func.func @vmt2() attributes {hal.executable.target = #executable_target_rocm_hs // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -144,11 +136,11 @@ func.func @vmt2() attributes {hal.executable.target = #executable_target_rocm_hs func.func @i4_dequant_matvec() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> 
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> @@ -187,19 +179,17 @@ func.func @i4_dequant_matvec() { // Send 2xNxK mmt to the warp reduction pipeline. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @skinny_mmt() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() 
: tensor<2x32000xf16> @@ -220,19 +210,17 @@ func.func @skinny_mmt() { // Send Mx2xK mmt to the warp reduction pipeline. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @skinny_mmt() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<32000x2xf16> @@ -251,12 +239,10 @@ func.func @skinny_mmt() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> #map1 = affine_map<(d0, d1, d2) -> (d1, d2)> @@ 
-264,9 +250,9 @@ func.func @skinny_mmt() { func.func @not_vmt() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [5, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<5x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<5x32000xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir index 8db080c0d5eaa..e5d6b2ae5aa1c 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func 
@winograd_filter_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x64x128xf32> %3 = tensor.empty() : tensor<8x8x64x128xf32> %4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32> @@ -26,16 +24,14 @@ func.func @winograd_filter_transform() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_input_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> 
tensor<2x34x34x128xf16> %3 = tensor.empty() : tensor<8x8x2x6x6x128xf16> %4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16> @@ -52,16 +48,14 @@ func.func @winograd_input_transform() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_output_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x8x2x6x6x128xf16> %3 = tensor.empty() : tensor<2x36x36x128xf16> %4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir index de7bbb411ce38..d129117741e3f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir @@ -3,12 +3,10 @@ // RUN: %s | FileCheck %s 
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv2d_1x230x230x3_7x7x3x64_dispatch_0 { hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) { @@ -21,9 +19,9 @@ hal.executable private @conv2d_1x230x230x3_7x7x3x64_dispatch_0 { func.func @conv2d_1x230x230x3_7x7x3x64() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x230x230x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<7x7x3x64xf32> %5 = tensor.empty() : tensor<1x112x112x64xf32> @@ -50,12 +48,10 @@ hal.executable private @conv2d_1x230x230x3_7x7x3x64_dispatch_0 { // ----- #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv_nchw_dispatch_0 { hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) { @@ -68,9 +64,9 @@ hal.executable private @conv_nchw_dispatch_0 { func.func @conv_nchw() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x4x66x66xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<320x4x3x3xf32> %5 = tensor.empty() : tensor<2x320x64x64xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir index 6035b4bd73536..ec67064e9f82d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir +++ 
b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir @@ -2,13 +2,11 @@ // RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target,canonicalize)))))' \ // RUN: %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv_nchw_dispatch_1 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -23,10 +21,10 @@ hal.executable private @conv_nchw_dispatch_1 { func.func @conv_2d_nchw_fchw_2x320x64x64x320x3x3_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 320, 130, 130], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x320x130x130xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<320x320x3x3xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [320], strides = [1] : !flow.dispatch.tensor> -> tensor<320xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir index ea5f33f32520a..f20c2c3a8d305 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir @@ -1,14 +1,10 @@ // RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-nvvm))))" --iree-gpu-test-target=sm_60 --split-input-file %s | FileCheck %s // Test that that standard and GPU ops are converted to LLVM and NVVM. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<4, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @abs_ex_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -17,9 +13,9 @@ hal.executable @abs_ex_dispatch_0 { func.func @abs_ex_dispatch_0() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<16xi32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -44,14 +40,10 @@ hal.executable @abs_ex_dispatch_0 { // CHECK: llvm.store %[[FADD]], %[[ADDR]] : f32, !llvm.ptr // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<4, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @abs_dynamic { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -66,9 +58,9 @@ hal.executable @abs_dynamic { %d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index %d2 = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%o) : memref>{%d0, %d1, %d2} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%d0, %d1, %d2} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref{%d0, %d1, %d2} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%o) : memref>{%d0, %d1, %d2} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%d0, %d1, %d2} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref{%d0, %d1, %d2} %9 = memref.load %0[%c3, %c5, %c7] : memref> %10 = memref.load %1[%c3, %c5, %c7] : memref %11 = arith.sitofp %10 : i32 to f32 @@ -106,13 +98,9 @@ hal.executable @abs_dynamic { // Test that we handle correctly the case where bindings are sparse (set 0 // binding 0 is not used). -#pipeline_layout = #hal.pipeline.layout - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @dead_symbol { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -121,8 +109,8 @@ hal.executable @dead_symbol { func.func @dead_symbol() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xi32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -146,11 +134,9 @@ hal.executable @dead_symbol { // A single binding may contain different data types. 
// Test that we cast pointers correctly. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @mixed_type { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -159,9 +145,9 @@ hal.executable @mixed_type { func.func @mixed_type() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) : memref<16xf32, strided<[1], offset: 4>> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c0) : memref<16xi32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) : memref<16xf32, strided<[1], offset: 4>> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c0) : memref<16xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -187,10 +173,8 @@ hal.executable @mixed_type { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable @shared_memory_lowering { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -227,10 +211,8 @@ hal.executable @shared_memory_lowering { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable @shared_memory_dealloc_elision { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -253,10 +235,8 @@ hal.executable @shared_memory_dealloc_elision { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable @shared_memory_lowering_aligned_alloc { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -288,15 
+268,11 @@ hal.executable @shared_memory_lowering_aligned_alloc { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @check_not_readonly { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -305,13 +281,13 @@ hal.executable @check_not_readonly { func.func @check_not_readonly() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<16xi32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> - %b11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) flags(ReadOnly) : memref<16xi32> - %b12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%c128) : memref<16xf32, strided<[1], offset: 32>> - %b21 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) flags(ReadOnly) : memref<16xi32> - %b22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> + %b11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) flags(ReadOnly) : memref<16xi32> + %b12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) 
offset(%c128) : memref<16xf32, strided<[1], offset: 32>> + %b21 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) flags(ReadOnly) : memref<16xi32> + %b22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -332,13 +308,9 @@ hal.executable @check_not_readonly { // ----- -#pipeline_layout = #hal.pipeline.layout - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @complex { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -347,8 +319,8 @@ hal.executable @complex { func.func @complex() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) flags(ReadOnly) : memref<16xcomplex> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) flags(ReadOnly) : memref<16xcomplex> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -371,10 +343,8 @@ hal.executable @complex { // ----- // Check that we don't choke on memref of index. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable @shared_memory_lowering_index { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -398,11 +368,9 @@ hal.executable @shared_memory_lowering_index { // CHECK-NEXT: %{{.*}} = llvm.getelementptr %{{.*}} : (!llvm.ptr<3>, i64, i64) -> !llvm.ptr<3> // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @masked_load_store { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -412,8 +380,8 @@ hal.executable @masked_load_store { %c0 = arith.constant 0 : index %idx = gpu.thread_id x %pass_thru = arith.constant dense<0.000000e+00> : vector<1xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space> %mask = vector.create_mask %idx : vector<1xi1> %ld = vector.maskedload %0[%idx], %mask, %pass_thru : memref<64xf32, #gpu.address_space>, vector<1xi1>, vector<1xf32> into vector<1xf32> vector.maskedstore %1[%idx], %mask, %ld : memref<64xf32, #gpu.address_space>, vector<1xi1>, vector<1xf32> @@ -429,12 +397,10 @@ hal.executable @masked_load_store { // ----- // Test workgroup size lowering -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_wg_size { hal.executable.variant @rocm target(<"cuda", "cuda-nvptx-fb">) { @@ -446,7 +412,7 @@ hal.executable private @interface_wg_size { %c0 = arith.constant 0.0 : f32 %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32> memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir index a88ec61be9cc6..3e158e13daf3a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir @@ -2,15 +2,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-hip-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32 // Test that that standard and GPU ops are converted to LLVM and NVVM. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @abs_ex_dispatch_0 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -18,9 +14,9 @@ hal.executable @abs_ex_dispatch_0 { builtin.module { func.func @abs_ex_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) flags(ReadOnly) : memref<16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) flags(ReadOnly) : memref<16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<16xf32> %3 = gpu.block_id x %4 = gpu.block_dim x %5 = gpu.thread_id x @@ -48,14 +44,10 @@ hal.executable @abs_ex_dispatch_0 { // ----- // Test that maximum and minum are converted to max and min on rocm -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @abs_ex_dispatch_0 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -63,9 +55,9 @@ hal.executable @abs_ex_dispatch_0 { builtin.module { func.func @reduction_maximum() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) 
alignment(64) offset(%c0) flags(ReadOnly) : + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x64xf32, + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>> %2 = vector.load %0[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32> %3 = vector.reduction , %2 : vector<2xf32> into f32 @@ -81,10 +73,8 @@ hal.executable @abs_ex_dispatch_0 { // ----- // Test that gpu barriers be lowered to `s_waitcnt lgkmcnt(0)\0As_barrier` on rocm -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable @simple_barrier { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -101,11 +91,9 @@ hal.executable @simple_barrier { // CHECK: llvm.inline_asm has_side_effects asm_dialect = att ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier", "" : () -> () // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @masked_load_store { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -115,8 +103,8 @@ hal.executable @masked_load_store { %c0 = arith.constant 0 : index %idx = gpu.thread_id x %pass_thru = arith.constant dense<0.000000e+00> : vector<1xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space> %mask = vector.create_mask %idx : vector<1xi1> %ld = vector.maskedload %0[%idx], %mask, %pass_thru : memref<64xf32, #gpu.address_space>, vector<1xi1>, vector<1xf32> into vector<1xf32> vector.maskedstore %1[%idx], %mask, %ld : memref<64xf32, #gpu.address_space>, vector<1xi1>, vector<1xf32> @@ -132,12 +120,10 @@ hal.executable @masked_load_store { // ----- // Test workgroup size lowering -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_wg_size { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -149,7 +135,7 @@ hal.executable private @interface_wg_size { %c0 = arith.constant 0.0 : f32 %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32> memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir index c41952608ddc3..14259a95b6030 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 
--pass-pipeline="builtin.module(func.func(iree-llvmgpu-tile-and-distribute))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 2)> @@ -16,9 +14,9 @@ func.func @dot_dispatch_0() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x1024xf32> %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -70,12 +68,10 @@ func.func @dot_dispatch_0() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 8)> @@ -90,11 +86,11 @@ func.func @batch_matmul_func() attributes {translation_info = #translation} { %c4 = arith.constant 4 : index %c32 = arith.constant 32 : index %c64 = 
arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32> memref.assume_alignment %0, 32 : memref<4x32x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32> memref.assume_alignment %1, 32 : memref<4x1024x64xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32> memref.assume_alignment %2, 32 : memref<4x32x64xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -143,12 +139,10 @@ func.func @batch_matmul_func() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 2)> @@ -159,9 +153,9 @@ func.func @dot_dispatch_0() attributes {translation_info = #translation} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : 
memref<1024x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x1024xf32> %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index @@ -215,11 +209,9 @@ func.func @dot_dispatch_0() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<(d0) -> (d0)> @@ -229,8 +221,8 @@ func.func @predict_dispatch_153() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index %cst = arith.constant 0x7FC00000 : f32 %cst_0 = arith.constant 0xFF800000 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1000xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1000xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref linalg.fill {lowering_config = #config} ins(%cst_0 : f32) outs(%1 : memref) linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref) attrs = {lowering_config = #config} { ^bb0(%in: f32, %out: f32): @@ -253,12 +245,10 @@ func.func @predict_dispatch_153() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + 
#hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 256)> @@ -274,11 +264,11 @@ module { %c41664 = arith.constant 41664 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x64x56x56xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x64x56x56xf32> memref.assume_alignment %0, 64 : memref<1x64x56x56xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c41664) : memref<64x64x1x1xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c41664) : memref<64x64x1x1xf32> memref.assume_alignment %1, 64 : memref<64x64x1x1xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c802816) : memref<1x64x56x56xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c802816) : memref<1x64x56x56xf32> memref.assume_alignment %2, 64 : memref<1x64x56x56xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -316,12 +306,10 @@ module { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -342,9 +330,9 @@ module { %cst_0 = arith.constant 0.000000e+00 : f32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_cast %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) : memref{%1, %1} - %3 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) : memref{%1, %1} - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref{%1, %1, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) : memref{%1, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) : memref{%1, %1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref{%1, %1, %1} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir index 30e5278e09ac2..7f50d1d6a59b7 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d0, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func.func @forward_dispatch_0_generic_320x320x3x3() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) 
offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 320, 320, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x320x320x3xf32> %3 = tensor.empty() : tensor<320x320x3x3xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<3x320x320x3xf32>) outs(%3 : tensor<320x320x3x3xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir index 74b075c71e1d5..2c171b2a2620f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir @@ -10,21 +10,19 @@ // CHECK-NEXT: linalg.generic // CHECK-NOT: linalg.matmul_transpose_b -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @warp_reduction_large_vector() { %cst = arith.constant 0.000000e+00 : f32 %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index %c394240 = arith.constant 394240 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x1280xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1280x1280xf32> %5 = tensor.empty() : tensor<1x1280xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir index 3c3932ccf98f1..50d989599819a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir @@ -5,19 +5,17 @@ // Transform dialect attributes are tested separately. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0) -> (d0)> func.func @add_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<16384xf32> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [16384], strides = [1] : !flow.dispatch.tensor> -> tensor<16384xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [16384], strides = [1] : !flow.dispatch.tensor> -> tensor<16384xf32> @@ -39,21 +37,19 @@ func.func @add_dispatch_0() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dot_dispatch_1() { %c0 = arith.constant 0 : index %c4 = arith.constant 4 : index %c2 = arith.constant 2 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x3xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x4xf32> + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2x3xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x4xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x4xf32> linalg.fill ins(%cst : f32) outs(%2 : memref<2x4xf32>) linalg.matmul ins(%0, %1 : memref<2x3xf32>, memref<3x4xf32>) outs(%2 : memref<2x4xf32>) return @@ -70,21 +66,19 @@ func.func @dot_dispatch_1() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @unaligned_k() { %c0 = arith.constant 0 : index %c4 = arith.constant 4 : index %c2 = arith.constant 2 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<128x258xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<258x64xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<128x64xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<128x258xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<258x64xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<128x64xf32> linalg.fill ins(%cst : f32) outs(%2 : memref<128x64xf32>) linalg.matmul ins(%0, %1 : memref<128x258xf32>, memref<258x64xf32>) outs(%2 : memref<128x64xf32>) return @@ -101,11 +95,9 @@ func.func @unaligned_k() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0) -> (d0)> #map1 = affine_map<(d0) -> ()> @@ -113,8 +105,8 @@ func.func @predict_dispatch_153() { %c0 = arith.constant 0 : index 
%cst = arith.constant 0x7FC00000 : f32 %cst_0 = arith.constant 0xFF800000 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1000xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1000xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref linalg.fill ins(%cst_0 : f32) outs(%1 : memref) linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref) { ^bb0(%in: f32, %out: f32): @@ -138,19 +130,17 @@ func.func @predict_dispatch_153() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d2, d0, d1)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> func.func @reduction_aligned2() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 128, 384], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x128x384xf32> %3 = tensor.empty() : tensor<128x384xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<128x384xf32>) -> tensor<128x384xf32> @@ -174,19 +164,17 @@ func.func @reduction_aligned2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @copy_as_generic() { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%0, %1} - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%0, %1} linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : memref) outs(%3 : memref) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 @@ -203,19 +191,17 @@ func.func @copy_as_generic() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_1d_fft_stage2() { %c0 = arith.constant 0 : index %c2 = arith.constant 2 : index %cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32> %cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], 
strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %4:2 = iree_linalg_ext.fft {__internal_linalg_transform__ = "workgroup"} ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32> @@ -233,11 +219,9 @@ func.func @static_1d_fft_stage2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_3d_fft_stage3() { %c0 = arith.constant 0 : index @@ -249,8 +233,8 @@ func.func @static_3d_fft_stage3() { %cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32> %0 = bufferization.to_memref %cst_0 : memref<4xf32> %1 = bufferization.to_memref %cst : memref<4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32> iree_linalg_ext.fft {__internal_linalg_transform__ = "workgroup"} ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>) return } @@ -264,12 +248,10 @@ func.func @static_3d_fft_stage3() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -279,9 +261,9 @@ func.func @_lowering_config_test_dispatch_1() { %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf32> %5 = tensor.empty() : tensor<128x1024xf32> @@ -302,22 +284,20 @@ func.func @_lowering_config_test_dispatch_1() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @sort_op() { %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c2304000 = arith.constant 2304000 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c2304000) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c2304000) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 576000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x576000xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 576000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x576000xi32> %6:2 = iree_linalg_ext.sort dimension(1) outs(%4, %5 : tensor<1x576000xf32>, tensor<1x576000xi32>) { @@ -339,21 +319,19 @@ func.func @sort_op() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_config_sm35() { %cst = arith.constant 0.000000e+00 : f32 %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf32> %5 = tensor.empty() : tensor<128x1024xf32> @@ -369,21 +347,19 @@ func.func @matmul_config_sm35() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_config_sm80() { %cst = arith.constant 0.000000e+00 : f32 %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf32> %5 = tensor.empty() : tensor<128x1024xf32> @@ -399,21 +375,19 @@ func.func @matmul_config_sm80() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_config_sm86() { %cst = 
arith.constant 0.000000e+00 : f32 %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf32> %5 = tensor.empty() : tensor<128x1024xf32> @@ -429,12 +403,10 @@ func.func @matmul_config_sm86() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -445,9 +417,9 @@ func.func @contract_reduction() { %c40064 = arith.constant 40064 : index %c34752 = arith.constant 34752 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c40064) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) 
offset(%c34752) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c40064) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c34752) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<3x64xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 4], sizes = [3, 64, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x64x4xf32> %5 = linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%3 : tensor<3x64xf32>) -> tensor<3x64xf32> @@ -470,11 +442,9 @@ func.func @contract_reduction() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @dynamic_pack_2x2() { %c0 = arith.constant 0 : index @@ -487,8 +457,8 @@ func.func @dynamic_pack_2x2() { %5 = arith.index_castui %1 : i32 to index %6 = arith.index_castui %2 : i32 to index %7 = arith.index_castui %3 : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor>{%4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor %pack = tensor.pack %10 inner_dims_pos = [0, 1] inner_tiles = 
[2, 2] into %11 : tensor -> tensor @@ -505,21 +475,19 @@ func.func @dynamic_pack_2x2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @large_matmul_f16() { %cst = arith.constant 0.000000e+00 : f16 %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 1792], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2560x1792xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1792, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1792x2048xf16> %5 = tensor.empty() : tensor<2560x2048xf16> @@ -539,21 +507,19 @@ func.func @large_matmul_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @large_matmul_f32() { %cst = arith.constant 0.000000e+00 : f32 %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 1792], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2560x1792xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1792, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1792x2048xf32> %5 = tensor.empty() : tensor<2560x2048xf32> @@ -574,19 +540,17 @@ func.func @large_matmul_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @inner_unit_dim() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<16384x1xf32> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16384, 1], 
strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16384x1xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16384, 1], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16384x1xf32> @@ -608,12 +572,10 @@ func.func @inner_unit_dim() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d3)> @@ -627,9 +589,9 @@ func.func @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32() { %cst_3 = arith.constant dense_resource<__elided__> : tensor<64xf32> %cst_4 = arith.constant dense_resource<__elided__> : tensor<64xf32> %cst_5 = arith.constant dense_resource<__elided__> : tensor<64xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c162508800) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c162508800) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [256, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<256x230x230x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], 
strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<7x7x3x64xf32> %5 = tensor.empty() : tensor<256x112x112x64xf32> @@ -660,11 +622,9 @@ func.func @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d1, d4)> #map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d1, d4)> @@ -679,9 +639,9 @@ func.func @_main_dispatch_15_generic_512x4x42x42x64_f32() { %3 = arith.index_castui %0 {stream.alignment = 64 : index, stream.values = [35524672 : index, 240930880 : index, 446337088 : index, 651743296 : index]} : i32 to index %4 = arith.index_castui %1 {stream.alignment = 64 : index, stream.values = [57544768 : index, 262950976 : index, 468357184 : index, 673763392 : index]} : i32 to index %5 = arith.index_castui %2 {stream.alignment = 64 : index, stream.values = [1728 : index, 36472832 : index, 72943744 : index, 109415936 : index]} : i32 to index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> %9 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0, 0], sizes = [512, 42, 4, 64], 
strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<512x42x4x64xf32> %10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0, 0], sizes = [512, 42, 4, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<512x42x4x64xf32> %11 = tensor.empty() : tensor<512x4x42x42xf32> @@ -712,13 +672,11 @@ func.func @_main_dispatch_15_generic_512x4x42x42x64_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d0)> @@ -755,12 +713,12 @@ func.func @i4_dequant_matvec() { %24 = arith.shli %23, %c32_i64 : i64 %25 = arith.ori %22, %24 : i64 %26 = arith.index_castui %25 : i64 to index - %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> - %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> - %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> + %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> + %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> + %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> %30 = flow.dispatch.workload.ordinal %26, 0 : index - %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : 
!flow.dispatch.tensor>{%30} - %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} + %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor>{%30} + %32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} %33 = flow.dispatch.tensor.load %27, offsets = [0, 0], sizes = [4096, 11008], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x11008xi4> %34 = flow.dispatch.tensor.load %28, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor> -> tensor<4096xf32> %35 = flow.dispatch.tensor.load %29, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor> -> tensor<4096xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir index 7313f52c7614f..436ef52e66948 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" --verify-diagnostics --split-input-file %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : 
memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{Total number of threads in a thread block 2048 exceeds the limit of 1024 with compilation pipeline LLVMGPUMatmulSimt}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -21,20 +19,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{Expected workgroup size in z-dim = 1, but got 2 with compilation pipeline LLVMGPUMatmulSimt}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ 
-42,20 +38,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32> // expected-error @+1 {{Total number of threads in a thread block 1280 exceeds the limit of 1024 with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>) return @@ -63,20 +57,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32> - 
%1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32> // expected-error @+1 {{Number of threads in x-dim 48 is not a multiple of warp size (32) or integer units of warps in x-dim with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>) return @@ -84,20 +76,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32> // expected-error @+1 {{Expected workgroup size in z-dim = 1, but got 2 with compilation pipeline 
LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>) return @@ -105,20 +95,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32> // expected-error @+1 {{Thread block shape 32, 32, 20 cannot be tiled on matmul shape 32, 32, 16 with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>) return @@ -126,20 +114,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes 
{translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32> // expected-error @+1 {{Tensor Core instruction shape 16, 16, 8 cannot be tiled on warp shape 64, 8, 16 with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>) return @@ -147,20 +133,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<48x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<48x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<48x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<48x32xf32> // expected-error @+1 {{Thread block shape 32, 32, 16 cannot be tiled on matmul shape 48, 32, 16 with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<48x16xf32>, memref<16x32xf32>) outs(%2 : memref<48x32xf32>) return @@ -168,20 +152,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x48xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x48xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x48xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x48xf32> // expected-error @+1 {{Thread block shape 32, 32, 16 cannot be tiled on matmul shape 32, 48, 16 with compilation pipeline LLVMGPUMatmulTensorCore}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x48xf32>) outs(%2 : memref<32x48xf32>) return @@ -189,12 +171,10 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 8)> @@ -209,11 +189,11 @@ func.func @illegal() attributes {translation_info = #translation} { %c4 = arith.constant 4 : index %c32 = arith.constant 32 : index %c64 = arith.constant 64 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32> memref.assume_alignment %0, 32 : memref<4x32x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32> memref.assume_alignment %1, 32 : memref<4x1024x64xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32> memref.assume_alignment %2, 32 : memref<4x32x64xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -242,20 +222,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) 
: memref<1024x512xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32> // expected-error @+1 {{Thread block shape 64, 32, 48 cannot be tiled on matmul shape 1024, 256, 512 with compilation pipeline LLVMGPUMatmulTensorCoreMmaSync}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>) return @@ -263,20 +241,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32> // expected-error @+1 {{Tensor Core instruction shape 16, 8, 8 
cannot be tiled on warp shape 64, 8, 4 with compilation pipeline LLVMGPUMatmulTensorCoreMmaSync}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>) return @@ -284,20 +260,18 @@ func.func @illegal() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info func.func @illegal() attributes {translation_info = #translation} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xi8> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xi8> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xi8> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xi8> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xi8> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xi8> // expected-error @+1 {{Expected f16, bf16 or f32 for Tensor Core (MMA.SYNC) pipeline}} linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xi8>, memref<512x256xi8>) outs(%2 : memref<1024x256xi8>) return diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir index 18100b1c924be..f292df715093e 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir @@ -10,19 +10,17 @@ // RUN: 
--iree-codegen-transform-dialect-library=%p/transform_dialect_codegen_foreach_to_gpu_spec.mlir@__transform_main | \ // RUN: FileCheck %s --check-prefix=FOREACH-TO-GPU -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> func.func @matmul_static_dispatch_0() attributes {hal.executable.target = #executable_target_cuda_nvptx_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<500x1020xf32> %5 = tensor.empty() : tensor<250x1020xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir index 1082caf638274..73bdb91698e91 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir +++ 
b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir @@ -1,17 +1,15 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-llvmgpu-bufferization-pipeline))" --split-input-file %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @bufferize_with_thread_private_memory(%arg0: index) { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 %cst_ved = arith.constant dense<0.000000e+00> : vector<1x1x4x4xf16> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [%arg0, %arg0, %arg0, %arg0], sizes = [1, 1, 8, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x8x64xf16> %3 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xf16> %4 = scf.forall (%arg1, %arg2) in (2, 16) shared_outs(%arg3 = %2) -> (tensor<1x1x8x64xf16>) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir index 8ce12fb39f3e7..25b5957df1970 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir @@ -73,7 +73,11 @@ // Just double check that we 
captured the IV // CHECK: %[[IV_NEXT:.*]] = llvm.mul %[[IV]], %[[C8192]] : i64 #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> -#pipeline_layout = #hal.pipeline.layout, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> hal.executable private @matmul_dispatch_0 { hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) { hal.executable.export public @matmul_dispatch_0_matmul_2560x2560x2560 ordinal(0) layout(#pipeline_layout) { @@ -85,9 +89,9 @@ hal.executable private @matmul_dispatch_0 { func.func @matmul_dispatch_0_matmul_2560x2560x2560() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 2560], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2560x2560xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2560, 2560], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2560x2560xf16> %5 = tensor.empty() : tensor<2560x2560xf16> diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir index ab7136cd9ac75..28ae306f15212 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir @@ -5,12 +5,10 @@ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @mma_fused_fp16 { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -25,10 +23,10 @@ hal.executable @mma_fused_fp16 { %cst = arith.constant 0.000000e+00 : f16 %c2048 = arith.constant 2048 : index %c512 = arith.constant 512 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1024xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] @@ -87,12 +85,10 @@ 
hal.executable @mma_fused_fp16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @mma_fused_f32 { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -107,10 +103,10 @@ hal.executable @mma_fused_f32 { %cst = arith.constant 0.000000e+00 : f32 %c2048 = arith.constant 2048 : index %c512 = arith.constant 512 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1024xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir index 975f73ddc9476..c5918852783c7 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir @@ -4,12 +4,10 @@ // Verify that 
a simple element wise op gets lowered succefully all the way to // nvvm/llvm dialect. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @simpleMath_ex_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -21,9 +19,9 @@ hal.executable @simpleMath_ex_dispatch_0 { builtin.module { func.func @add_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<16xf32> %4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor> -> tensor<16xf32> %5 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor> -> tensor<16xf32> @@ -48,12 +46,10 @@ hal.executable @simpleMath_ex_dispatch_0 { #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0] -> (s0, -d0 + 1024)> #map2 = affine_map<(d0)[s0] -> (-d0 + 1024, s0)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @dot_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ 
-68,9 +64,9 @@ hal.executable @dot_dispatch_0 { %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf32> %10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] @@ -119,12 +115,10 @@ hal.executable @dot_dispatch_0 { ], iterator_types = ["parallel", "parallel", "reduction"] } -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @dot_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -139,9 +133,9 @@ hal.executable @dot_dispatch_0 { %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : 
!flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf32> %10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] @@ -172,12 +166,10 @@ hal.executable @dot_dispatch_0 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @conv2d_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -193,9 +185,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %11 = flow.dispatch.tensor.load %0, offsets = [0, 0 ,0, 0], sizes = [1, 4, 4, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x4x4x2xf32> %13 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 2, 2, 1], strides = [1, 1, 1, 1] @@ -221,11 +213,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @simpleMath_ex_dispatch_0 { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -237,8 +227,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { builtin.module { func.func @add_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<16xf32> %4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor> -> tensor<16xf32> %5 = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> @@ -261,11 +251,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @reduction_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -279,8 +267,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %c96 = arith.constant 96 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [14, 14, 96], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<14x14x96xf32> %8 = tensor.empty() : tensor<96xf32> @@ -307,12 +295,10 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @vector_add_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -325,9 +311,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { func.func @vector_add_dispatch() { %c0 = arith.constant 0 : index %c16384 = arith.constant 16384 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %6 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [16384], strides = [1] : !flow.dispatch.tensor> -> tensor<16384xf32> %8 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [16384], strides = [1] @@ -361,11 +347,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { #map2 = affine_map<(d0)[s0] -> (-d0 + 16384, s0)> #map3 = affine_map<(d0, d1) -> (d1, d0)> #map4 = affine_map<(d0, d1) -> (d0)> -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @vector_reduction_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -379,8 +363,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c0 = arith.constant 0 : index %c16384 = arith.constant 16384 : index %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 16384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x16384xf32> %8 = tensor.empty() : tensor<16384xf32> @@ -406,16 +390,18 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @mma_fused { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) { + hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding + ]>) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index): %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2 hal.return %x, %y, %z : index, index, index @@ -426,10 
+412,10 @@ hal.executable @mma_fused { %cst = arith.constant 0.000000e+00 : f32 %c2048 = arith.constant 2048 : index %c512 = arith.constant 512 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1024xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] @@ -489,16 +475,18 @@ hal.executable @mma_fused { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @mma_fused_fp16 { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) { + hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding + ]>) { ^bb0(%arg0: !hal.device, %arg1: 
index, %arg2 : index): %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2 hal.return %x, %y, %z : index, index, index @@ -509,10 +497,10 @@ hal.executable @mma_fused_fp16 { %cst = arith.constant 0.000000e+00 : f16 %c2048 = arith.constant 2048 : index %c512 = arith.constant 512 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x1024xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] @@ -568,12 +556,10 @@ hal.executable @mma_fused_fp16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #map0 = affine_map<()[s0, s1] -> (s0 * s1)> @@ -597,11 +583,11 @@ hal.executable @mma_fused_fp16 { %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> %11 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 32, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x32x1024xf32> @@ -648,12 +634,10 @@ hal.executable @mma_fused_fp16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #map0 = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)> @@ -674,9 +658,9 @@ hal.executable @mma_fused_fp16 { func.func @split_k_gemm() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) 
offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2048, 4, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2048x4x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 256, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x256x512xf32> %5 = tensor.empty() : tensor<4x2048x512xf32> @@ -718,12 +702,10 @@ hal.executable @mma_fused_fp16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable public @pooling_dynamic { @@ -740,8 +722,8 @@ hal.executable @mma_fused_fp16 { %cst = arith.constant 0.000000e+00 : f32 %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %s = arith.index_cast %0 : i32 to index - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%s) : !flow.dispatch.tensor>{%s, %s, %s} - %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%s) : !flow.dispatch.tensor>{%s} + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%s) : !flow.dispatch.tensor>{%s, %s, %s} + %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%s) : !flow.dispatch.tensor>{%s} %16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [%s, 2048, %s, %s], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%s, %s, %s} -> tensor %19 = tensor.empty(%s) : tensor %38 = tensor.empty(%s, %s) : tensor @@ -765,11 +747,9 @@ hal.executable @mma_fused_fp16 { #map2 = affine_map<(d0)[s0] -> 
(-d0 + 16384, s0)> #map3 = affine_map<(d0, d1) -> (d0, d1)> #map4 = affine_map<(d0, d1) -> (d0)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @warp_reduction_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -783,8 +763,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %8 = tensor.empty() : tensor<512xf32> @@ -818,11 +798,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map3 = affine_map<(d0, d1) -> (d0, d1)> #map4 = affine_map<(d0, d1) -> (d0)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @warp_reduction_broadcast_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -837,8 +815,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c1024 = arith.constant 1024 : index %cst_0 = arith.constant 3.840000e+02 : f32 %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x1024xf32> %8 = tensor.empty() : tensor<512xf32> @@ -879,15 +857,13 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @shared_mem_alloc { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @shared_mem_alloc ordinal(0) layout(#hal.pipeline.layout, <1, storage_buffer>]>]>) { + hal.executable.export public @shared_mem_alloc ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index): %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5 hal.return %x, %y, %z : index, index, index @@ -896,8 +872,8 @@ hal.executable private @shared_mem_alloc { func.func @shared_mem_alloc() { %c0 = arith.constant 0 : index %cst = arith.constant dense<0xFF800000> : tensor<14x14x480xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, 
offsets = [0, 0, 0], sizes = [29, 29, 480], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<29x29x480xf32> %3 = tensor.empty() : tensor<3x3xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0 * 2 + d3, d1 * 2 + d4, d2)>, affine_map<(d0, d1, d2, d3, d4) -> (d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%2, %3 : tensor<29x29x480xf32>, tensor<3x3xf32>) outs(%cst : tensor<14x14x480xf32>) { @@ -928,11 +904,9 @@ hal.executable private @shared_mem_alloc { #config = #iree_codegen.lowering_config #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -946,8 +920,8 @@ hal.executable private @shared_mem_transpose { builtin.module { func.func @shared_mem_transpose() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 768], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x768xf32> %3 = tensor.empty() : tensor<768x2048xf32> %4 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<2048x768xf32>) outs(%3 : tensor<768x2048xf32>) { diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir index bb741acd65e4d..bb7722c3086b5 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_pack() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xi32> %3 = tensor.empty() : tensor<4x16x16x32xi32> %pack = tensor.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir index 45eb7adda2966..e2ef4ee714bca 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir +++ 
b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-promote-matmul-to-fit-mma{target-dimensions=parallel}))" %s | FileCheck %s --check-prefixes=ALL,PARALLEL // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-promote-matmul-to-fit-mma{target-dimensions=reduction}))" %s | FileCheck %s --check-prefixes=ALL,REDUCTION -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<()[s0] -> (s0 * 128)> @@ -17,9 +15,9 @@ func.func @batch_matmul_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_z = hal.interface.workgroup.id[2] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply #map()[%workgroup_id_y] @@ -36,9 +34,9 @@ func.func @batch_matmul_f16() { return } // ALL-LABEL: 
func.func @batch_matmul_f16 -// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> -// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> -// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> +// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> +// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> +// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> // ALL-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_HANDLE]] // ALL-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_HANDLE]] // PARALLEL: %[[PADDED_LHS:.+]] = tensor.pad %[[LHS]] @@ -67,12 +65,10 @@ func.func @batch_matmul_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<()[s0] -> (s0 * 128)> @@ -88,9 +84,9 @@ func.func @batch_matmul_pad_reduction_after_tiling() { %c1 = arith.constant 1 : index %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_z = hal.interface.workgroup.id[2] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply #map()[%workgroup_id_y] @@ -128,9 +124,9 @@ func.func @batch_matmul_pad_reduction_after_tiling() { // The padding on parallel dims is a nop because they are already padded. Skip // the check for the testcase. // ALL-LABEL: func.func @batch_matmul_pad_reduction_after_tiling -// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> -// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> -// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> +// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> +// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> +// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> // ALL-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_HANDLE]] // ALL-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_HANDLE]] // REDUCTION: 
%[[INIT:.+]] = tensor.empty() : tensor<1x64x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir index bfc69ed52d4e0..cbab841c3f272 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @warp_reduction_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -18,8 +16,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c0 = arith.constant 0 : index %c10240 = arith.constant 10240 : index %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 10240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x10240xf32> %8 = tensor.empty() : tensor<512xf32> @@ -103,11 +101,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { 
// ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @warp_reduction_broadcast_dispatch { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -122,8 +118,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %c10240 = arith.constant 10240 : index %cst_0 = arith.constant 3.840000e+02 : f32 %cst = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x10240xf32> %8 = tensor.empty() : tensor<512xf32> @@ -196,11 +192,9 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @softmax { hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { @@ -215,8 +209,8 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) { %cst = arith.constant -3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) 
offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x40960xf32> %3 = tensor.empty() : tensor<12x128x40960xf32> %4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir index b890571091921..c46f738d3fa91 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s --check-prefix=CDNA3 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @softmax() { %c0 = arith.constant 0 : index %cst = arith.constant -3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x40960xf32> %3 = tensor.empty() : tensor<12x128x40960xf32> %4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32> @@ -28,19 +26,17 @@ func.func @softmax() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @softmax() { %c0 = arith.constant 0 : index %cst = arith.constant -3.40282347E+38 : f32 %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x40960xf32> %3 = tensor.empty() : tensor<12x128x40960xf32> %4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32> @@ -57,11 +53,9 @@ func.func @softmax() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @dynamic_softmax() { %c32_i64 = arith.constant 32 : i64 @@ -74,8 +68,8 @@ func.func @dynamic_softmax() { %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index %7 = flow.dispatch.workload.ordinal %6, 0 : index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%7} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%7} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [32, %7], strides = [1, 1] : !flow.dispatch.tensor>{%7} -> tensor<32x?xf16> %11 = tensor.empty(%7) : tensor<32x?xf16> %12 = linalg.softmax dimension(1) ins(%10 : tensor<32x?xf16>) outs(%11 : tensor<32x?xf16>) -> tensor<32x?xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir index 3e22bd8240463..e3b16eb22cbcf 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @small_reduction { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -17,8 +15,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @small_reduction() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 13], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x13xf32> %3 = tensor.empty() : tensor<1024xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<1024xf32>) -> tensor<1024xf32> @@ -52,11 +50,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -69,8 +65,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_reduction() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x64xf32> %3 = tensor.empty() : tensor<8xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<8xf32>) -> tensor<8xf32> @@ -121,11 +117,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_elementwise_reduction_elementwise { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -138,8 +132,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_elementwise_reduction_elementwise() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x64xf32> %3 = tensor.empty() : tensor<8xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<8xf32>) -> tensor<8xf32> @@ -198,11 +192,9 
@@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction_larger { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -215,8 +207,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_reduction_larger() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [33, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<33x1024xf32> %3 = tensor.empty() : tensor<33xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<33xf32>) -> tensor<33xf32> @@ -268,11 +260,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction_1d { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -285,8 +275,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_reduction_1d() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor> -> tensor<64xf32> %3 = tensor.empty() : tensor %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor) -> tensor @@ -307,11 +297,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_elementwise_reduction_elementwise_4d { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -324,8 +312,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_elementwise_reduction_elementwise_4d() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 4, 8, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x4x8x64xf32> 
%3 = tensor.empty() : tensor<2x4x8xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x4x8xf32>) -> tensor<2x4x8xf32> @@ -355,11 +343,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction_i8_12345 { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -372,8 +358,8 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { func.func @group_reduction_i8_12345() { %c0 = arith.constant 0 : index %cst = arith.constant 0 : i8 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 12345], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x12345xi8> %3 = tensor.empty() : tensor<8x12345xi8> %4 = tensor.empty() : tensor<8xi8> @@ -440,11 +426,9 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -460,8 +444,8 @@ hal.executable @reduction_2d_trailing_elementwise_static_dispatch_0 { func.func 
@reduction_2d_trailing_elementwise_static_dispatch_0_generic_128x10_f32() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 10], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x10xf32> %3 = tensor.empty() : tensor<128x10xf32> %4 = tensor.empty() : tensor<128xf32> @@ -509,14 +493,12 @@ hal.executable @reduction_2d_trailing_elementwise_static_dispatch_0 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @i4_dequant_matvec { hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) { @@ -529,11 +511,11 @@ hal.executable private @i4_dequant_matvec { func.func @i4_dequant_matvec() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir index cfa16f79f2c02..fea7846af70bb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir @@ -5,11 +5,9 @@ // RUN: 
--pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target)))))" \ // RUN: %s | FileCheck %s --check-prefix=CDNA3 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction_1d { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -22,8 +20,8 @@ hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { func.func @group_reduction_1d() { %c0 = arith.constant 0 : index %cst = arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor> -> tensor<64xf32> %3 = tensor.empty() : tensor %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor) -> tensor @@ -46,11 +44,9 @@ hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @group_reduction_1d { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -63,8 +59,8 @@ hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { func.func @group_reduction_1d() { %c0 = arith.constant 0 : index %cst = 
arith.constant -0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor> -> tensor<64xf32> %3 = tensor.empty() : tensor %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor) -> tensor @@ -88,14 +84,12 @@ hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @i4_dequant_matvec { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -108,11 +102,11 @@ hal.executable private @i4_dequant_matvec { func.func @i4_dequant_matvec() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> @@ -165,14 +159,12 @@ hal.executable private @i4_dequant_matvec { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @i4_dequant_matvec { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -185,11 +177,11 @@ 
hal.executable private @i4_dequant_matvec { func.func @i4_dequant_matvec() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x32xf16> @@ -224,12 +216,10 
@@ hal.executable private @i4_dequant_matvec { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matvec_fp16 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -242,9 +232,9 @@ hal.executable private @matvec_fp16 { func.func @matvec_fp16() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<1x32000xf16> @@ -287,12 +277,10 @@ hal.executable private @matvec_fp16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matvec_fp16 { hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -305,9 +293,9 @@ hal.executable private @matvec_fp16 { func.func @matvec_fp16() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x4096xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32000x4096xf16> %5 = tensor.empty() : tensor<1x32000xf16> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir index 6a060af8a1b6b..7736a40050e48 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir @@ -5,12 +5,10 @@ // Verify that a simple element wise op gets lowered succefully all the way to // nvvm/llvm dialect. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @simpleMath_ex_dispatch_0 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -22,9 +20,9 @@ hal.executable @simpleMath_ex_dispatch_0 { builtin.module { func.func @add_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = tensor.empty() : tensor<16xf32> %4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor> -> tensor<16xf32> %5 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor> -> tensor<16xf32> @@ -49,12 +47,10 @@ hal.executable @simpleMath_ex_dispatch_0 { #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0] -> (s0, -d0 + 1024)> #map2 = affine_map<(d0)[s0] -> (-d0 + 1024, s0)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @dot_dispatch_0 { hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) { @@ -69,9 +65,9 @@ hal.executable @dot_dispatch_0 { %c0 = arith.constant 0 : index %c1024 = 
arith.constant 1024 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x1024xf32> %10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] @@ -106,12 +102,10 @@ hal.executable @dot_dispatch_0 { // ----- #map = affine_map<(d0) -> (d0)> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @ext_fp8_dispatch { hal.executable.variant @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) { @@ -123,9 +117,9 @@ hal.executable @ext_fp8_dispatch { builtin.module { func.func @ext_fp8_dispatch() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) 
offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor> -> tensor<4096xf8E4M3FNUZ> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor> -> tensor<4096xf8E5M2FNUZ> %5 = tensor.empty() : tensor<4096xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir index 186fcf58b4bc3..f1ced7beb610d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir @@ -14,12 +14,10 @@ // RUN: -td-matmul-strategy-use-fma=true \ // RUN: | FileCheck %s --check-prefixes=CHECK,OPTIONS -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -27,9 +25,9 @@ func.func @batch_matmul_dispatch_0_generic_128x80x320x32_f32() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x320xf32> %5 = tensor.empty() : tensor<128x80x320xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir index 4c0ecc905b730..445a64c13003e 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir @@ -1,19 +1,17 @@ // RUN: iree-opt %s --split-input-file --iree-codegen-llvmgpu-enable-transform-dialect-jit= --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" \ // RUN: --iree-gpu-test-target=sm_80 --iree-codegen-llvmgpu-enable-transform-dialect-implicit-gemm-strategy | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @nchw_convolution() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) 
alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 128, 258, 258], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x128x258x258xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [256, 128, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<256x128x3x3xf32> %5 = tensor.empty() : tensor<8x256x256x256xf32> @@ -67,19 +65,17 @@ func.func @nchw_convolution() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @nhwc_convolution() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 258, 258, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x258x258x128xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 128, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x128x256xf32> %5 = tensor.empty() : tensor<8x256x256x256xf32> @@ -107,19 +103,17 @@ func.func @nhwc_convolution() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @unaligned_convolution() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = 
flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 258, 258, 132], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x258x258x132xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 132, 264], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x132x264xf32> %5 = tensor.empty() : tensor<8x256x256x264xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir index f1eecaf8bdd37..2e41bfe445c47 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir @@ -43,19 +43,17 @@ // RUN: iree-opt %s --split-input-file --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" \ // RUN: --iree-gpu-test-target=sm_80 --iree-codegen-llvmgpu-enable-transform-dialect-small-matmul | FileCheck --check-prefix=SMALL %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2052x2556xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2556x2052xf32> %5 = tensor.empty() : tensor<2052x2052xf32> @@ -205,19 +203,17 @@ func.func @matmul_1() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_2() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2051, 2555], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2051x2555xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], 
sizes = [2555, 2051], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2555x2050xf32> %5 = tensor.empty() : tensor<2051x2050xf32> @@ -255,19 +251,17 @@ func.func @matmul_2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_3() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x2556xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2556x2556xf32> %5 = tensor.empty() : tensor<2048x2556xf32> @@ -287,19 +281,17 @@ func.func @matmul_3() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func 
@matmul_4_partially_unaligned() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x2044xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2044x1024xf32> %5 = tensor.empty() : tensor<2048x1024xf32> @@ -355,19 +347,17 @@ func.func @matmul_4_partially_unaligned() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @aligned_matmul() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x2048xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x2048xf32> %5 = tensor.empty() : tensor<2048x2048xf32> @@ -422,19 +412,17 @@ func.func @aligned_matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_5_small() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 2044], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x2044xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2044, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2044x1024xf32> %5 = tensor.empty() : tensor<2x1024xf32> @@ -461,19 +449,17 @@ func.func @matmul_5_small() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @f16_matmul() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2052x2556xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2556x2052xf16> %5 = tensor.empty() : tensor<2052x2052xf16> @@ 
-494,19 +480,17 @@ func.func @f16_matmul() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @int8_matmul() { %c0 = arith.constant 0 : index %c0_i8 = arith.constant 0 : i8 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x2556xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2556x2052xi8> %5 = tensor.empty() : tensor<4x2052xi8> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir index 76fb3683bb8fd..599ea923d9887 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir @@ -16,18 +16,16 @@ // RUN: --td-pad-strategy-use-async-copies=false \ // RUN: | 
FileCheck --check-prefix=WITH_OPTIONS %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @pad() { %c0 = arith.constant 0 : index %c56 = arith.constant 56 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<123x456xf32> %cst_0 = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %2 low[%c0, 0] high[5, %c56] { @@ -98,17 +96,15 @@ func.func @pad() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @pad_low() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = 
[0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<123x456xf32> %cst_0 = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %2 low[5, 0] high[0, 56] { @@ -127,17 +123,15 @@ func.func @pad_low() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @pad_local() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<123x456xf32> %padded = tensor.pad %2 low[0, 0] high[5, 56] { ^bb0(%arg0: index, %arg1: index): diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir index 01904e18a7c94..48fc842231ed2 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir @@ -1,17 +1,15 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-tensor-pad),fold-memref-alias-ops,canonicalize,cse)" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @transpose_no_align_dispatch_0_generic_48x32() { %c48 = 
arith.constant 48 : index %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -40,8 +38,8 @@ func.func @transpose_no_align_dispatch_0_generic_48x32() { // CHECK: %[[C48:.*]] = arith.constant 48 : index // CHECK: %[[C32:.*]] = arith.constant 32 : index // CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[D0:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor> -// CHECK: %[[D1:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor> +// CHECK: %[[D0:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor> +// CHECK: %[[D1:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor> // CHECK: %[[WORKGROUP_ID_X:.*]] = hal.interface.workgroup.id[0] : index // CHECK: %[[WORKGROUP_COUNT_X:.*]] = hal.interface.workgroup.count[0] : index // CHECK: %[[WORKGROUP_ID_Y:.*]] = hal.interface.workgroup.id[1] : index @@ -73,11 +71,9 @@ func.func @transpose_no_align_dispatch_0_generic_48x32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (s0 * 16)> #map1 = affine_map<(d0)[s0] -> (-d0 + s0, 16)> @@ -94,8 +90,8 @@ func.func @unpack_dynamic() { %5 = arith.index_castui %1 : i32 to index %6 = arith.index_castui %2 : i32 to index %7 = arith.index_castui %3 : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor>{%4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -125,7 +121,7 @@ func.func @unpack_dynamic() { return } // CHECK-LABEL: func.func @unpack_dynamic -// CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[DEST_BUF]] // CHECK: %[[PAD:.+]] = tensor.pad %[[LOAD]] // CHECK: %[[UNPACK:.+]] = tensor.unpack {{.+}} into %[[PAD]] diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir index edc882be49de2..ba696c714defb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir @@ -1,20 +1,18 @@ // RUN: iree-opt 
--pass-pipeline="builtin.module(func.func(iree-llvmgpu-tensorcore-vectorization))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dot() { %c16 = arith.constant 16 : index %c1024 = arith.constant 1024 : index %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x512xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2048x512xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x512xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2048x512xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %3 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_y] diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir index 25e19a8efdaf1..5354ca0cbc236 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir @@ -1,17 +1,15 @@ // RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @pad_matmul_static_dispatch_0() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<500x1020xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir index 024c901d0b3a1..4b4a465f0b3cb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir @@ -1,17 +1,15 @@ // RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @pad_matmul_static_dispatch_0 { builtin.module { func.func @pad_matmul_static_dispatch_0(%arg0: 
tensor<250x500xf32>, %arg1: tensor<500x1020xf32>) -> tensor<250x1020xf32> { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<500x1020xf32> @@ -19,8 +17,8 @@ hal.executable private @pad_matmul_static_dispatch_0 { %cst = arith.constant 0.000000e+00 : f32 %5 = linalg.fill ins(%cst : f32) outs(%50 : tensor<250x1020xf32>) -> tensor<250x1020xf32> // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) - // CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) + // CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) + // CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) // CHECK: %[[D2:.+]] = flow.dispatch.tensor.load %[[D0]], offsets = [0, 0], sizes = [250, 500] // CHECK: %[[D3:.+]] = flow.dispatch.tensor.load %[[D1]], offsets = [0, 0], sizes = [500, 1020] // CHECK: %[[D4:.+]] = tensor.empty() : tensor<250x1020xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir 
index da19ad2a7b60c..3e47fe81c5c31 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir @@ -6,16 +6,14 @@ // RUN: --allow-unregistered-dialect | \ // RUN: FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> #translation_info = #iree_codegen.translation_info func.func @reduce_dispatch_0() attributes {translation_info = #translation_info} { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128xf32> memref.assume_alignment %0, 64 : memref<128xf32> %1 = gpu.thread_id x %2 = arith.cmpi ult, %1, %c1 : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir index 410266427633d..4056c42913392 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir @@ -1,9 +1,7 @@ // RUN: iree-opt %s --pass-pipeline="builtin.module(iree-codegen-lower-executable-using-transform-dialect)" | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #translation = #iree_codegen.translation_info @@ -13,7 +11,7 @@ module { %c250 = arith.constant 250 : index %c8 = arith.constant 8 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xf16> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xf16> memref.assume_alignment %0, 64 : memref<2xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %subview = memref.subview %0[%workgroup_id_x] [1] [1] : memref<2xf16> to memref<1xf16, strided<[1], offset: ?>> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir index 57f8c8293506c..4975c58913e02 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matmul_pipelining { builtin.module { @@ -21,11 +19,11 @@ func.func @matmul_pipelining() { %3 = gpu.thread_id z %4 = memref.alloc() : memref<4x32x40xf16, 3> %5 = memref.alloc() : memref<4x32x40xf16, 3> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16> memref.assume_alignment %6, 64 : memref<3456x2048xf16> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16> memref.assume_alignment %7, 64 : memref<2048x1024xf16> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : 
memref<3456x1024xf16> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16> memref.assume_alignment %8, 64 : memref<3456x1024xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir index 2159e58ee50fb..9fae267557d00 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt %s --split-input-file -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matmul { builtin.module { @@ -17,11 +15,11 @@ func.func @matmul() { %c16 = arith.constant 16 : index %c32 = arith.constant 32 : index %cst_0 = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %0, 64 : memref<32x32xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %1, 64 : memref<32x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf32> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %2, 64 : memref<32x32xf32> %3 = gpu.thread_id x %4 = gpu.thread_id y @@ -77,12 +75,10 @@ module attributes { transform.with_named_sequence } { // ----- // Verify that unrolling does not apply to rank 1 elementwise vector ops. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @gathered_matmul { builtin.module { @@ -98,11 +94,11 @@ func.func @gathered_matmul() { %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex> %cst_2 = arith.constant dense<1> : vector<4x4xindex> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %0, 64 : memref<32x32xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %1, 64 : memref<32x32xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf32> memref.assume_alignment %2, 64 : memref<32x32xf32> %alloc = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32> %3 = gpu.thread_id x diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir 
b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir index 09357d92bae10..8aa87740b0578 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_80 \ // RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target, fold-memref-alias-ops, canonicalize, cse)))))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_dispatch_0 { @@ -18,8 +16,8 @@ hal.executable @transpose_dispatch_0 { builtin.module { func.func @transpose_dispatch_0_generic_4096x4096() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x4096xf32> %3 = tensor.empty() : tensor<4096x4096xf32> %4 = linalg.generic {indexing_maps = [ affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<4096x4096xf32>) outs(%3 : tensor<4096x4096xf32>) { @@ -40,9 +38,9 @@ 
hal.executable @transpose_dispatch_0 { // CHECK-DAG: %[[D1:.*]] = gpu.thread_id y // CHECK-DAG: %[[D2:.*]] = gpu.thread_id z // CHECK-DAG: %[[D3:.*]] = memref.alloc() : memref<32x33xf32, #gpu.address_space> -// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type> +// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D4]], 64 : memref<4096x4096xf32, #hal.descriptor_type> -// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type> +// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D5]], 64 : memref<4096x4096xf32, #hal.descriptor_type> // CHECK: gpu.barrier // CHECK: %[[D6:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]] @@ -61,12 +59,10 @@ hal.executable @transpose_dispatch_0 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 { @@ -79,9 +75,9 @@ hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 { builtin.module { func.func @transpose_single_operand_dispatch_0_generic_768x2048() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 768], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2048x768xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [768, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<768x2048xf32> %5 = tensor.empty() : tensor<768x2048xf32> @@ -104,11 +100,11 @@ hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 { // CHECK: %[[D1:.*]] = gpu.thread_id y // CHECK: %[[D2:.*]] = gpu.thread_id z // CHECK: %[[D3:.*]] = memref.alloc() : memref<32x33xf32, #gpu.address_space> -// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<2048x768xf32, #hal.descriptor_type> +// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<2048x768xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D4]], 64 : memref<2048x768xf32, #hal.descriptor_type> -// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type> +// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D5]], 64 : memref<768x2048xf32, #hal.descriptor_type> -// CHECK: %[[D6:.*]] = 
hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type> +// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D6]], 64 : memref<768x2048xf32, #hal.descriptor_type> // CHECK: gpu.barrier // CHECK: %[[D7:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]] @@ -129,12 +125,10 @@ hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_3d_no_dispatch_0_generic_768x2048x1024 { @@ -147,9 +141,9 @@ hal.executable @transpose_3d_no_dispatch_0_generic_768x2048x1024 { builtin.module { func.func @transpose_3d_no_dispatch_0_generic_768x2048x1024() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], 
sizes = [2048, 768, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2048x768x1024xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [768, 2048, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<768x2048x1024xf32> %5 = tensor.empty() : tensor<768x2048x1024xf32> @@ -172,12 +166,10 @@ hal.executable @transpose_3d_no_dispatch_0_generic_768x2048x1024 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 { @@ -190,9 +182,9 @@ hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 { builtin.module { func.func @transpose_3d_yes_dispatch_0_generic_10x768x2048() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 2048, 768], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x2048x768xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = 
[1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x768x2048xf32> %5 = tensor.empty() : tensor<10x768x2048xf32> @@ -215,11 +207,11 @@ hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 { // CHECK: %[[D1:.*]] = gpu.thread_id y // CHECK: %[[D2:.*]] = gpu.thread_id z // CHECK: %[[D3:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space> -// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type> +// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D4]], 64 : memref<10x2048x768xf32, #hal.descriptor_type> -// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> +// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D5]], 64 : memref<10x768x2048xf32, #hal.descriptor_type> -// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> +// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D6]], 64 : memref<10x768x2048xf32, #hal.descriptor_type> // CHECK: gpu.barrier // CHECK: %[[D7:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]] @@ -240,12 +232,10 @@ hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 { @@ -258,9 +248,9 @@ hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 { builtin.module { func.func @transpose_3d_trans_out_dispatch_0_generic_10x2048x768() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x768x2048xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x768x2048xf32> %5 = tensor.empty() : tensor<10x2048x768xf32> @@ -284,11 +274,11 @@ hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 { // CHECK: %[[D2:.*]] = gpu.thread_id z // CHECK: %[[D3:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space> // CHECK: %[[D4:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space> -// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> +// CHECK: 
%[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D5]], 64 : memref<10x768x2048xf32, #hal.descriptor_type> -// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> +// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D6]], 64 : memref<10x768x2048xf32, #hal.descriptor_type> -// CHECK: %[[D7:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type> +// CHECK: %[[D7:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type> // CHECK: memref.assume_alignment %[[D7]], 64 : memref<10x2048x768xf32, #hal.descriptor_type> // CHECK: gpu.barrier // CHECK: %[[D8:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]] @@ -311,12 +301,10 @@ hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> hal.executable @transpose_3d_diff_dispatch_0_generic_10x768x2048 { @@ -333,9 +321,9 @@ hal.executable @transpose_3d_diff_dispatch_0_generic_10x768x2048 { %c768 = arith.constant 768 : index %c2048 = arith.constant 2048 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir index f231c8ba3f727..857f4bd5f7095 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}> #map = affine_map<(d0) -> (d0)> @@ -21,9 +19,9 @@ func.func @argmax_1d_f16i64() attributes {hal.executable.target = #executable_ta %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) 
alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} %10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor>{%8} -> tensor %11 = tensor.empty() : tensor %12 = tensor.empty() : tensor @@ -49,11 +47,9 @@ func.func @argmax_1d_f16i64() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -70,9 +66,9 @@ func.func @argmax_2d_f32i64() attributes {hal.executable.target = #executable_ta %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} %10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [16, %8], 
strides = [1, 1] : !flow.dispatch.tensor>{%8} -> tensor<16x?xf32> %11 = tensor.empty() : tensor<16xi64> %12 = tensor.empty() : tensor<16xf32> @@ -100,11 +96,9 @@ func.func @argmax_2d_f32i64() attributes {hal.executable.target = #executable_ta // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> #map = affine_map<(d0) -> (d0)> @@ -121,9 +115,9 @@ func.func @no_ukernel_argmax_1d_f16i64() attributes {hal.executable.target = #ex %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} %10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor>{%8} -> tensor %11 = tensor.empty() : tensor %12 = tensor.empty() : tensor @@ -149,11 +143,9 @@ func.func @no_ukernel_argmax_1d_f16i64() attributes {hal.executable.target = #ex // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}> #map = affine_map<(d0) -> (d0)> @@ -170,9 +162,9 @@ func.func @not_neg_inf_init_argmax_1d() 
attributes {hal.executable.target = #exe %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} %10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor>{%8} -> tensor %11 = tensor.empty() : tensor %12 = tensor.empty() : tensor diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir index fcb1192034215..4173142c81010 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir @@ -1,15 +1,13 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_filter_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) 
offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x64x128xf32> %3 = tensor.empty() : tensor<8x8x64x128xf32> %4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32> @@ -29,16 +27,14 @@ func.func @winograd_filter_transform() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_input_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x34x34x128xf16> %3 = tensor.empty() : tensor<8x8x2x6x6x128xf16> %4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16> @@ -59,16 +55,14 @@ func.func 
@winograd_input_transform() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_output_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x8x2x6x6x128xf16> %3 = tensor.empty() : tensor<2x36x36x128xf16> %4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp index a5c7df978c951..b785b3df0174f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp +++ b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp @@ -117,7 +117,7 @@ createResourceVariable(Location loc, const SubspanResourceInfo &resource, /// Returns the (set, binding) pair for the given interface op. 
static std::pair getInterfaceSetAndBinding(IREE::HAL::InterfaceBindingSubspanOp op) { - return {op.getSet().getSExtValue(), op.getBinding().getSExtValue()}; + return {0, op.getBinding().getSExtValue()}; } /// Scans all hal.interface.binding.subspan ops in `module`, creates their @@ -289,7 +289,7 @@ struct HALInterfaceLoadConstantConverter final assert(exportOps.size() == 1); auto layoutAttr = exportOps.front().getLayout(); - uint64_t elementCount = layoutAttr.getPushConstants(); + uint64_t elementCount = layoutAttr.getConstants(); unsigned index = loadOp.getOrdinal().getZExtValue(); // The following function generates SPIR-V ops with i32 types. So it does diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp index 6861e81781f84..0e2734b149723 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp +++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp @@ -61,10 +61,9 @@ struct ConvertHalInterfaceBindingSubspan final auto newOp = rewriter.replaceOpWithNewOp( - op, newResultTy, adaptor.getLayout(), adaptor.getSet(), - adaptor.getBinding(), adaptor.getByteOffset(), - adaptor.getDynamicDims(), adaptor.getAlignmentAttr(), - adaptor.getDescriptorFlagsAttr()); + op, newResultTy, adaptor.getLayout(), adaptor.getBinding(), + adaptor.getByteOffset(), adaptor.getDynamicDims(), + adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr()); LLVM_DEBUG(llvm::dbgs() << "WideIntegerEmulation: new op: " << newOp << "\n"); (void)newOp; diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp index 817b121a6f2fe..94a3dc2ce6719 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp +++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp @@ -50,7 +50,7 @@ bool is1DStaticShapedStorageBuffer( 
/// e.g., /// /// ```mlir -/// hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) +/// hal.interface.binding.subspan layout(#pipeline_layout) binding(0) /// offset(%offset) /// : memref<16xf32> /// ``` @@ -58,7 +58,7 @@ bool is1DStaticShapedStorageBuffer( /// is re-written to /// /// ```mlir -/// hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) +/// hal.interface.binding.subspan layout(#pipeline_layout) binding(0) /// offset(%offset) /// : memref{%c16} /// ``` @@ -87,9 +87,8 @@ rewriteStorageBufferSubspanOp(RewriterBase &rewriter, auto newOp = rewriter.create( subspanOp.getLoc(), newType, subspanOp.getLayoutAttr(), - subspanOp.getSetAttr(), subspanOp.getBindingAttr(), - subspanOp.getByteOffset(), dynamicDims, subspanOp.getAlignmentAttr(), - subspanOp.getDescriptorFlagsAttr()); + subspanOp.getBindingAttr(), subspanOp.getByteOffset(), dynamicDims, + subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); LLVM_DEBUG({ llvm::dbgs() << "Rewritten to: "; diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp index 5934189e28058..de4c7fab876f3 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp +++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp @@ -649,10 +649,9 @@ class ProcessInterfaceBindingSubspan final "cannot get vectorized memref type"); } rewriter.replaceOpWithNewOp( - subspanOp, *vecMemRef, subspanOp.getLayout(), subspanOp.getSet(), - subspanOp.getBinding(), subspanOp.getByteOffset(), - subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(), - subspanOp.getDescriptorFlagsAttr()); + subspanOp, *vecMemRef, subspanOp.getLayout(), subspanOp.getBinding(), + subspanOp.getByteOffset(), subspanOp.getDynamicDims(), + subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); return success(); } }; diff --git 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir index 7796ffc2f8faa..1f541ff342f4f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-annotate-winograd-loops))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @_wino_input_dispatch_0() { %c0 = arith.constant 0 : index @@ -16,8 +14,8 @@ func.func @_wino_input_dispatch_0() { %c1 = arith.constant 1 : index %c32 = arith.constant 32 : index %0 = tensor.empty() : tensor<8x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -68,9 +66,9 @@ func.func @_wino_input_dispatch_0() { // CHECK: %[[C1:.+]] = arith.constant 1 : index // CHECK: %[[C32:.+]] = arith.constant 32 : index // CHECK: %[[D0:.+]] = tensor.empty() : tensor<8x8xf32> -// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) +// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) 
alignment(64) offset(%[[C0]]) // CHECK-SAME: : !flow.dispatch.tensor> -// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) +// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) // CHECK-SAME: : !flow.dispatch.tensor> // CHECK: %[[WORKGROUP_ID_X:.+]] = hal.interface.workgroup.id[0] : index // CHECK: %[[WORKGROUP_COUNT_X:.+]] = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir index 4e6ea89053601..221790a95ac0c 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir @@ -2,21 +2,19 @@ // Conv - large OC - distribute to only one workgroup dimension. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_112x112x512() { %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index %c112 = arith.constant 112 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 
0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x512xf32> %5 = tensor.empty() : tensor<1x112x112x512xf32> @@ -37,21 +35,19 @@ func.func @conv_112x112x512() { // Conv - medium OC/OW/OH - distribute to two workgroup dimensions. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_112x112x32() { %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c112 = arith.constant 112 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x32xf32> %5 = tensor.empty() : tensor<1x112x112x32xf32> @@ -72,20 +68,18 @@ func.func @conv_112x112x32() { // Conv - small OC/OW/OH - distribute to all three workgroup dimensions. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_16x16x16() { %c0 = arith.constant 0 : index %c16 = arith.constant 16 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x33x33x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x16xf32> %5 = tensor.empty() : tensor<1x16x16x16xf32> @@ -105,21 +99,19 @@ func.func @conv_16x16x16() { // Depthwise conv - small OC/OW/OH - distribute to all three workgroup dimensions. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dwconv_28x28x144() { %c0 = arith.constant 0 : index %c144 = arith.constant 144 : index %c28 = arith.constant 28 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 57, 57, 144], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x57x57x144xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x144xf32> %5 = tensor.empty() : tensor<1x28x28x144xf32> @@ -140,21 +132,19 @@ func.func @dwconv_28x28x144() { // Depthwise conv - tiny OC/OW/OH - starving the GPU. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dwconv_4x4x8() { %c0 = arith.constant 0 : index %c8 = arith.constant 8 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 9, 9, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x9x9x8xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x8xf32> %5 = tensor.empty() : tensor<1x4x4x8xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir index 60a9e65d931dc..08d7676350ceb 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir @@ -2,21 +2,19 @@ // Large matmul that can match the best tiling scheme. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1024x2048x512() { %c0 = arith.constant 0 : index %c2048 = arith.constant 2048 : index %c1024 = arith.constant 1024 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x2048xf32> %5 = tensor.empty() : tensor<1024x2048xf32> @@ -37,21 +35,19 @@ func.func @matmul_1024x2048x512() { // Small matmul N that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_3136x24x96() { %c0 = arith.constant 0 : index %c24 = arith.constant 24 : index %c3136 = arith.constant 3136 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3136, 96], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3136x96xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<96x24xf32> %5 = tensor.empty() : tensor<3136x24xf32> @@ -72,21 +68,19 @@ func.func @matmul_3136x24x96() { // Small matmul M that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_196x64x192() { %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index %c196 = arith.constant 196 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 192], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<196x192xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<192x64xf32> %5 = tensor.empty() : tensor<196x64xf32> @@ -107,21 +101,19 @@ func.func @matmul_196x64x192() { // Small matmul K that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_12544x96x16() { %c0 = arith.constant 0 : index %c96 = arith.constant 96 : index %c12544 = arith.constant 12544 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<12544x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x96xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<12544x96xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<12544x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x96xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<12544x96xf32> linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>) linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>) return @@ -138,21 +130,19 @@ func.func @matmul_12544x96x16() { // Odd matmul M and small N that cannot utilize all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_49x160x576() { %c0 = arith.constant 0 : index %c160 = arith.constant 160 : index %c49 = arith.constant 49 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [49, 576], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<49x576xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<576x160xf32> %5 = tensor.empty() : tensor<49x160xf32> @@ -173,21 +163,19 @@ func.func @matmul_49x160x576() { // Large batch matmul. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_4x384x384() { %c0 = arith.constant 0 : index %c384 = arith.constant 384 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 384, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x384x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x32x384xf32> %5 = tensor.empty() : tensor<4x384x384xf32> @@ -208,21 +196,19 @@ func.func @batch_matmul_4x384x384() { // Small batch matmul. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_4x8x8() { %c0 = arith.constant 0 : index %c8 = arith.constant 8 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 8, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x8x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x32x8xf32> %5 = tensor.empty() : tensor<4x8x8xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir index 2fa2c4d3d0e45..e499af856d02e 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir @@ -1,21 +1,19 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, 
storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func.func @nhwc_conv_pointwise_2x64x64x320() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 66, 66, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x66x66x320xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 320, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x320x320xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 64, 64, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x64x64x320xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir index b986068feed6d..37bd8637293cb 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir @@ -1,17 +1,15 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_f32_16x4096x40x4096() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x4096xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 4096, 40], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x40xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 4096, 40], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x40xf32> @@ -30,19 
+28,17 @@ func.func @batch_matmul_f32_16x4096x40x4096() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_f16_64x640x320() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 320], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x320xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [320, 640], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<320x640xf16> %5 = tensor.empty() : tensor<64x640xf16> @@ -61,18 +57,16 @@ func.func @matmul_f16_64x640x320() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @batch_matmul_f32_16x4096x40x4096() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x4096xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 4096, 48], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x48xf32> %5 = tensor.empty() : tensor<16x4096x48xf32> @@ -91,20 +85,18 @@ func.func @batch_matmul_f32_16x4096x40x4096() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @batch_matmul_f16_1x4096x4096x512() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 4096, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x4096x512xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 512, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x512x4096xf16> %5 = tensor.empty() : tensor<1x4096x4096xf32> @@ -129,13 +121,11 @@ func.func @batch_matmul_f16_1x4096x4096x512() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -154,11 +144,11 @@ func.func @matmul_multi_reduce_i4xf32xf32() { %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%9) : !flow.dispatch.tensor> + %10 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%9) : !flow.dispatch.tensor> %15 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [11008, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<11008x32x128xi4> %16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<11008x32xf32> %17 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<11008x32xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir index c07fe95ed94d0..c3755fb9e11d8 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna3@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @matmul_256x1024x128_div_add() { @@ -15,11 +13,11 @@ func.func @matmul_256x1024x128_div_add() { %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %7 = tensor.empty() : tensor<256x1024xf16> @@ -47,22 +45,20 @@ func.func @matmul_256x1024x128_div_add() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @batch_matmul_16x128x256x512_div() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x128x512xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x512x256xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x128x256xf16> @@ -89,12 +85,10 @@ func.func @batch_matmul_16x128x256x512_div() { // Linalg.generic that is a batch matmul. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -105,9 +99,9 @@ func.func @batch_matmul_16x128x256x512_div() { func.func @generic_batch_matmul_32x8x512x64() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x64xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<32x64x512xf16> %5 = tensor.empty() : tensor<32x128x512xf16> @@ -133,19 +127,17 @@ func.func @generic_batch_matmul_32x8x512x64() { // K dim size not divisble by 32. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_16x1024x1024x80() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 1024, 80], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x1024x80xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 80, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x80x1024xf16> %5 = tensor.empty() : tensor<16x1024x1024xf16> @@ -166,21 +158,19 @@ func.func @batch_matmul_16x1024x1024x80() { // Small K - not supported by cooperative matrix. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_256x1024x8() { %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x8xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x1024xf16> %5 = tensor.empty() : tensor<256x1024xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir index fcf53412841f0..f00e7b511bad4 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, 
storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -15,11 +13,11 @@ #map3 = affine_map<(d0, d1, d2) -> (d0)> func.func @i4_dequant_matvec_f32() { %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> @@ -54,12 +52,10 @@ func.func @i4_dequant_matvec_f32() { // 
----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1, 0)> @@ -70,11 +66,11 @@ func.func @i4_dequant_matvec_f32() { %c32_i64 = arith.constant 32 : i64 %cst = arith.constant 0.000000e+00 : f32 %c4294967296_i64 = arith.constant 4294967296 : i64 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 32, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x1xf32> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 32, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x32x1xf32> @@ 
-109,13 +105,11 @@ func.func @i4_dequant_matvec_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -152,12 +146,12 @@ func.func @i4_dequant_matvec_f32() { %24 = arith.shli %23, %c32_i64 : i64 %25 = arith.ori %22, %24 : i64 %26 = arith.index_castui %25 : i64 to index - %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> - %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> - %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> + %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> + %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> + %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> %30 = flow.dispatch.workload.ordinal %26, 0 : index - %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor>{%30} - %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} + %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor>{%30} + 
%32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} %33 = flow.dispatch.tensor.load %27, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %34 = flow.dispatch.tensor.load %28, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> %35 = flow.dispatch.tensor.load %29, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> @@ -192,14 +186,12 @@ func.func @i4_dequant_matvec_f32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1, 0)> @@ -209,11 +201,11 @@ func.func @i4_dequant_matvec_f32() { func.func @i4_dequant_matvec_f16() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x1xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x1xf16> @@ -248,14 +240,12 @@ func.func @i4_dequant_matvec_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -292,12 +282,12 @@ func.func @i4_dequant_matvec() { %24 = arith.shli %23, %c32_i64 : i64 %25 = arith.ori %22, %24 : i64 %26 = arith.index_castui %25 : i64 to index - %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : 
!flow.dispatch.tensor> - %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> - %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> + %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> + %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor> + %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor> %30 = flow.dispatch.workload.ordinal %26, 0 : index - %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor>{%30} - %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} + %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor>{%30} + %32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor>{%30} %33 = flow.dispatch.tensor.load %27, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %34 = flow.dispatch.tensor.load %28, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> %35 = flow.dispatch.tensor.load %29, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> @@ -332,12 +322,10 @@ func.func @i4_dequant_matvec() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - 
]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -368,12 +356,12 @@ func.func @i4_dequant_matvec() { %17 = arith.shli %16, %c32_i64 : i64 %18 = arith.ori %15, %17 : i64 %19 = arith.index_castui %18 : i64 to index - %20 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> - %21 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> + %20 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> + %21 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor> %23 = flow.dispatch.workload.ordinal %19, 0 : index - %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%23} - %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%14) : !flow.dispatch.tensor>{%23} + %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%23} + %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%14) : !flow.dispatch.tensor>{%23} %26 = flow.dispatch.tensor.load %20, offsets = [0, 0, 0], sizes = [11008, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<11008x32x128xi4> %27 = 
flow.dispatch.tensor.load %21, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<11008x32xf16> %28 = flow.dispatch.tensor.load %22, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<11008x32xf16> @@ -408,12 +396,10 @@ func.func @i4_dequant_matvec() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dynamic_batch_matvec() { %c32_i64 = arith.constant 32 : i64 @@ -428,11 +414,11 @@ func.func @dynamic_batch_matvec() { %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor> %11 = flow.dispatch.workload.ordinal %8, 0 : index %12 = flow.dispatch.workload.ordinal %9, 1 : index - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor>{%11} - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor>{%12} + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor>{%11} + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor>{%12} %15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [32, 1, %11], strides = [1, 1, 1] : !flow.dispatch.tensor>{%11} -> tensor<32x1x?xf16> %16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0], 
sizes = [32, %12, 128], strides = [1, 1, 1] : !flow.dispatch.tensor>{%12} -> tensor<32x?x128xf16> %17 = tensor.empty() : tensor<32x1x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir index 88f9f723c0873..aa9ae5ea0a4a8 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir @@ -2,13 +2,11 @@ // Convolution with consumer pointwise ops. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> func.func @nhwc_conv_pointwise_112x112x32() { @@ -16,10 +14,10 @@ func.func @nhwc_conv_pointwise_112x112x32() { %cst = arith.constant 0.000000e+00 : f32 %c112 = arith.constant 112 : index %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, 
offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x112x112x32xf32> %5 = tensor.empty() : tensor<1x112x112x32xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> @@ -45,18 +43,16 @@ func.func @nhwc_conv_pointwise_112x112x32() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @nchw_conv_2x1280x8x8() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 1280, 10, 10], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x1280x10x10xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1280, 1280, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1280x1280x3x3xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 1280, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x1280x8x8xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir index 29d8e0422f317..7f0702c1a160f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @static_1d_sort() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> %1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1000], strides = [1] : !flow.dispatch.tensor> -> tensor<1000xi32> %2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<1000xi32>) { ^bb0(%arg0: i32, %arg1: i32): @@ -29,19 +27,17 @@ func.func @static_1d_sort() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @static_3d_sort() { %c64 = arith.constant 64 : index %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x32x128xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x32x128xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x32x128xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x32x128xi32> linalg.generic {indexing_maps = 
[#map, #map], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : memref<64x32x128xi32>) outs(%1 : memref<64x32x128xi32>) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 @@ -63,19 +59,17 @@ func.func @static_3d_sort() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_1d_fft_stage2() { %c0 = arith.constant 0 : index %c2 = arith.constant 2 : index %cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32> %cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32> @@ -93,11 +87,9 @@ func.func @static_1d_fft_stage2() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_3d_fft_stage3() { %c0 = arith.constant 0 : index @@ -109,8 +101,8 @@ func.func @static_3d_fft_stage3() { %cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32> %0 = bufferization.to_memref %cst_0 : memref<4xf32> %1 = 
bufferization.to_memref %cst : memref<4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32> iree_linalg_ext.fft ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>) return } @@ -124,16 +116,14 @@ func.func @static_3d_fft_stage3() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_input_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x34x34x128xf16> %3 = tensor.empty() : tensor<8x8x2x6x6x128xf16> %4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16> @@ -150,16 +140,14 @@ func.func @winograd_input_transform() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, 
storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @winograd_output_transform() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x8x2x6x6x128xf16> %3 = tensor.empty() : tensor<2x36x36x128xf16> %4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir index 13f86b7514692..dcec345c46cca 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target{%0, %1} - %3 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(1) : memref{%0, %1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%0, %1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%0, %1} linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : memref) outs(%3 : memref) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 @@ -35,11 +33,9 @@ func.func @copy_as_generic() attributes {hal.executable.target = #executable_tar // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1x224x224x3xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x224x224x3xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1x224x224x3xf32> linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : memref<1x224x224x3xf32>) outs(%1 : memref<1x224x224x3xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 @@ -73,11 +69,9 @@ func.func @copy() attributes {hal.executable.target = #executable_target_vulkan_ // Average pooling op with nice tilable input. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<12x12xf32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 24, 24, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x24x24x8xf32> %4 = tensor.empty() : tensor<1x2x2x8xf32> @@ -111,11 +105,9 @@ func.func @avg_pool() attributes {hal.executable.target = #executable_target_vul // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 7, 7, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x7x7x1280xf32> %3 = tensor.empty() : tensor<7x7xf32> %4 = tensor.empty() : tensor<1x1x1x1280xf32> @@ -156,11 +148,9 @@ func.func @avg_pool() attributes {hal.executable.target = #executable_target_vul // Max pooling op with odd 
size-1 dimension sizes. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<2x1xf32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 76, 1, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x76x1x1xf32> %4 = tensor.empty() : tensor<1x38x1x1xf32> @@ -197,12 +187,10 @@ func.func @max_pool() attributes {hal.executable.target = #executable_target_vul // Element wise op with mismatched input and output rank. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 10], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x10xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor> -> tensor<10xf32> %5 = tensor.empty() : tensor<10xf32> @@ -240,11 +228,9 @@ func.func @elementwise() attributes {hal.executable.target = #executable_target_ // Fused depthwise convolution and element wise ops: don't vectorize with partially active subgroups. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<1x19x18x1x4xf32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 21, 20, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x21x20x1xf32> %4 = tensor.empty() : tensor<1x19x18x1x4xf32> @@ -289,11 +275,9 @@ func.func @dwconv_elementwise() attributes {hal.executable.target = #executable_ // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 2048, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x2048x512xf32> %3 = tensor.empty() : tensor<2048x512xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2048x512xf32>) -> tensor<2048x512xf32> @@ -330,11 +314,9 @@ func.func @outermost_reduction() attributes 
{hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor> - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor> + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor> %9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x384xf32> %10 = flow.dispatch.tensor.load %7, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> %11 = tensor.empty() : tensor<128xf32> @@ -380,11 +362,9 @@ func.func @innermost_reduction() attributes {hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target (d0, d1, d2, d3)> func.func @four_dim_elementwise() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [128, 8, 256, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x8x256x4xf32> %3 = tensor.empty() : tensor<128x256x4x8xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<128x8x256x4xf32>) outs(%3 : tensor<128x256x4x8xf32>) { @@ -418,11 +398,9 @@ func.func @four_dim_elementwise() attributes {hal.executable.target = #executabl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 501], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x501xf32> %3 = tensor.empty() : tensor<512x501xf32> %4 = tensor.empty() : tensor<512xf32> @@ -466,11 +444,9 @@ func.func @odd_reduction_dimension_size_501() attributes {hal.executable.target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 2809], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x2809xf32> %3 = tensor.empty() : tensor<512x2809xf32> %4 = tensor.empty() : tensor<512xf32> @@ -514,11 +490,9 @@ func.func @odd_reduction_dimension_size_2809() attributes {hal.executable.target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %3 = tensor.empty() : tensor<2048x1x1x1xf32> %4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor) outs(%3 : tensor<2048x1x1x1xf32>) { diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir index 9370b6c429fa9..79df8d2e2ad9e 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir @@ -2,12 +2,10 @@ // Odd K that forbids vectorization. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 3, 3], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x3x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 3, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x3x32xf32> %5 = tensor.empty() : tensor<1x3x32xf32> @@ -45,12 +43,10 @@ func.func @batch_matmul_1x3x32() attributes {hal.executable.target = #executable // 8-bit integers can be vectorized. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x16xi8> %5 = tensor.empty() : tensor<64x16xi32> @@ -87,12 +83,10 @@ func.func @matmul_64x16xi8() attributes {hal.executable.target = #executable_tar // Vectorize non-32 bit types. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xi64> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x16xi64> %5 = tensor.empty() : tensor<64x16xi64> @@ -129,12 +123,10 @@ func.func @matmul_64x16xi64() attributes {hal.executable.target = #executable_ta // Odd N that forbids vectorization. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c11775744) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [273], strides = [1] : !flow.dispatch.tensor> -> tensor<273xf32> %5 = tensor.empty() : tensor<400x273xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [400, 576], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<400x576xf32> @@ -182,12 +174,10 @@ func.func @matmul_400x273() attributes {hal.executable.target = #executable_targ // Odd M and non-4-multiplier N -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : 
!flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c15842560) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [546], strides = [1] : !flow.dispatch.tensor> -> tensor<546xf32> %5 = tensor.empty() : tensor<25x546xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [25, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<25x512xf32> @@ -235,14 +225,12 @@ func.func @matmul_25x546() attributes {hal.executable.target = #executable_targe // Matmul with consumer pointwise ops -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %7 = tensor.empty() : tensor<256x1024xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir index 8deaa1191986d..965c7dc6149a7 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir @@ -1,22 +1,20 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d1)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @complex_view_as_real() { %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) 
offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x32x50x2xf32> %6 = tensor.empty() : tensor<32x50x2xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir index 0f31e2af78dc2..5fadf1733c036 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x512xf32> %3 = tensor.empty() : tensor<2xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32> @@ -41,11 +39,9 @@ func.func @subgroup_reduce_f32() attributes {hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x4096x4096xf16> %3 = tensor.empty() : tensor<16x4096x4096xf16> %4 = tensor.empty() : tensor<16x4096xf16> @@ -88,11 +84,9 @@ func.func @subgroup_reduce_f16() attributes {hal.executable.target = #executable // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -116,9 +110,9 @@ func.func @subgroup_reduce_dynamic() attributes {hal.executable.target = #execut %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %8 = flow.dispatch.workload.ordinal %6, 0 : index - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%8} %10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [8, %8], strides = [1, 1] : !flow.dispatch.tensor>{%8} -> tensor<8x?xf32> %11 = tensor.empty() : tensor<8xf32> %12 = linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%11 : tensor<8xf32>) -> tensor<8xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir index fefcfe06b45ba..f2bdec2eb8767 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir @@ -1,21 +1,19 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d0)> func.func @i4_dequant() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [131072, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<131072x128xi4> %5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor> -> tensor<131072xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor> -> tensor<131072xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir index 8ae533f2a2b11..78a057ee67c3a 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir @@ -2,21 +2,19 @@ // Conv - large OC - distribute to only one workgroup dimension. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_112x112x512() { %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index %c112 = arith.constant 112 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x512xf32> %5 = tensor.empty() : tensor<1x112x112x512xf32> @@ -37,21 +35,19 @@ func.func @conv_112x112x512() { // Conv - medium OC/OW/OH - distribute to two workgroup dimensions. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_112x112x32() { %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c112 = arith.constant 112 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x225x225x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x32xf32> %5 = tensor.empty() : tensor<1x112x112x32xf32> @@ -72,20 +68,18 @@ func.func @conv_112x112x32() { // Conv - small OC/OW/OH - distribute to all three workgroup dimensions. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @conv_16x16x16() { %c0 = arith.constant 0 : index %c16 = arith.constant 16 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x33x33x3xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x3x16xf32> %5 = tensor.empty() : tensor<1x16x16x16xf32> @@ -106,21 +100,19 @@ func.func @conv_16x16x16() { // Depthwise conv - small OC/OW/OH - distribute to all three workgroup dimensions. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dwconv_28x28x144() { %c0 = arith.constant 0 : index %c144 = arith.constant 144 : index %c28 = arith.constant 28 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [0, 57, 57, 144], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x57x57x144xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x144xf32> %5 = tensor.empty() : tensor<1x28x28x144xf32> @@ -141,12 +133,10 @@ func.func @dwconv_28x28x144() { // Depthwise conv - tiny OC/OW/OH - starving the GPU. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @dwconv_1x2x8() { %c0 = arith.constant 0 : index @@ -154,9 +144,9 @@ func.func @dwconv_1x2x8() { %c2 = arith.constant 2 : index %c1 = arith.constant 1 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 3, 5, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x3x5x8xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<3x3x8xf32> %5 = tensor.empty() : tensor<1x1x2x8xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir index 5f30177f1ff18..7facf6762c497 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir @@ -2,21 +2,19 @@ // Large matmul that can match the best tiling scheme. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1024x2048x512() { %c0 = arith.constant 0 : index %c2048 = arith.constant 2048 : index %c1024 = arith.constant 1024 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x2048xf32> %5 = tensor.empty() : tensor<1024x2048xf32> @@ -37,21 +35,19 @@ func.func @matmul_1024x2048x512() { // Small matmul N that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_3136x24x96() { %c0 = arith.constant 0 : index %c24 = arith.constant 24 : index %c3136 = arith.constant 3136 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3136, 96], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3136x96xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<96x24xf32> %5 = tensor.empty() : tensor<3136x24xf32> @@ -72,21 +68,19 @@ func.func @matmul_3136x24x96() { // Small matmul M that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_196x64x192() { %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index %c196 = arith.constant 196 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 192], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<196x192xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<192x64xf32> %5 = tensor.empty() : tensor<196x64xf32> @@ -107,21 +101,19 @@ func.func @matmul_196x64x192() { // Small matmul K that can still tile to all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_12544x96x16() { %c0 = arith.constant 0 : index %c96 = arith.constant 96 : index %c12544 = arith.constant 12544 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<12544x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x96xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<12544x96xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<12544x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x96xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<12544x96xf32> linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>) linalg.matmul ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>) return @@ -138,21 +130,19 @@ func.func @matmul_12544x96x16() { // Odd matmul M and small N that cannot utilize all threads in a workgroup. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_49x160x576() { %c0 = arith.constant 0 : index %c160 = arith.constant 160 : index %c49 = arith.constant 49 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [49, 576], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<49x576xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<576x160xf32> %5 = tensor.empty() : tensor<49x160xf32> @@ -173,12 +163,10 @@ func.func @matmul_49x160x576() { // Small matmul M to "shift" parallelism to N. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_2x1024x576() { %cst = arith.constant 0.000000e+00 : f32 @@ -189,10 +177,10 @@ func.func @matmul_2x1024x576() { %c3436864 = arith.constant 3436864 : index %c10141312 = arith.constant 10141312 : index %c2304 = arith.constant 2304 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3436864) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c10141312) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3436864) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c10141312) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 576], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x576xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<576x1024xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x1024xf32> @@ -214,21 +202,19 @@ func.func 
@matmul_2x1024x576() { // Large matmul with i8 inputs. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1024x2048x512xi8() { %c0 = arith.constant 0 : index %c2048 = arith.constant 2048 : index %c1024 = arith.constant 1024 : index %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xi8> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x2048xi8> %5 = tensor.empty() : tensor<1024x2048xi32> @@ -240,21 +226,19 @@ func.func @matmul_1024x2048x512xi8() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_4x384x384() { %c0 = arith.constant 0 : index %c384 = arith.constant 384 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 
= hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 384, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x384x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x32x384xf32> %5 = tensor.empty() : tensor<4x384x384xf32> @@ -275,12 +259,10 @@ func.func @batch_matmul_4x384x384() { // Small batch matmul. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_4x2x8() { %c0 = arith.constant 0 : index @@ -288,9 +270,9 @@ func.func @batch_matmul_4x2x8() { %c2 = arith.constant 2 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = 
flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 2, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x2x32xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x32x8xf32> %5 = tensor.empty() : tensor<4x2x8xf32> @@ -311,12 +293,10 @@ func.func @batch_matmul_4x2x8() { // Linalg.generic that is a batch matmul. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -327,9 +307,9 @@ func.func @batch_matmul_4x2x8() { func.func @generic_batch_matmul_32x2x512() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x32x64xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<32x64x512xf32> %5 = 
tensor.empty() : tensor<32x8x512xf32> @@ -355,13 +335,11 @@ func.func @generic_batch_matmul_32x2x512() { // Linalg.generic that is a batch matmul. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d3, d2)> @@ -372,11 +350,11 @@ func.func @generic_batch_matmul_8x2500x512x4608() { %c537247744 = arith.constant 537247744 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c168607744) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c537247744) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c168607744) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c537247744) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [8, 2500, 4608], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x2500x4608xf32> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4608, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4608x512xf32> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [8, 2500, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<8x2500x512xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir index 4c3f060c91a46..8d4f3f0a92cbf 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_4x4096x9216() { %c36864 = arith.constant 36864 : index @@ -13,10 +11,10 @@ func.func @matmul_4x4096x9216() { %c209920 = arith.constant 209920 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 9216], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x9216xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [9216, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<9216x4096xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4096xf32> @@ -36,12 +34,10 @@ func.func @matmul_4x4096x9216() { // Matvec does not go down matmul pipelines. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_1x4096x9216() { %c36864 = arith.constant 36864 : index @@ -49,10 +45,10 @@ func.func @matmul_1x4096x9216() { %c209920 = arith.constant 209920 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 9216], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x9216xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [9216, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<9216x4096xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1x4096xf32> @@ -72,12 +68,10 @@ func.func @matmul_1x4096x9216() 
{ // Multi-reduction-dimension transposed-B matmul. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)> @@ -85,9 +79,9 @@ func.func @matmul_1x4096x9216() { func.func @multi_reduction_transposed_b_matmul() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [2048, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<2048x86x128xf32> %5 = tensor.empty() : tensor<4096x2048xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir index db27e00cce445..b818edddc9168 100644 --- 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir @@ -2,14 +2,12 @@ // RUN: --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | \ // RUN: FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @matmul_256x1024x128_div_add() { @@ -17,11 +15,11 @@ func.func @matmul_256x1024x128_div_add() { %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %5 = 
flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %7 = tensor.empty() : tensor<256x1024xf16> @@ -49,22 +47,20 @@ func.func @matmul_256x1024x128_div_add() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @batch_matmul_16x128x256x512_div() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : 
!flow.dispatch.tensor> -> tensor<16x128x512xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x512x256xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x128x256xf16> @@ -91,12 +87,10 @@ func.func @batch_matmul_16x128x256x512_div() { // Linalg.generic that is a batch matmul. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> @@ -107,9 +101,9 @@ func.func @batch_matmul_16x128x256x512_div() { func.func @generic_batch_matmul_32x8x512x64() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x64xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 
1] : !flow.dispatch.tensor> -> tensor<32x64x512xf16> %5 = tensor.empty() : tensor<32x128x512xf16> @@ -135,19 +129,17 @@ func.func @generic_batch_matmul_32x8x512x64() { // K dim size not divisble by 32. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @batch_matmul_16x1024x1024x80() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 1024, 80], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x1024x80xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 80, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x80x1024xf16> %5 = tensor.empty() : tensor<16x1024x1024xf16> @@ -168,21 +160,19 @@ func.func @batch_matmul_16x1024x1024x80() { // Small K - not supported by cooperative matrix. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_256x1024x8() { %c0 = arith.constant 0 : index %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x8xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x1024xf16> %5 = tensor.empty() : tensor<256x1024xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir index f42b1c1fd02ff..ed93869f2e428 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-codegen-materialize-user-configs, iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -15,9 +13,9 @@ func.func @matmul_128x1024x256() { %c128 = arith.constant 128 : index %c1024 = arith.constant 1024 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf32> %5 = tensor.empty() : tensor<128x1024xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir index eca6f4affabc8..73f09e64527bb 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir @@ -6,7 +6,9 @@ hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-s wgp = , ], subgroup_size_choices = [32, 64], max_workgroup_sizes = [1024, 1024, 1024], max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, max_workgroup_counts = [2147483647, 2147483647, 2147483647]>>}>) { - 
hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout]> + ) { ^bb0(%arg0: !hal.device): %x, %y, %z = flow.dispatch.workgroup_count_from_slice hal.return %x, %y, %z : index, index, index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir index 9fcdab10a188e..b437006183779 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv))))' %s | FileCheck %s // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv{index-bits=64}))))' %s | FileCheck %s --check-prefix=INDEX64 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @push_constant { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -38,14 +36,10 @@ hal.executable private @push_constant { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<3, storage_buffer> - ]>, - #hal.descriptor_set.layout<3, bindings = [ - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @resource_bindings_in_same_func { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -54,10 +48,10 @@ hal.executable private @resource_bindings_in_same_func { } builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>} { // CHECK-LABEL: 
spirv.module - // CHECK: spirv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spirv.ptr [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spirv.ptr [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spirv.ptr, stride=16> [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spirv.ptr [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[ARG0:.+]] bind(0, 0) : !spirv.ptr [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[ARG1_0:.+]] bind(0, 1) {aliased} : !spirv.ptr [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[ARG1_1:.+]] bind(0, 1) {aliased} : !spirv.ptr, stride=16> [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[RET0:.+]] bind(0, 2) : !spirv.ptr [0])>, StorageBuffer> // CHECK: spirv.func @resource_bindings_in_same_entry_func() func.func @resource_bindings_in_same_entry_func() -> f32 { %c0 = arith.constant 0 : index @@ -65,17 +59,17 @@ hal.executable private @resource_bindings_in_same_func { // Same type // CHECK: spirv.mlir.addressof @[[ARG0]] // CHECK: spirv.mlir.addressof @[[ARG0]] - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class> // Different type // CHECK: spirv.mlir.addressof @[[ARG1_0]] // CHECK: spirv.mlir.addressof @[[ARG1_1]] - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<4x4xf32, #spirv.storage_class> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<4xvector<4xf32>, #spirv.storage_class> + %2 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) : memref<4x4xf32, #spirv.storage_class> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4xvector<4xf32>, #spirv.storage_class> // CHECK: spirv.mlir.addressof @[[RET0]] - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4x4xf32, #spirv.storage_class> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x4xf32, #spirv.storage_class> %5 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class> %6 = memref.load %1[%c0, %c0] : memref<4x4xf32, #spirv.storage_class> @@ -99,13 +93,9 @@ hal.executable private @resource_bindings_in_same_func { // ----- -#pipeline_layout = #hal.pipeline.layout - ]>, - #hal.descriptor_set.layout<3, bindings = [ - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @resource_bindings_in_multi_entry_func { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -117,18 +107,18 @@ hal.executable private @resource_bindings_in_multi_entry_func { } builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>} { // CHECK-LABEL: spirv.module - // CHECK: spirv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spirv.ptr [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spirv.ptr, stride=16> [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spirv.ptr [0])>, StorageBuffer> - // CHECK: spirv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spirv.ptr [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[FUNC1_ARG:.+]] bind(0, 0) : !spirv.ptr [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[FUNC1_RET:.+]] bind(0, 1) : !spirv.ptr, stride=16> [0])>, StorageBuffer> + // CHECK: spirv.GlobalVariable @[[FUNC2_ARG:.+]] bind(0, 0) : !spirv.ptr [0])>, StorageBuffer> + // CHECK: 
spirv.GlobalVariable @[[FUNC2_RET:.+]] bind(0, 1) : !spirv.ptr [0])>, StorageBuffer> // CHECK: spirv.func @resource_bindings_in_entry_func1() func.func @resource_bindings_in_entry_func1() -> f32 { // CHECK: spirv.mlir.addressof @[[FUNC1_ARG]] // CHECK: spirv.mlir.addressof @[[FUNC1_RET]] %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4xvector<4xf32>, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4xvector<4xf32>, #spirv.storage_class> %2 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class> %3 = memref.load %1[%c0] : memref<4xvector<4xf32>, #spirv.storage_class> @@ -144,8 +134,8 @@ hal.executable private @resource_bindings_in_multi_entry_func { // CHECK: spirv.mlir.addressof @[[FUNC2_ARG]] // CHECK: spirv.mlir.addressof @[[FUNC2_RET]] %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class> // Same type as previous function - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4x4xf32, #spirv.storage_class> // Different type as previous function + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class> // Same type as previous function + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4x4xf32, #spirv.storage_class> // Different type as previous function %2 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class> %3 = memref.load %1[%c0, %c0] : memref<4x4xf32, #spirv.storage_class> @@ -160,12 +150,10 @@ hal.executable private @resource_bindings_in_multi_entry_func { // ----- 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_binding { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -175,9 +163,9 @@ hal.executable private @interface_binding { builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>} { func.func @interface_binding() -> f32 { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x5xf32, #spirv.storage_class> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xf32, #spirv.storage_class> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8x5xf32, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x5xf32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xf32, #spirv.storage_class> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8x5xf32, #spirv.storage_class> %3 = memref.load %0[%c0, %c0] : memref<8x5xf32, #spirv.storage_class> %4 = memref.load %1[%c0] : memref<5xf32, #spirv.storage_class> @@ -205,12 +193,10 @@ hal.executable private @interface_binding { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_wg_id { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -240,12 +226,10 @@ hal.executable private @interface_wg_id { // ----- -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_wg_size { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -257,7 +241,7 @@ hal.executable private @interface_wg_size { %c0 = arith.constant 0.0 : f32 %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_y = hal.interface.workgroup.size[1] : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32, #spirv.storage_class> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32, #spirv.storage_class> memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32, #spirv.storage_class> return } @@ -278,12 +262,10 @@ hal.executable private @interface_wg_size { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_wg_count { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir index eb1c28116fbf7..28405723f7ab9 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir @@ -2,12 +2,10 @@ // RUN: --pass-pipeline='builtin.module(func.func(iree-spirv-emulate-i64))' %s | \ // RUN: FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + 
#hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8xi64, #spirv.storage_class> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8xi64, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8xi32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8xi64, #spirv.storage_class> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8xi64, #spirv.storage_class> %3 = memref.load %0[%c0] : memref<8xi32, #spirv.storage_class> %4 = memref.load %1[%c0] : memref<8xi64, #spirv.storage_class> %5 = arith.addi %4, %c1_i64 : i64 @@ -32,8 +30,8 @@ func.func @buffer_types() attributes {hal.executable.target = #executable_target // Check that without the Int64 capability emulation produces expected i32 ops. 
// // CHECK-LABEL: func.func @buffer_types -// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<8xvector<2xi32>, #spirv.storage_class> -// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<8xvector<2xi32>, #spirv.storage_class> +// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<8xvector<2xi32>, #spirv.storage_class> +// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<8xvector<2xi32>, #spirv.storage_class> // CHECK: [[VI64:%.+]] = memref.load [[REF_I64_0]][{{%.+}}] : memref<8xvector<2xi32>, #spirv.storage_class> // CHECK: {{%.+}} = arith.addui_extended {{%.+}}, {{%.+}} : i32, i1 // CHECK: memref.store {{%.+}}, [[REF_I64_1]][{{%.+}}] : memref<8xvector<2xi32>, #spirv.storage_class> @@ -41,11 +39,9 @@ func.func @buffer_types() attributes {hal.executable.target = #executable_target // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target, #spirv.storage_class>{%c96} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c1523712) : memref, #spirv.storage_class>{%c36864} - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref, #spirv.storage_class>{%c36864} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref, #spirv.storage_class>{%c96} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c1523712) : memref, #spirv.storage_class>{%c36864} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : 
memref, #spirv.storage_class>{%c36864} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %thread_id_x = gpu.thread_id x @@ -94,12 +90,10 @@ func.func @emulate_1d_vector() attributes {hal.executable.target = #executable_t // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8xi64, #spirv.storage_class> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8xi64, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8xi32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8xi64, #spirv.storage_class> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8xi64, #spirv.storage_class> %3 = memref.load %0[%c0] : memref<8xi32, #spirv.storage_class> %4 = memref.load %1[%c0] : memref<8xi64, #spirv.storage_class> %5 = arith.addi %4, %c1_i64 : i64 @@ -125,9 +119,9 @@ func.func @no_emulation() attributes {hal.executable.target = #executable_target // // CHECK-LABEL: func.func @no_emulation // CHECK: [[CST1:%.+]] = arith.constant 1 : i64 -// CHECK: [[REF_I32:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<8xi32, #spirv.storage_class> -// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<8xi64, #spirv.storage_class> -// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<8xi64, #spirv.storage_class> +// CHECK: 
[[REF_I32:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<8xi32, #spirv.storage_class> +// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<8xi64, #spirv.storage_class> +// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<8xi64, #spirv.storage_class> // CHECK: [[VI32:%.+]] = memref.load [[REF_I32]][{{%.+}}] : memref<8xi32, #spirv.storage_class> // CHECK: [[VI64:%.+]] = memref.load [[REF_I64_0]][{{%.+}}] : memref<8xi64, #spirv.storage_class> // CHECK: {{%.+}} = arith.addi {{%.+}} : i64 diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir index aa25417ac7f7c..6031f46a924b6 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir @@ -1,14 +1,12 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-erase-storage-buffer-static-shape))" %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @storage_buffer_load_store(%offset: index, %i0: index, %i1: index) { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%offset) : 
memref<256xf32, #hal.descriptor_type> %val = memref.load %0[%i0] : memref<256xf32, #hal.descriptor_type> memref.store %val, %1[%i1] : memref<256xf32, #hal.descriptor_type> return @@ -17,8 +15,8 @@ func.func @storage_buffer_load_store(%offset: index, %i0: index, %i1: index) { // CHECK-LABEL: func.func @storage_buffer_load_store // CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index) // CHECK: %[[C256:.+]] = arith.constant 256 : index -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET]]) flags(ReadOnly) : memref>{%[[C256]]} -// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET]]) : memref>{%[[C256]]} +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET]]) flags(ReadOnly) : memref>{%[[C256]]} +// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET]]) : memref>{%[[C256]]} // CHECK: %[[LD:.+]] = memref.load %[[SPAN0]][%[[I0]]] // CHECK: memref.store %[[LD]], %[[SPAN1]][%[[I1]]] @@ -26,51 +24,45 @@ func.func @storage_buffer_load_store(%offset: index, %i0: index, %i1: index) { // Test that we don't rewrite memref for uniform buffers. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @uniform_buffer_load(%offset: index, %i0: index) -> f32 { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> %val = memref.load %0[%i0] : memref<256xf32, #hal.descriptor_type> return %val : f32 } // CHECK-LABEL: func.func @uniform_buffer_load -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> // CHECK: memref.load %[[SPAN0]] // ----- // Test that we don't rewrite memref without HAL descriptor types. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @uniform_buffer_load(%offset: index, %i0: index) -> f32 { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32> %val = memref.load %0[%i0] : memref<256xf32> return %val : f32 } // CHECK-LABEL: func.func @uniform_buffer_load -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32> +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32> // CHECK: memref.load %[[SPAN0]] // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @storage_buffer_transfer_read_write(%offset: index, %i0: index, %i1: index) { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type> %f0 = arith.constant 0.0 : f32 %val = vector.transfer_read %0[%i0], %f0 {in_bounds = [true]} : memref<256xf32, #hal.descriptor_type>, vector<4xf32> vector.transfer_write %val, %1[%i1] {in_bounds = [true]} : vector<4xf32>, 
memref<256xf32, #hal.descriptor_type> @@ -83,14 +75,12 @@ func.func @storage_buffer_transfer_read_write(%offset: index, %i0: index, %i1: i // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @storage_buffer_subview(%offset : index, %i0: index, %i1: index) -> f32 { %c0 = arith.constant 0 : index - %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type> + %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type> %subview = memref.subview %subspan[%i0][16][1] : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type> to memref<16xf32, strided<[1], offset: ?>, #hal.descriptor_type> %value = memref.load %subview[%c0] : memref<16xf32, strided<[1], offset: ?>, #hal.descriptor_type> return %value : f32 @@ -101,18 +91,16 @@ func.func @storage_buffer_subview(%offset : index, %i0: index, %i1: index) -> f3 // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @storage_buffer_cast(%offset: index) -> memref> { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) : memref<16xf32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) : memref<16xf32, #hal.descriptor_type> %1 = memref.cast %0 : memref<16xf32, #hal.descriptor_type> to memref> return %1 : memref> } // CHECK-LABEL: func.func @storage_buffer_cast // CHECK: %[[C16:.+]] = arith.constant 16 : index -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) : memref>{%[[C16]]} +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) : memref>{%[[C16]]} // CHECK: 
return %[[SPAN0]] diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir index 681cf99e5c8cd..6127e8448ef71 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir @@ -2,12 +2,10 @@ // RUN: --pass-pipeline='builtin.module(iree-codegen-materialize-user-configs, iree-spirv-select-lowering-strategy-pass)' \ // RUN: --verify-diagnostics --split-input-file %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -21,9 +19,9 @@ #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32> // expected-error @+1 {{expected 1 levels of tiling sizes, got 0}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>) return @@ -31,12 +29,10 @@ func.func @illegal() attributes {hal.executable.target = 
#executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -51,21 +47,19 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // expected-error @+1 {{expected workgroup size to have three dimensions for SPIR-V pipelines}} func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32> linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>) return } // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -79,9 +73,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func 
@illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32> // expected-error @+1 {{expected workgroup size dimensions not exceeding [128, 128, 64]}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>) return @@ -89,12 +83,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -108,9 +100,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32> + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32> // expected-error @+1 {{expected total invocation count in workgroup to be <= 128}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>) return @@ -118,12 +110,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -137,9 +127,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32> // expected-error @+1 {{expected total workgroup size to be multiple of 32}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, 
memref<16x128xf32>) outs(%2 : memref<64x128xf32>) return @@ -147,12 +137,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -166,9 +154,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32> // expected-error @+1 {{expected each workgroup size dimension to be power of two}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>) return @@ -176,12 +164,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = 
#iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -195,9 +181,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<48x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<48x128xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<48x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<48x128xf32> // expected-error @+1 {{LHS shape is indivisible by first level tile size}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<48x16xf32>, memref<16x128xf32>) outs(%2 : memref<48x128xf32>) return @@ -205,12 +191,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -224,9 +208,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x80xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x80xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x80xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x80xf32> // expected-error @+1 {{RHS shape is indivisible by first level tile size}} linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x80xf32>) outs(%2 : memref<64x80xf32>) return @@ -234,12 +218,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -255,9 +237,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) 
binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x128xf16> %5 = tensor.empty() : tensor<64x128xf16> @@ -270,12 +252,10 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -291,9 +271,9 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 
0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x128xf16> %5 = tensor.empty() : tensor<64x128xf16> @@ -306,12 +286,10 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -327,9 +305,9 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x128xf16> %5 = tensor.empty() : tensor<64x128xf16> @@ -342,12 +320,10 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, 
storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -363,9 +339,9 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x128xf16> %5 = tensor.empty() : tensor<64x128xf16> @@ -378,12 +354,10 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -399,9 
+373,9 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x32xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x128xf16> %5 = tensor.empty() : tensor<64x128xf16> @@ -414,12 +388,10 @@ func.func @matmul_tensor() attributes {hal.executable.target = #executable_targe // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -439,9 +411,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk %c16 = arith.constant 16 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -474,12 +446,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -499,9 +469,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk %c16 = arith.constant 16 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) 
alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -534,12 +504,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -559,9 +527,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk %c16 = arith.constant 16 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index 
%workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -594,12 +562,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -613,9 +579,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32> // expected-error @+1 {{expected tile sizes for KH and KW to be 1}} linalg.depthwise_conv_2d_nhwc_hwc {compilation_info = #compilation} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>) return @@ -623,12 +589,10 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { @@ -642,9 +606,9 @@ func.func @illegal() attributes {hal.executable.target = #executable_target_vulk #compilation = #iree_codegen.compilation_info func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32> // expected-error @+1 {{expected the fourth level of tile size to be [0, 1, 0, 0]}} linalg.depthwise_conv_2d_nhwc_hwc {compilation_info = #compilation} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>) return diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir index bee65573c9945..4ca2219d93d3f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir @@ -8,11 +8,9 @@ #vulkan_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable 
private @dispatch_0 { @@ -94,9 +92,9 @@ func.func @basic_linking() -> () attributes { %dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@spirv::@dispatch_0) : index %dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index %dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) return } util.initializer { @@ -111,9 +109,9 @@ util.initializer { %dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@spirv::@dispatch_0) : index %dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index %dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - 
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) util.return } @@ -165,8 +163,8 @@ util.initializer { // CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_1) // CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_2) // CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> 
target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) // // CHECK: util.initializer // CHECK-DAG: %[[DISPATCH_0_EXE:.+]] = hal.executable.lookup device(%{{.+}}) executable(@link_executables_linked_spirv) : !hal.executable @@ -176,8 +174,8 @@ util.initializer { // CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_1) // CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_2) // CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) // ----- @@ -193,11 +191,9 @@ util.initializer { #vulkan_target_1 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @dispatch_0 { @@ -304,10 +300,10 @@ func.func @two_target_environments() -> () { %dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index 
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index %dispatch_3_ordinal = hal.executable.export.ordinal target(@dispatch_3::@spirv::@dispatch_3) : index - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_3_exe : !hal.executable)[%dispatch_3_ordinal] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_3_exe : !hal.executable)[%dispatch_3_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) return } @@ -387,11 +383,9 @@ func.func @two_target_environments() -> () { #vulkan_target_2 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.spirv.features = ["vulkan-spirv", "subgroup=2"]}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + 
#hal.pipeline.binding ]> hal.executable private @dispatch_0 { diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir index 582890159f525..ca050b5fb829b 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-codegen-spirv-configuration-pipeline, func.func(iree-spirv-lower-executable-target-pass))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 32)> @@ -24,11 +22,11 @@ func.func @matmul_i4_quant_weight() { %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %5 = affine.apply #map()[%workgroup_id_y] @@ -71,9 +69,9 @@ func.func @matmul_i4_quant_weight() { // CHECK-LABEL: func.func @matmul_i4_quant_weight() // CHECK: %[[A_ALLOC:.+]] = memref.alloc() : memref<32x1x36xf32, #gpu.address_space> // CHECK: %[[B_ALLOC:.+]] = memref.alloc() : memref<1x32x132xf32, #gpu.address_space> -// CHECK: %[[WEIGHT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[SCALE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK: %[[ZP_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[WEIGHT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[SCALE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK: %[[ZP_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for %arg0 = %c0 to %c86 step %c1 iter_args({{.+}}) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) // CHECK: %[[SCALE0:.+]] = vector.transfer_read %[[SCALE_BINDING]] // CHECK: %[[SCALE1:.+]] = vector.transfer_read %[[SCALE_BINDING]] diff --git 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir index 274669dc5897f..d3ebf521e8c0a 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir @@ -6,13 +6,11 @@ // Verify pipelining + multi-buffering. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #compilation = #iree_codegen.compilation_info< lowering_config = #iree_codegen.lowering_config, @@ -29,10 +27,10 @@ hal.executable @matmul_f32_128x256x64 { func.func @matmul_f32_128x256x64() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) 
: !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x512xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x256xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> @@ -94,13 +92,11 @@ hal.executable @matmul_f32_128x256x64 { // Store in stage 0 of pipeline. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #compilation = #iree_codegen.compilation_info< lowering_config = #iree_codegen.lowering_config, @@ -117,10 +113,10 @@ hal.executable @matmul_f32_128x256x64 { func.func @matmul_f32_128x256x64() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : 
!flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x512xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x256xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> @@ -195,13 +191,11 @@ hal.executable @matmul_f32_128x256x64 { // Check that fused transposed consumer elementwise op does not cause extra workgroup memory allocations. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #compilation = #iree_codegen.compilation_info< lowering_config = #iree_codegen.lowering_config, @@ -217,10 +211,10 @@ hal.executable @matmul_f16_4096x512x512 { func.func @matmul_f16_4096x512x512() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) 
flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x512xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x512xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir index 0948ba9c9aa5b..24c7b744d321c 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir @@ -1,13 +1,11 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass, func.func(iree-spirv-lower-executable-target-pass))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0, d1)> @@ -15,11 +13,11 @@ #map3 = affine_map<(d0, d1, d2) -> (d0)> func.func @i4_dequant_matvec_f32() { 
%cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir index 6d4d16315827a..951179059217e 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir @@ -2,11 +2,9 @@ // RUN: --pass-pipeline='builtin.module(func.func(iree-codegen-decompose-softmax), iree-spirv-select-lowering-strategy-pass, 
func.func(iree-spirv-lower-executable-target-pass))' \ // RUN: %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 10240], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x10240xf32> %3 = tensor.empty() : tensor<512xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<512xf32>) -> tensor<512xf32> @@ -91,11 +89,9 @@ func.func @warp_reduction_dispatch() attributes {hal.executable.target = #execut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<10x9216x9216xf16> %3 = tensor.empty() : tensor<10x9216x9216xf16> %4 = tensor.empty() : tensor<10x9216xf16> @@ -150,8 +146,8 @@ func.func 
@warp_reduction_dispatch() attributes {hal.executable.target = #execut // CHECK-DAG: %[[WGIDY:.+]] = hal.interface.workgroup.id[1] : index // CHECK-DAG: %[[TIDX:.+]] = gpu.thread_id x -// CHECK-DAG: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: gpu.barrier // CHECK: %{{.+}}, %{{.+}} = gpu.shuffle xor %{{.+}}, %[[I1]], %[[I32]] : i32 @@ -175,11 +171,9 @@ func.func @warp_reduction_dispatch() attributes {hal.executable.target = #execut // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<12x128x40960xf32> %3 = tensor.empty() : tensor<12x128x40960xf32> %4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32> @@ -283,11 +277,9 @@ func.func @softmax() attributes {hal.executable.target = #executable_target_vulk // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = 
#hal.pipeline.layout, + #hal.pipeline.binding ]> #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { iree.gpu.target = #iree_gpu.target>{%6} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%6} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [32, %6], strides = [1, 1] : !flow.dispatch.tensor>{%6} -> tensor<32x?xf16> %11 = tensor.empty(%6) : tensor<32x?xf16> %12 = linalg.softmax dimension(1) ins(%10 : tensor<32x?xf16>) outs(%11 : tensor<32x?xf16>) -> tensor<32x?xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir index 3cd1cc8c9480a..8aee76176afe6 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-spirv-select-lowering-strategy-pass, func.func(iree-spirv-lower-executable-target-pass)))))' -mlir-print-local-scope %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable @scalar_dispatch { hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -18,8 +16,8 @@ hal.executable @scalar_dispatch { %c0 = arith.constant 0 : index %c6364136223846793005_i64 = 
arith.constant 6364136223846793005 : i64 %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor> -> tensor %extracted = tensor.extract %2[] : tensor %3 = arith.muli %extracted, %c6364136223846793005_i64 : i64 @@ -34,8 +32,8 @@ hal.executable @scalar_dispatch { // CHECK: func.func @scalar_dispatch() // CHECK-SAME: translation_info = #iree_codegen.translation_info -// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: memref.load %[[SPAN0]][] : memref> // CHECK: arith.muli {{.+}} : i64 // CHECK: arith.addi {{.+}} : i64 diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir index cef233859a57c..d302ed46d14db 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir @@ -1,19 +1,15 @@ // RUN: iree-opt --split-input-file 
--pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-spirv-materialize-executable-conditions)))' --mlir-print-local-scope %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> -#indirect_pipeline_layout = #hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ], flags = Indirect> +#indirect_pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @dispatch_executable { @@ -21,10 +17,10 @@ hal.executable private @dispatch_executable { // CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}>) // CHECK-NOT: hal.executable.condition hal.executable.variant public @test_assumed_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_assumed_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index @@ -54,10 +50,10 @@ hal.executable private @dispatch_executable { // CHECK-NEXT: hal.return %[[RESULT]] : i1 // CHECK-NEXT: } hal.executable.variant public @test_subgroup_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_subgroup_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : 
index @@ -87,10 +83,10 @@ hal.executable private @dispatch_executable { // CHECK-NEXT: hal.return %[[RESULT]] : i1 // CHECK-NEXT: } hal.executable.variant public @test_8bit_storage_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_8bit_storage_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index @@ -121,10 +117,10 @@ hal.executable private @dispatch_executable { // CHECK-NEXT: hal.return %[[RESULT]] : i1 // CHECK-NEXT: } hal.executable.variant public @test_16bit_storage_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_16bit_storage_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index @@ -147,10 +143,10 @@ hal.executable private @dispatch_executable { // CHECK: %[[TARGET:.+]] = arith.constant 7 : i32 // CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32 hal.executable.variant public @test_int_compute_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_int_compute_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = 
arith.constant 1 : index @@ -172,10 +168,10 @@ hal.executable private @dispatch_executable { // CHECK: %[[TARGET:.+]] = arith.constant 3 : i32 // CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32 hal.executable.variant public @test_float_compute_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_float_compute_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index @@ -197,10 +193,10 @@ hal.executable private @dispatch_executable { // CHECK: %[[TARGET:.+]] = arith.constant 1 : i32 // CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32 hal.executable.variant public @test_dot_product_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_dot_product_capabilities ordinal(0) layout(#pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index @@ -222,10 +218,10 @@ hal.executable private @dispatch_executable { // CHECK: %[[TARGET:.+]] = arith.constant 1 : i32 // CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32 hal.executable.variant public @test_cooperative_matrix_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> + }> + ) { hal.executable.export 
public @test_cooperative_matrix_capabilities ordinal(0) layout(#pipeline_layout) attributes { iree.spirv.coopmatrix.shape = array, iree.spirv.coopmatrix.type = [f16, f16] } { @@ -254,13 +250,13 @@ hal.executable private @dispatch_executable { // CHECK: %[[TARGET1:.+]] = arith.constant 1 : i32 // CHECK: %{{.+}} = arith.andi %[[V1]], %[[TARGET1]] : i32 hal.executable.variant public @test_address_capabilities target( - #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb-ptr", { - spirv.target_env = #spirv.target_env<#spirv.vce, - #spirv.resource_limits<>> - }> - ) { + #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb-ptr", { + spirv.target_env = #spirv.target_env<#spirv.vce, + #spirv.resource_limits<>> + }> + ) { hal.executable.export public @test_address_capabilities ordinal(0) layout(#indirect_pipeline_layout) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir index 65cf1e558b105..b95b1b8778bb7 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir @@ -2,12 +2,10 @@ // RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv{index-bits=64}))))' \ // RUN: %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ], flags = Indirect> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @interface_binding { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb-ptr">) { @@ -21,9 +19,9 @@ hal.executable private @interface_binding { } { func.func @interface_binding() -> f32 { %c0 = arith.constant 0 : index 
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x5xf32, #spirv.storage_class> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xf32, #spirv.storage_class> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x5xf32, #spirv.storage_class> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x5xf32, #spirv.storage_class> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xf32, #spirv.storage_class> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x5xf32, #spirv.storage_class> %3 = memref.load %0[%c0, %c0] : memref<8x5xf32, #spirv.storage_class> %4 = memref.load %1[%c0] : memref<5xf32, #spirv.storage_class> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir index ed932b80a57a0..254d09daeb6f9 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir @@ -6,14 +6,12 @@ // RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline, canonicalize, cse)))' \ // RUN: %s | FileCheck %s --check-prefix=RDNA3 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @matmul_256x1024x128_div_exp { @@ -29,11 +27,11 @@ hal.executable public 
@matmul_256x1024x128_div_exp { %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor> %11 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %14 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<256x1024xf16> %17 = tensor.empty() : tensor<256x1024xf16> @@ -196,13 +194,11 @@ hal.executable public @matmul_256x1024x128_div_exp { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @batch_matmul_16x128x256x512_div { hal.executable.variant @vulkan target(<"vulkan-spirv", 
"vulkan-spirv-fb">) { @@ -215,10 +211,10 @@ hal.executable public @batch_matmul_16x128x256x512_div { func.func @batch_matmul_16x128x256x512_div() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x128x512xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x512x256xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x128x256xf16> @@ -301,13 +297,11 @@ hal.executable public @batch_matmul_16x128x256x512_div { // Small matmul that each subgroup only handles one tile -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - 
#hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @matmul_32x32x32_div { @@ -321,10 +315,10 @@ hal.executable public @matmul_32x32x32_div { func.func @matmul_32x32x32_div() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x32xf16> @@ -355,12 +349,10 @@ hal.executable public @matmul_32x32x32_div { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable public @generic_batch_matmul_32x128x512x64 { @@ -374,9 +366,9 @@ hal.executable public @generic_batch_matmul_32x128x512x64 { func.func @generic_batch_matmul_32x128x512x64() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [32, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<32x128x64xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [64, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<64x512xf16> %5 = tensor.empty() : tensor<32x128x512xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir index 046c2891330ba..9e28eb0bbf99b 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan 
--pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -21,10 +19,10 @@ hal.executable @matmul_f32_128x256x64 { func.func @matmul_f32_128x256x64() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x512xf32> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor> -> 
tensor<512x256xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf32> @@ -75,13 +73,11 @@ hal.executable @matmul_f32_128x256x64 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> @@ -96,10 +92,10 @@ hal.executable @matmul_f16_128x256x64 { func.func @matmul_f16_128x256x64() { %cst = arith.constant 0.0 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x512xf16> %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<512x256xf16> %6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xf16> @@ -153,12 +149,10 @@ hal.executable @matmul_f16_128x256x64 { // Check scalar load/store for promotion to shared memory. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #user_config = #iree_codegen.compilation_info< @@ -176,9 +170,9 @@ hal.executable @matmul_f16_32x1280x1280 { func.func @matmul_f16_32x1280x1280() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<32x1280xf16> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1280x1280xf16> %5 = tensor.empty() : tensor<32x1280xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir index f9a186cf95de2..503781c46150c 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=valhall1 --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @fuse_and_vectorize_fill_matmul { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -19,9 +17,9 @@ hal.executable private @fuse_and_vectorize_fill_matmul { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %c4096 = arith.constant 4096 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x4096xf32> %10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : 
!flow.dispatch.tensor> -> tensor<4096x4096xf32> %15 = tensor.empty() : tensor<4096x4096xf32> @@ -44,13 +42,11 @@ hal.executable private @fuse_and_vectorize_fill_matmul { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @fuse_and_vectorize_matmul_add { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -65,10 +61,10 @@ hal.executable private @fuse_and_vectorize_matmul_add { %cst = arith.constant 0.000000e+00 : f32 %c1024 = arith.constant 1024 : index %c256 = arith.constant 256 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor> %10 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x256xf32> %13 = tensor.empty() : tensor<1024x256xf32> %15 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x512xf32> diff --git 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir index aed1a325aaee8..41a2c4a843bfe 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir @@ -2,14 +2,12 @@ // RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' \ // RUN: %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @i4_dequant_unit_matmul_f16 { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", { @@ -28,11 +26,11 @@ hal.executable @i4_dequant_unit_matmul_f16 { func.func @i4_dequant_unit_matmul_f16() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor> %5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x1xf16> %7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x1xf16> @@ -114,12 +112,10 @@ hal.executable @i4_dequant_unit_matmul_f16 { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @i4_dequant_matvec_f16_subgroup_64 { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", { @@ -147,11 +143,11 @@ hal.executable @i4_dequant_matvec_f16_subgroup_64 { %7 = arith.index_castui %2 : i32 to index %8 = arith.index_castui %3 : i32 to index %9 = arith.index_castui %4 : i32 to index - %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> - %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> - %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> - %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> + %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor> + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor> + %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor> + %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor> + %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor> %15 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<4096x86x128xi4> %16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf16> %17 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4096x86xf16> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir index 03a12aafb230f..f5143574c715f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir +++ 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=valhall1 --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s --check-prefix=NOSHUFFLE -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @subgroup_reduce { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -18,8 +16,8 @@ hal.executable private @subgroup_reduce { func.func @subgroup_reduce() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2x512xf32> %3 = tensor.empty() : tensor<2xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir index b2e8d5bba2e47..c799a0d2761c3 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @i4_dequant { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -18,10 +16,10 @@ hal.executable @i4_dequant { builtin.module { func.func @i4_dequant() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [131072, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<131072x128xi4> %5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor> -> tensor<131072xf32> %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor> -> tensor<131072xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir index 331832e2fbd96..d32855d538b71 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir @@ -6,19 +6,17 @@ // core, but there are no such wmma intrinsics. Fix it to support fp16-input. 
// TODO: | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul() { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2052x2556xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<2556x2052xf32> %5 = tensor.empty() : tensor<2052x2052xf32> diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir index 27dbd92825cca..9bc566790cc0a 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir @@ -10,12 +10,10 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = 
#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matmul { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -29,9 +27,9 @@ hal.executable private @matmul { %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index %K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index - %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%M, %K} - %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%K, %N} - %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref{%M, %N} + %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%M, %K} + %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%K, %N} + %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref{%M, %N} %c4 = arith.constant 4 : index %c1 = arith.constant 1 : index %0 = memref.dim %arg0, %c1 : memref @@ -80,12 +78,10 @@ hal.executable private @matmul { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv_1d { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -97,9 +93,9 @@ hal.executable private @conv_1d { func.func @conv_1d() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(2) : memref<3x6x1xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<3x8x1xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x1x1xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<3x6x1xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<3x8x1xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x1x1xf32> %3 = gpu.block_id x %4 = gpu.block_id y %5 = gpu.block_id z @@ -125,9 +121,9 @@ hal.executable private @conv_1d { } // CHECK-LABEL: func.func @conv_1d -// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[ARG0SV1:.+]] = memref.subview %[[ARG0]] // CHECK-DAG: %[[ARG1SV1:.+]] = memref.subview %[[ARG1]] // CHECK-DAG: %[[RETSV1:.+]] = memref.subview %[[RET]] @@ -157,12 +153,10 @@ hal.executable private @conv_1d { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv_2d { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -182,9 +176,9 @@ hal.executable private @conv_2d { %ic = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index %fh = 
hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index %fw = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index - %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%n, %ih, %iw, %ic} - %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%fh, %fw, %ic, %oc} - %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref{%n, %oh, %ow, %oc} + %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%n, %ih, %iw, %ic} + %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%fh, %fw, %ic, %oc} + %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref{%n, %oh, %ow, %oc} %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c1 = arith.constant 1 : index @@ -239,9 +233,9 @@ hal.executable private @conv_2d { // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 4)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 32)> // CHECK: func.func @conv_2d -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 // CHECK-DAG: %[[C1:.+]] = arith.constant 1 // CHECK-DAG: %[[C4:.+]] = arith.constant 4 @@ -272,12 +266,10 @@ hal.executable private @conv_2d { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - 
#hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @conv_3d { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -289,9 +281,9 @@ hal.executable private @conv_3d { func.func @conv_3d() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x7x7x7x2xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x8x8x8x3xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<2x2x2x3x2xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x7x7x7x2xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2x8x8x8x3xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<2x2x2x3x2xf32> %3 = gpu.block_id x %4 = gpu.block_id y %5 = gpu.block_id z @@ -342,12 +334,10 @@ hal.executable private @conv_3d { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module { hal.executable private @pooling_nhwc_max { @@ -359,9 +349,9 @@ module { builtin.module { func.func @pooling_nhwc_max() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x16x16x6xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x4xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x14x13x6xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : 
memref<2x16x16x6xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x4xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x14x13x6xf32> %3 = gpu.block_id x %4 = gpu.block_id y %5 = affine.apply #map0()[%4] @@ -385,9 +375,9 @@ module { // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 4)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 32)> // CHECK: func.func @pooling_nhwc_max -// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: %[[SV1:.+]] = memref.subview %[[ARG0]] // CHECK: %[[SV2:.+]] = memref.subview %[[RET0]] // CHECK-DAG: %[[TIDX:.+]] = gpu.thread_id x @@ -409,12 +399,10 @@ module { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable @matvec { @@ -428,9 +416,9 @@ hal.executable @matvec { %c250 = arith.constant 250 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<250x1024xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) 
set(0) binding(2) alignment(64) offset(%c0) : memref<250xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<250x1024xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<250xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_x] diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir index 8ce2c91849110..63c4d2e3bf06f 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir @@ -2,12 +2,10 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @static_scatter_update_slice { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -20,9 +18,9 @@ hal.executable private @static_scatter_update_slice { %c40 = arith.constant 40 : index %c500 = arith.constant 500 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<40x500xi32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<40x1xi32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<100x500xi32> + %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(0) : memref<40x500xi32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<40x1xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<100x500xi32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -50,9 +48,9 @@ hal.executable private @static_scatter_update_slice { } // CHECK-LABEL: func.func @static_scatter_update_slice() -// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) -// CHECK: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) +// CHECK: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // CHECK: scf.for // CHECK: scf.for // CHECK: %[[WG_UPDATE:.+]] = memref.subview %[[ARG0]] diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir index 42738b7f3c255..051cd3daefdfe 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir @@ -2,10 +2,8 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable private @static_3d_sort { hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -16,7 +14,7 @@ hal.executable private @static_3d_sort { builtin.module { func.func @static_3d_sort() { %c0 = arith.constant 0 : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<64x32x128xi32, #hal.descriptor_type> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<64x32x128xi32, #hal.descriptor_type> memref.assume_alignment %0, 64 : memref<64x32x128xi32, #hal.descriptor_type> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -34,7 +32,7 @@ hal.executable private @static_3d_sort { } // CHECK-LABEL: func.func @static_3d_sort() -// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) +// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) // CHECK: %[[WG_OUTPUT:.+]] = memref.subview %[[ARG0]] // CHECK: %[[TID_X:.+]] = gpu.thread_id x // CHECK: %[[DIM_X:.+]] = gpu.block_dim x diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir index daa7c7e5c8814..afc60d50b74a5 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir @@ -8,13 +8,11 @@ // Single tile per workgroup means no subview ops for promotion. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 32)> @@ -24,13 +22,13 @@ func.func @matmul_f16_32x32x32() attributes {translation_info = #translation} { %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf16> memref.assume_alignment %0, 64 : memref<32x32xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf16> memref.assume_alignment %1, 64 : memref<32x32xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf16> memref.assume_alignment %2, 64 : memref<32x32xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<32x32xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<32x32xf16> memref.assume_alignment %3, 64 : memref<32x32xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -56,8 +54,8 @@ func.func @matmul_f16_32x32x32() attributes {translation_info = #translation} { 
// CHECK-LABEL: func.func @matmul_f16_32x32x32() -// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) -// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) +// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) +// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-NOT: memref.alloc() // CHECK-NOT: memref.copy @@ -69,12 +67,10 @@ func.func @matmul_f16_32x32x32() attributes {translation_info = #translation} { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 32)> @@ -89,10 +85,10 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info %c512 = arith.constant 512 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> + %3 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -183,12 +179,10 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info // Cooperative matrix fusable elementwise ops do not need promote C. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 32)> @@ -203,10 +197,10 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info %c512 = arith.constant 512 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : 
memref<32x128x512xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -267,12 +261,10 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info // No need to promote C if there is no fused element wise ops. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 32)> @@ -286,9 +278,9 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info %c512 = arith.constant 512 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -325,7 +317,7 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info // PROMOTEC: %[[LHS_ALLOC:.+]] = memref.alloc() : memref<32x1x32xf16, 
#gpu.address_space> // PROMOTEC-NOT: memref.alloc() -// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) +// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) // PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN2]] // PROMOTEC: linalg.fill @@ -352,12 +344,10 @@ func.func @generic_batch_matmul_f16_32x128x512x64() attributes {translation_info // No need to promote again with allocations from bufferization. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -368,9 +358,9 @@ func.func @batch_matmul_f16_1x64x128x512() attributes {translation_info = #trans %c4096 = arith.constant 4096 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x4096x512xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1x512x4096xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<1x4096x4096xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x4096x512xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1x512x4096xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<1x4096x4096xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -429,13 +419,11 @@ func.func 
@batch_matmul_f16_1x64x128x512() attributes {translation_info = #trans // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -448,10 +436,10 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation %c4096 = arith.constant 4096 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x64xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<4096xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x64xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<4096xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -485,8 +473,8 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation // PROMOTEC-DAG: %[[RHS_ALLOC:.+]] = 
memref.alloc() : memref<32x128xf16, #gpu.address_space> // PROMOTEC-NOT: memref.alloc() -// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN3]] // PROMOTEC: linalg.fill @@ -520,13 +508,11 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation // Transposed+broadcasted elementwise ops does not need promoting C matrix. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -539,10 +525,10 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation %c4096 = arith.constant 4096 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x64xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<512xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x64xf16> + %1 = hal.interface.binding.subspan 
layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<512xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -576,8 +562,8 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation // PROMOTEC-DAG: %[[RHS_ALLOC:.+]] = memref.alloc() : memref<32x128xf16, #gpu.address_space> // PROMOTEC-NOT: memref.alloc() -// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) -// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) +// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) +// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) // PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN3]] // PROMOTEC: linalg.fill @@ -611,12 +597,10 @@ func.func @matmul_f16_f512x4096x64() attributes {translation_info = #translation // Inlined large constant array needs promoting C matrix. 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -633,9 +617,9 @@ func.func @matmul_f16_128x262144x2304() attributes {translation_info = #translat %cst = arith.constant 0.000000e+00 : f16 %cst_0 = arith.constant dense<"0x69222B2E40A3002A45AC1AAB2E2E202DA21C212680264C2A102314A041A7D029CB28352E5BAAD3B02F299D9A142B8AA1D1285C28412B25AF9A24EE2BA22C242D53AD9E2948A9289FCF301D28012F08AD68A6DD20ECAC912465290B2E9420C5AA50A222A912AB9526B62ADA2039AD4D912C9FDD287B20B224D329BA2A4D2C41A76DAB7E30B027F62ED1A0F1273A2BAE9D0FA48029812992A65AA92A2C9C2EE9A744A4632C5FA8A9A4CF2D70A482A0F5A2DBA7B6304B9D22A52B1B9DA8E424722AB5ACD0248A2B8B29C82D782E402D1A99F0A60CA4DE2DD32815266F2A6B247FA6FE214E2853AA402390AB6925F1A339307F2664A23CACBE28BA2B3D286DB0BA2E"> : tensor<128xf16> %0 = bufferization.to_memref %cst_0 : memref<128xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c96565312) : memref<128x2304xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c806357120) : memref<2304x262144xf16> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c134217728) : memref<128x262144xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c96565312) : memref<128x2304xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c806357120) : memref<2304x262144xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c134217728) : memref<128x262144xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] 
: index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir index 36510aeb94a22..cc235c487c473 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --mlir-print-local-scope --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(func.func(iree-spirv-tile-and-promote, cse))' %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 128)> @@ -19,10 +17,10 @@ func.func @matmul_f32_256x1024x128() attributes {translation_info = #translation %c256 = arith.constant 256 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x128xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x1024xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x1024xf32> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<256x1024xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x128xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : 
memref<128x1024xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x1024xf32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<256x1024xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -58,10 +56,10 @@ func.func @matmul_f32_256x1024x128() attributes {translation_info = #translation // CHECK-DAG: %[[MEM_A:.+]] = memref.alloc() : memref<128x32xf32, #gpu.address_space> // CHECK-DAG: %[[MEM_B:.+]] = memref.alloc() : memref<32x128xf32, #gpu.address_space> -// CHECK-DAG: %[[BUFFER_A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : memref<256x128xf32> -// CHECK-DAG: %[[BUFFER_B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : memref<128x1024xf32> -// CHECK-DAG: %[[BUFFER_C:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) {{.+}} : memref<256x1024xf32> -// CHECK-DAG: %[[BUFFER_D:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.+}} : memref<256x1024xf32> +// CHECK-DAG: %[[BUFFER_A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : memref<256x128xf32> +// CHECK-DAG: %[[BUFFER_B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : memref<128x1024xf32> +// CHECK-DAG: %[[BUFFER_C:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) {{.+}} : memref<256x1024xf32> +// CHECK-DAG: %[[BUFFER_D:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.+}} : memref<256x1024xf32> // CHECK: scf.for // CHECK: scf.for @@ -112,12 +110,10 @@ func.func @matmul_f32_256x1024x128() attributes {translation_info = #translation // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> 
+#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 64)> @@ -130,9 +126,9 @@ func.func @batch_matmul_16x1024x1024x80() attributes {translation_info = #transl %c1024 = arith.constant 1024 : index %cst = arith.constant 0.111803398 : f32 %cst_0 = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x1024x80xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x80x1024xf16> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x1024x1024xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x1024x80xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x80x1024xf16> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x1024x1024xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -179,12 +175,10 @@ func.func @batch_matmul_16x1024x1024x80() attributes {translation_info = #transl // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #map = affine_map<()[s0] -> (s0 * 512)> @@ -196,9 +190,9 @@ func.func @batch_matmul_f32_16x4096x40x4096() attributes {translation_info = #tr %c40 = arith.constant 40 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x4096x4096xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x4096x40xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x4096x40xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x4096x4096xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x4096x40xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x4096x40xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir index f04d6880b6e32..aa1e45f8f0547 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir @@ -4,12 +4,10 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @fused_fill_batch_matmul { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -23,9 +21,9 @@ hal.executable private @fused_fill_batch_matmul { %cst = arith.constant 0.000000e+00 : f32 %c4 = arith.constant 4 : index %c1024 = arith.constant 1024 : index - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir index e0faab5b6a1cf..ec4976c2e9725 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir @@ -4,12 +4,10 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @nhwc_conv_static_shape_f32 { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -23,9 +21,9 @@ hal.executable private @nhwc_conv_static_shape_f32 { %c16 = arith.constant 16 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -78,12 +76,10 @@ hal.executable private @nhwc_conv_static_shape_f32 { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @nhwc_nhwc_depthwise_conv_static_shape_f32 { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -97,9 +93,9 @@ hal.executable private @nhwc_nhwc_depthwise_conv_static_shape_f32 { %c96 = arith.constant 96 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -148,13 +144,11 @@ hal.executable private @nhwc_nhwc_depthwise_conv_static_shape_f32 { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @low_padded_conv { @@ -176,10 +170,10 @@ hal.executable private @low_padded_conv { %c0 = arith.constant 0 : index %c112 = arith.constant 112 : index %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : 
!flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor> %4 = tensor.empty() : tensor<1x112x112x32xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -266,13 +260,11 @@ hal.executable private @low_padded_conv { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @low_high_padded_nhwc_depthwise_conv { @@ -294,10 +286,10 @@ hal.executable private @low_high_padded_nhwc_depthwise_conv { %c0 = arith.constant 0 : index %c112 = arith.constant 112 : index %c32 = arith.constant 32 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) 
alignment(32) offset(%c0) : !flow.dispatch.tensor> %4 = tensor.empty() : tensor<1x112x112x32xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -388,12 +380,10 @@ hal.executable private @low_high_padded_nhwc_depthwise_conv { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @nchw_conv_static_shape_f32 { @@ -407,9 +397,9 @@ hal.executable private @nchw_conv_static_shape_f32 { %c1280 = arith.constant 1280 : index %c8 = arith.constant 8 : index %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -462,13 +452,11 @@ hal.executable private @nchw_conv_static_shape_f32 { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - 
#hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @nhwc_conv_static_shape_f16_batch2 { @@ -483,10 +471,10 @@ hal.executable private @nhwc_conv_static_shape_f16_batch2 { %c320 = arith.constant 320 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir index e194b3007d9bf..5487820f73241 100644 --- 
a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir @@ -3,12 +3,10 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable private @matmul_static_shape_f16 { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -21,9 +19,9 @@ hal.executable private @matmul_static_shape_f16 { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f16 %c4096 = arith.constant 4096 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -65,12 +63,10 @@ hal.executable private @matmul_static_shape_f16 { #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> 
hal.executable private @matmul_static_shape_f32 { hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) { @@ -83,9 +79,9 @@ hal.executable private @matmul_static_shape_f32 { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %c4096 = arith.constant 4096 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir index 9c43800ad7f8d..fb3e57cab9082 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir @@ -4,11 +4,9 @@ #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @pooling_nhwc_sum_f32 { @@ -28,8 +26,8 @@ hal.executable private @pooling_nhwc_sum_f32 { %c8 = arith.constant 8 : index %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan 
layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = tensor.empty() : tensor<12x12xf32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir index a27be5d924b39..cc56261e2d246 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir @@ -2,14 +2,12 @@ // RUN: --pass-pipeline='builtin.module(func.func(iree-spirv-tile-to-cooperative-ops, iree-codegen-generic-vectorization, iree-spirv-vectorize-to-cooperative-ops, iree-codegen-optimize-tensor-insert-extract-slices, canonicalize, cse))' \ // RUN: %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -23,11 +21,11 @@ func.func @matmul_256x1024x128_div_add() attributes {translation_info = #transla %2 = gpu.thread_id z %alloc = memref.alloc() : 
memref<32x32xf16, 3> %alloc_0 = memref.alloc() : memref<32x32xf16, 3> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x1024xf16> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024x128xf16> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x128xf16> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<256x128xf16> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : memref<256x128xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x1024xf16> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024x128xf16> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x128xf16> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<256x128xf16> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : memref<256x128xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %8 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] @@ -134,13 +132,11 @@ func.func @matmul_256x1024x128_div_add() attributes {translation_info = #transla // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = #iree_codegen.lowering_config #translation = 
#iree_codegen.translation_info @@ -155,13 +151,13 @@ func.func @matmul_256x1024x128_div_add() attributes {translation_info = #transla %2 = gpu.thread_id z %alloc = memref.alloc() : memref<1x32x32xf16, 3> %alloc_0 = memref.alloc() : memref<1x32x32xf16, 3> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x128x512xf16> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x128x512xf16> memref.assume_alignment %3, 64 : memref<16x128x512xf16> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x512x256xf16> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x512x256xf16> memref.assume_alignment %4, 64 : memref<16x512x256xf16> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x128x256xf16> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x128x256xf16> memref.assume_alignment %5, 64 : memref<16x128x256xf16> - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<16x128x256xf16> + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<16x128x256xf16> memref.assume_alignment %6, 64 : memref<16x128x256xf16> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -272,12 +268,10 @@ func.func @matmul_256x1024x128_div_add() attributes {translation_info = #transla // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<4, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #config = 
#iree_codegen.lowering_config #translation = #iree_codegen.translation_info @@ -292,9 +286,9 @@ func.func @matmul_256x1024x128_mixed_signedness_int8() { %2 = gpu.thread_id z %alloc = memref.alloc() : memref<32x32xi8, 3> %alloc_0 = memref.alloc() : memref<32x32xi8, 3> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x1024xi8> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024x128xi8> - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : memref<256x128xi32> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x1024xi8> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024x128xi8> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x128xi32> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_id_y = hal.interface.workgroup.id[1] : index %8 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir index 07a831e7994cf..9c2477a05d526 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir +++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir @@ -2,14 +2,18 @@ #executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", { spirv.target_env = #spirv.target_env<#spirv.vce, - api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>}> - + [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>, + api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>> +}> // CHECK-DAG: #[[$TARGET0:.+]] = 
#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>}> // CHECK-DAG: #[[$TARGET1:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>}> -#pipeline_layout = #hal.pipeline.layout, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> hal.executable private @predict_dispatch_0 { // CHECK-LABEL: hal.executable.variant public @vulkan_spirv_fb0 @@ -22,10 +26,12 @@ hal.executable private @predict_dispatch_0 { hal.return %c2, %c1, %c1 : index, index, index } // CHECK-NOT: spirv.target_env - builtin.module attributes {spirv.target_env = #spirv.target_env< + builtin.module attributes { + spirv.target_env = #spirv.target_env< #spirv.vce, - api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>} { + api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>> + } { spirv.module Logical GLSL450 requires #spirv.vce { spirv.func @predict_dispatch_0_vecmat_128x784_f32() "None" { spirv.Return @@ -48,10 +54,12 @@ hal.executable private @predict_dispatch_1 { hal.return %c10, %c1, %c1 : index, index, index } // CHECK-NOT: spirv.target_env - builtin.module attributes {spirv.target_env = #spirv.target_env< + builtin.module attributes { + spirv.target_env = #spirv.target_env< #spirv.vce, - api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>} { + api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>> + } { spirv.module Logical GLSL450 requires #spirv.vce { spirv.func @predict_dispatch_1_vecmat_10x128_f32() "None" { spirv.Return diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir index 0a2fb48099997..93a861d42ad88 100644 --- a/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir +++ 
b/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir @@ -49,23 +49,21 @@ func.func @dont_vectorize_scalar_load(%arg0: memref<4096x4096xf32>, %x: index, % // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @resource_copy() -// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x1024xvector<4xf32>> -// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf32>> +// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x1024xvector<4xf32>> +// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf32>> // CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>> // CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>> func.func @resource_copy() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf32> %v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf32>, vector<4xf32> vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x4096xf32> return @@ -73,24 +71,22 @@ func.func @resource_copy() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @resource_copy_with_offset() -// 
CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%{{.*}}) : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>> -// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf32>> +// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%{{.*}}) : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>> +// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf32>> // CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}, %{{.*}}] : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>> // CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>> func.func @resource_copy_with_offset() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf32> %v = vector.transfer_read %0[%c0, %c0, %c0], %cst : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>>, vector<4xf32> vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x4096xf32> return @@ -98,23 +94,21 @@ func.func @resource_copy_with_offset() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // 
CHECK-LABEL: func.func @resource_copy_f16 -// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x1024xvector<4xf16>> -// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf16>> +// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x1024xvector<4xf16>> +// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf16>> // CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf16>> // CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf16>> func.func @resource_copy_f16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf16> %v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf16>, vector<4xf16> vector.transfer_write %v, %1[%c0, %c0] : vector<4xf16>, memref<4096x4096xf16> return @@ -122,23 +116,21 @@ func.func @resource_copy_f16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @resource_copy_8xf16 -// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x512xvector<4xf32>> -// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x512xvector<4xf32>> +// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x512xvector<4xf32>> +// 
CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x512xvector<4xf32>> // CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x512xvector<4xf32>> // CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x512xvector<4xf32>> func.func @resource_copy_8xf16() { %cst = arith.constant 0.000000e+00 : f16 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf16> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf16> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf16> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf16> %v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf16>, vector<8xf16> vector.transfer_write %v, %1[%c0, %c0] : vector<8xf16>, memref<4096x4096xf16> return @@ -146,11 +138,9 @@ func.func @resource_copy_8xf16() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @resource_copy_dynamic_shape() @@ -162,10 +152,10 @@ func.func @resource_copy_dynamic_shape() { %dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index %dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index - // CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref>{%[[DIM0]], %[[DIM1]]} - // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref>{%[[DIM0]], %[[DIM1]]} - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%dim0, %dim1} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%dim0, %dim1} + // CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan 
layout({{.+}}) binding(0) : memref>{%[[DIM0]], %[[DIM1]]} + // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref>{%[[DIM0]], %[[DIM1]]} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%dim0, %dim1} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%dim0, %dim1} // CHECK: %[[VAL:.+]] = memref.load %[[INPUT]] // CHECK: memref.store %[[VAL]], %[[OUTPUT]] @@ -177,11 +167,9 @@ func.func @resource_copy_dynamic_shape() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @resource_copy_dynamic_last_dim() @@ -189,10 +177,10 @@ func.func @resource_copy_dynamic_last_dim() { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index %dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index - // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x?xf32> - // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x?xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x?xf32>{%dim} - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x?xf32>{%dim} + // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x?xf32> + // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x?xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x?xf32>{%dim} + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x?xf32>{%dim} %v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x?xf32>, vector<4xf32> vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x?xf32> return @@ -200,11 +188,9 @@ func.func @resource_copy_dynamic_last_dim() { // ----- 
-#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @dont_vectorize_odd_vector_size @@ -213,10 +199,10 @@ func.func @dont_vectorize_odd_vector_size() { %c0 = arith.constant 0 : index // CHECK: hal.interface.binding.subspan // CHECK-SAME: memref<4x3xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x3xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x3xf32> // CHECK: hal.interface.binding.subspan // CHECK-SAME: memref<4x3xf32> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4x3xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4x3xf32> %v = vector.transfer_read %0[%c0, %c0], %cst : memref<4x3xf32>, vector<3xf32> vector.transfer_write %v, %1[%c0, %c0] : vector<3xf32>, memref<4x3xf32> return @@ -224,11 +210,9 @@ func.func @dont_vectorize_odd_vector_size() { // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @scalarize_vector_transfer_op @@ -236,8 +220,8 @@ func.func @scalarize_vector_transfer_op(%arg: vector<3xf32>) -> (vector<3xf32>) %c0 = arith.constant 0: index %c3 = arith.constant 3: index %f0 = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<20xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<20xf32> // CHECK-DAG: %[[INDEX0:.+]] = arith.constant 3 : index // CHECK-DAG: %[[INDEX1:.+]] = arith.constant 4 : index // CHECK-DAG: 
%[[INDEX2:.+]] = arith.constant 5 : index @@ -289,17 +273,15 @@ func.func @scalarize_non_minor_identity_transfer_read(%memory: memref<4x2x4xi32> // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @scalarize_non_minor_identity_transfer_write // CHECK-SAME: (%[[VALUE:.+]]: vector<4xf32>, %[[I1:.+]]: index, %[[I2:.+]]: index) func.func @scalarize_non_minor_identity_transfer_write(%value: vector<4xf32>, %i1: index, %i2: index) { %c0 = arith.constant 0: index - %buffer = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x130x130x64xf32> + %buffer = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x130x130x64xf32> vector.transfer_write %value, %buffer[%c0, %i1, %i2, %c0] {in_bounds = [true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d2)>} : vector<4xf32>, memref<1x130x130x64xf32> return } @@ -346,16 +328,14 @@ func.func @scalarize_0d_transfer_write(%val: vector, %memory: memref<4xf32> // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @scalarize_indivisible_vector_transfer_read_op func.func @scalarize_indivisible_vector_transfer_read_op(%i: index) -> vector<4xf32> { %f0 = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10xf32> %1 = vector.transfer_read %0[%i], %f0 : memref<10xf32>, vector<4xf32> return %1: vector<4xf32> } @@ -366,16 +346,14 @@ func.func @scalarize_indivisible_vector_transfer_read_op(%i: index) -> vector<4x // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @scalarize_indivisible_vector_transfer_write_op func.func 
@scalarize_indivisible_vector_transfer_write_op(%value: vector<4xf32>, %i: index) { %f0 = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10xf32> vector.transfer_write %value, %0[%i] : vector<4xf32>, memref<10xf32> return } @@ -434,17 +412,15 @@ func.func @vectorize_alloc_with_mma_load_store_unaligned_case(%i0: index, %i1: i // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @scalarize_vector_load_op // CHECK-SAME: (%[[ARG0:.+]]: index) func.func @scalarize_vector_load_op(%i: index) -> vector<4xi32> { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10x10xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10x10xi32> %1 = vector.load %0[%c0, %i] : memref<10x10xi32>, vector<4xi32> return %1: vector<4xi32> } @@ -469,35 +445,31 @@ func.func @scalarize_vector_load_op(%i: index) -> vector<4xi32> { // Test that the memref is not vectorized if the element type is a complex type. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: func.func @complex_memref func.func @complex_memref(%x: index, %y: index) -> complex { - // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<8x32xcomplex> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x32xcomplex> + // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<8x32xcomplex> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x32xcomplex> %1 = memref.load %0[%x, %y] : memref<8x32xcomplex> return %1: complex } // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK-LABEL: func.func @vectorize_mma_load_store_non_identity_memref // CHECK-SAME: (%[[I0:.+]]: index, %[[I1:.+]]: index) func.func @vectorize_mma_load_store_non_identity_memref(%i0: index, %i1: index) { %c0 = arith.constant 0 : index - %span0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type> - %span1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type> + %span0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type> + %span1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type> %val = gpu.subgroup_mma_load_matrix %span0[%i0, %i1] {leadDimension = 1280 : index} : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, 
#hal.descriptor_type> -> !gpu.mma_matrix<16x16xf16, "COp"> gpu.subgroup_mma_store_matrix %val, %span1[%i0, %i1] {leadDimension = 1280 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type> return @@ -512,22 +484,20 @@ func.func @vectorize_mma_load_store_non_identity_memref(%i0: index, %i1: index) // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_read_i4_memref_vector8(%x: index) -> vector<8xi4> { %c0_i4 = arith.constant 0 : i4 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi4> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi4> %1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<8xi4> return %1: vector<8xi4> } // CHECK-LABEL: func.func @transfer_read_i4_memref_vector8 // CHECK-SAME: (%[[ARG:.+]]: index) -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<256xvector<1xi32>> +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<256xvector<1xi32>> // CHECK: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 8)>()[%[[ARG]]] // CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<256xvector<1xi32>> // CHECK: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi32> to vector<8xi4> @@ -537,14 +507,14 @@ func.func @transfer_read_i4_memref_vector8(%x: index) -> vector<8xi4> { // func.func @transfer_read_i4_memref_vector4(%x: index) -> vector<4xi4> { // %c0_i4 = arith.constant 0 : i4 -// %0 = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<2048xi4> +// %0 = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<2048xi4> // %1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<4xi4> // return %1: vector<4xi4> // } // XXXXX-LABEL: func.func 
@transfer_read_i4_memref_vector4 // XXXXX-SAME: (%[[ARG:.+]]: index) -// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<512xvector<2xi8>> +// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<512xvector<2xi8>> // XXXXX: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 4)>()[%[[ARG]]] // XXXXX: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<512xvector<2xi8>> // XXXXX: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<2xi8> to vector<4xi4> @@ -552,22 +522,20 @@ func.func @transfer_read_i4_memref_vector8(%x: index) -> vector<8xi4> { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_read_i4_memref_vector2(%x: index) -> vector<2xi4> { %c0_i4 = arith.constant 0 : i4 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi4> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi4> %1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<2xi4> return %1: vector<2xi4> } // XXXXX-LABEL: func.func @transfer_read_i4_memref_vector2 // XXXXX-SAME: (%[[ARG:.+]]: index) -// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1024xvector<1xi8>> +// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1024xvector<1xi8>> // XXXXX: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 2)>()[%[[ARG]]] // XXXXX: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<1024xvector<1xi8>> // XXXXX: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<2xi4> @@ -575,34 +543,30 @@ func.func @transfer_read_i4_memref_vector2(%x: index) -> vector<2xi4> { // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_read_i3_memref_vector8(%x: index) 
-> vector<8xi3> { %c0_i3 = arith.constant 0 : i3 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi3> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi3> %1 = vector.transfer_read %0[%x], %c0_i3 {in_bounds = [true]} : memref<2048xi3>, vector<8xi3> return %1: vector<8xi3> } // CHECK-LABEL: func.func @transfer_read_i3_memref_vector8 -// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<2048xi3> +// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<2048xi3> // CHECK-COUNT-8: memref.load {{.+}} : memref<2048xi3> // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_read_vector2_vector8(%x: index) -> (vector<2xi32>, vector<8xi32>) { %c0 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi32> %1 = vector.transfer_read %0[%x], %c0 {in_bounds = [true]} : memref<2048xi32>, vector<2xi32> %2 = vector.transfer_read %0[%x], %c0 {in_bounds = [true]} : memref<2048xi32>, vector<8xi32> return %1, %2: vector<2xi32>, vector<8xi32> @@ -628,15 +592,13 @@ func.func @transfer_read_vector2_vector8(%x: index) -> (vector<2xi32>, vector<8x // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @transfer_write_vector2_vector8(%x: index, %val0: vector<2xi32>, %val1: vector<8xi32>) { %c0 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi32> vector.transfer_write %val0, %0[%x] : vector<2xi32>, memref<2048xi32> vector.transfer_write %val1, %0[%x] : vector<8xi32>, memref<2048xi32> return @@ -644,7 +606,7 @@ func.func 
@transfer_write_vector2_vector8(%x: index, %val0: vector<2xi32>, %val1 // CHECK-LABEL: func @transfer_write_vector2_vector8 // CHECK-SAME: (%[[INDEX:.+]]: index, %[[VAL0:.+]]: vector<2xi32>, %[[VAL1:.+]]: vector<8xi32>) -// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1024xvector<2xi32>> +// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1024xvector<2xi32>> // CHECK: %[[OFFSET0:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 2)>()[%[[INDEX]]] // CHECK: memref.store %[[VAL0]], %[[SUBSPAN]][%[[OFFSET0]]] @@ -663,19 +625,17 @@ func.func @transfer_write_vector2_vector8(%x: index, %val0: vector<2xi32>, %val1 // ----- -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @scalarize_masked_vector_transfer_op(%arg: vector<3xf32>, %mask: vector<3xi1>) -> (vector<3xf32>) { %c0 = arith.constant 0: index %c3 = arith.constant 3: index %f0 = arith.constant 0.0 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<20xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<20xf32> %3 = vector.transfer_read %0[%c3], %f0, %mask : memref<20xf32>, vector<3xf32> vector.transfer_write %arg, %2[%c3], %mask : vector<3xf32>, memref<20xf32> return %3: vector<3xf32> @@ -722,17 +682,15 @@ func.func @scalarize_masked_vector_transfer_op(%arg: vector<3xf32>, %mask: vecto // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @extract_vector_transfer_read_mask_bits(%arg: vector<3xf32>, %index: index) -> (vector<3xf32>) { %c3 = arith.constant 3: index %f0 = arith.constant 
0.0 : f32 %mask = vector.create_mask %index : vector<3xi1> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32> %1 = vector.transfer_read %0[%c3], %f0, %mask : memref<20xf32>, vector<3xf32> return %1: vector<3xf32> } diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp index 337d4ed7651cb..3507752242739 100644 --- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp +++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp @@ -551,7 +551,7 @@ struct FoldReshapeIntoInterfaceTensorLoad : OpRewritePattern { Value newSubspanOp = rewriter.create( subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(), - subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(), + subspanOp.getBinding(), subspanOp.getByteOffset(), subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); @@ -623,7 +623,7 @@ struct FoldExpandShapeIntoInterfaceTensorStore rewriter.setInsertionPointAfter(subspanOp); newSubspanOp = rewriter.create( subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(), - subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(), + subspanOp.getBinding(), subspanOp.getByteOffset(), subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); } @@ -759,7 +759,7 @@ struct FoldCollapseShapeIntoInterfaceTensorStore rewriter.setInsertionPointAfter(subspanOp); newSubspanOp = rewriter.create( subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(), - subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(), + subspanOp.getBinding(), subspanOp.getByteOffset(), subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr()); } diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir 
b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir index 2baedf6a305bf..e760edaddb72a 100644 --- a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir +++ b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir @@ -1,11 +1,9 @@ // RUN: iree-opt --split-input-file --iree-vmvx-link-executables %s | FileCheck %s #vmvx_target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @dispatch_0 { @@ -87,9 +85,9 @@ func.func @basic_linking() -> () attributes { %dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@vmvx::@dispatch_0) : index %dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@vmvx::@dispatch_1) : index %dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@vmvx::@dispatch_2) : index - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] 
workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) return } util.initializer { @@ -104,9 +102,9 @@ util.initializer { %dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@vmvx::@dispatch_0) : index %dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@vmvx::@dispatch_1) : index %dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@vmvx::@dispatch_2) : index - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None) util.return } @@ -158,8 +156,8 @@ util.initializer { // CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_1) // CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_2) // CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] 
workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) // // CHECK: util.initializer // CHECK-DAG: %[[DISPATCH_0_EXE:.+]] = hal.executable.lookup device(%{{.+}}) executable(@link_executables_linked_vmvx) : !hal.executable @@ -169,17 +167,15 @@ util.initializer { // CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_1) // CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_2) // CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) -// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1]) +// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : 
!hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1]) // ----- #vmvx_target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @dispatch_0 { diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir index 38cc29a25d0d6..9f18092190703 100644 --- a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir +++ b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir @@ -2,11 +2,9 @@ #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1, d2) -> (d0, d2)> @@ -23,15 +21,15 @@ func.func @mmt4d_i8() attributes {hal.executable.target = #executable_target_vmv %0:2 = iree_codegen.query_tile_sizes tensor<16x16xi8, #iree_encoding.encoding> -> index, index %1 = affine.apply #map3()[%0#0] %2 = affine.apply #map3()[%0#1] - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} %4:2 = iree_codegen.query_tile_sizes tensor<16x16xi8, #iree_encoding.encoding> -> index, index %5 = affine.apply #map3()[%4#0] %6 = affine.apply #map3()[%4#1] - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c256) flags(ReadOnly) : !flow.dispatch.tensor>{%5, %6, %4#0, %4#1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) 
offset(%c256) flags(ReadOnly) : !flow.dispatch.tensor>{%5, %6, %4#0, %4#1} %8:2 = iree_codegen.query_tile_sizes tensor<16x16xi32, #iree_encoding.encoding> -> index, index %9 = affine.apply #map3()[%8#0] %10 = affine.apply #map3()[%8#1] - %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c512) : !flow.dispatch.tensor>{%9, %10, %8#0, %8#1} + %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c512) : !flow.dispatch.tensor>{%9, %10, %8#0, %8#1} %12 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [%1, %2, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} -> tensor %13 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0, 0], sizes = [%5, %6, %4#0, %4#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%5, %6, %4#0, %4#1} -> tensor %14 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [%9, %10, %8#0, %8#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%9, %10, %8#0, %8#1} -> tensor diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir index 5a2f6408de027..34bcdaa444d12 100644 --- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir @@ -1,18 +1,16 @@ // RUN: iree-opt -pass-pipeline='builtin.module(iree-vmvx-select-lowering-strategy)' -split-input-file %s | FileCheck %s #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> func.func @matmul_static() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { %cst = 
arith.constant 0.000000e+00 : f32 - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<512x128xf32> %5 = tensor.empty() : tensor<384x128xf32> @@ -32,11 +30,9 @@ func.func @matmul_static() attributes {hal.executable.target = #executable_targe // ----- #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d0, d1)> func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { @@ -46,8 +42,8 @@ func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_tar %3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index %4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index %5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref{%0, %1} - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref{%2, %3} + %6 = 
hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref{%0, %1} + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref{%2, %3} %subview = memref.subview %7[%4, %5] [%0, %1] [1, 1] : memref to memref> linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%6 : memref) outs(%subview : memref>) { ^bb0(%in: i32, %out: i32): @@ -66,19 +62,17 @@ func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_tar // ----- #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { %c0 = arith.constant 0 : index %c2 = arith.constant 2 : index %cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32> %cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32> - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xf32> %4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32> @@ -97,13 +91,11 @@ func.func @static_1d_fft_stage2() attributes {hal.executable.target = 
#executabl // ----- #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer>, - #hal.descriptor_set.binding<3, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> #map = affine_map<(d0, d1) -> (d1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -117,11 +109,11 @@ func.func @fusion_quant_matmul_generic() attributes {hal.executable.target = #ex %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %1 = arith.index_castui %0 : i32 to index - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c107520) : !flow.dispatch.tensor> - %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} - %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c107520) : !flow.dispatch.tensor> + %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} + %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) 
alignment(64) offset(%c0) : !flow.dispatch.tensor>{%1} %7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%1, 3360], strides = [1, 1] : !flow.dispatch.tensor>{%1} -> tensor %8 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [3360, 32], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<3360x32xi8> %9 = flow.dispatch.tensor.load %3, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor> -> tensor<32xi32> @@ -158,11 +150,9 @@ func.func @fusion_quant_matmul_generic() attributes {hal.executable.target = #ex // ----- #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { %c131072 = arith.constant 131072 : index @@ -175,8 +165,8 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %5 = arith.index_castui %1 : i32 to index %6 = arith.index_castui %2 : i32 to index %7 = arith.index_castui %3 : i32 to index - %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} - %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} + %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%4, %5} + %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] 
into %11 : tensor -> tensor @@ -194,11 +184,9 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // ----- #executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = true}> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> #map = affine_map<()[s0] -> (1024 ceildiv s0)> #map1 = affine_map<()[s0] -> (2048 ceildiv s0)> @@ -206,11 +194,11 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl func.func @elem_pack_ukernels() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> %1:2 = iree_codegen.query_tile_sizes tensor<1024x2048xf32, #iree_encoding.encoding>> -> index, index %2 = affine.apply #map()[%1#0] %3 = affine.apply #map1()[%1#1] - %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2, %3, %1#0, %1#1} + %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%2, %3, %1#0, %1#1} %5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 2048], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1024x2048xf32> %6 = tensor.empty() : tensor<1024x2048xf32> %7 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<1024x2048xf32>) outs(%6 : tensor<1024x2048xf32>) { @@ -240,10 +228,8 @@ func.func @elem_pack_ukernels() attributes {hal.executable.target = #executable_ // ----- 
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "none"}> -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @copy_cst() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} { %cst = arith.constant dense<4.200000e-01> : tensor<5x19x8x4xf32> @@ -255,7 +241,7 @@ func.func @copy_cst() attributes {hal.executable.target = #executable_target_vmv %4 = arith.shli %3, %c32_i64 : i64 %5 = arith.ori %2, %4 : i64 %6 = arith.index_castui %5 : i64 to index - %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) : !flow.dispatch.tensor> + %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) : !flow.dispatch.tensor> flow.dispatch.tensor.store %cst, %7, offsets = [0, 0, 0, 0], sizes = [5, 19, 8, 4], strides = [1, 1, 1, 1] : tensor<5x19x8x4xf32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp index 41acaf28ddbb3..751f784ae9a20 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp @@ -110,9 +110,6 @@ outlineDispatchExternOp(std::string name, dispatchExternOp.getSubgroupSizeAttr(), dispatchExternOp.getWorkgroupLocalMemoryAttr()); exportOp->setDialectAttrs(dispatchExternOp->getDialectAttrs()); - if (auto bindingsAttr = dispatchExternOp.getBindingsAttr()) { - exportOp->setAttr("hal.interface.bindings", bindingsAttr); - } if (!dispatchExternOp.getWorkgroupCount().empty()) { IRMapping mapper; dispatchExternOp.getWorkgroupCount().cloneInto( diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir 
b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir index 8ce0a36442bfe..3d25684eaf282 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir @@ -386,11 +386,9 @@ hal.executable private @ex0 { hal.return %selected : i1 } hal.executable.export public @dispatch ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) { ^bb0(%device: !hal.device, %workload: index): hal.return %workload, %workload, %workload : index, index, index @@ -405,11 +403,9 @@ hal.executable private @ex1 { hal.return %selected : i1 } hal.executable.export public @dispatch ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) { ^bb0(%device: !hal.device, %workload: index): hal.return %workload, %workload, %workload : index, index, index diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir index 4e73fe454c055..d4ff9aa0295fa 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir @@ -4,8 +4,7 @@ // CHECK-NEXT: hal.executable.variant public @a target(<"llvm-cpu", "a">) // CHECK-SAME: objects([#hal.executable.object<{path = "a.o"}>]) // CHECK-NEXT: hal.executable.export public @main ordinal(100) -// CHECK-SAME: layout(#hal.pipeline.layout, <1, storage_buffer>]>]>) -// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>] +// CHECK-SAME: layout(#hal.pipeline.layout, #hal.pipeline.binding]>) // CHECK-NEXT: ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): // CHECK-NEXT: %ok, %value = 
hal.device.query<%arg0 : !hal.device> key("some" :: "value") : i1, i32 // CHECK-NEXT: %0 = arith.index_cast %value : i32 to index @@ -16,8 +15,7 @@ // CHECK-NEXT: %ok, %value = hal.device.query<%arg0 : !hal.device> key("some" :: "feature") : i1, i32 // CHECK-NEXT: hal.return %ok : i1 // CHECK: hal.executable.export public @main ordinal(200) -// CHECK-SAME: layout(#hal.pipeline.layout, <1, storage_buffer>]>]>) -// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>] +// CHECK-SAME: layout(#hal.pipeline.layout, #hal.pipeline.binding]>) // CHECK-NEXT: ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): // Demonstrates the full functionality of an extern dispatch op. @@ -41,17 +39,10 @@ util.func public @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %ar hal.return %x_capture, %y_capture, %z : index, index, index } // Must match the external definition. - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) - // Optional, automatically inferred if omitted. - bindings([ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1> - ]) // Can have object references for multiple targets or configurations. 
objects({ #hal.executable.target<"llvm-cpu", "a"> ordinal(100) = [#hal.executable.object<{path = "a.o"}>], diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp index 39b5ec8abb643..14790bf2fb902 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp @@ -19,20 +19,15 @@ namespace mlir::iree_compiler::IREE::HAL { void PipelineLayout::print(llvm::raw_ostream &os) const { os << "PipelineLayout:\n"; - os << " push constants: " << pushConstantCount << "\n"; - os << " sets:\n"; - for (auto &setLayout : setLayouts) { - os << " set[" << setLayout.ordinal - << "]: " << stringifyDescriptorSetLayoutFlags(setLayout.flags) << "\n"; - for (auto &binding : setLayout.bindings) { - os << " binding[" << binding.ordinal - << "]: " << stringifyDescriptorType(binding.type) << "\n"; - } + os << " constants: " << constantCount << "\n"; + os << " bindings:\n"; + for (auto &binding : bindings) { + os << " binding[" << binding.ordinal + << "]: " << stringifyDescriptorType(binding.type) << "\n"; } os << " resource map:\n"; - for (auto setBinding : llvm::enumerate(resourceMap)) { - os << " resource[" << setBinding.index() << "]: set " - << setBinding.value().first << " binding " << setBinding.value().second + for (auto ordinal : llvm::enumerate(resourceMap)) { + os << " resource[" << ordinal.index() << "]: binding " << ordinal.value() << "\n"; } } @@ -41,33 +36,18 @@ void PipelineLayout::print(llvm::raw_ostream &os) const { static PipelineLayout assumeExportLayout(IREE::HAL::PipelineLayoutAttr layoutAttr) { PipelineLayout pipelineLayout; - pipelineLayout.pushConstantCount = layoutAttr.getPushConstants(); - - auto setLayoutAttrs = layoutAttr.getSetLayouts(); - int64_t bindingCount = 0; - for (auto setLayoutAttr : setLayoutAttrs) { - bindingCount += setLayoutAttr.getBindings().size(); - } + 
pipelineLayout.constantCount = layoutAttr.getConstants(); - pipelineLayout.setLayouts.resize(setLayoutAttrs.size()); + size_t bindingCount = layoutAttr.getBindings().size(); + pipelineLayout.bindings.resize(bindingCount); pipelineLayout.resourceMap.resize(bindingCount); - for (auto setLayoutAttr : setLayoutAttrs) { - DescriptorSetLayout setLayout; - setLayout.ordinal = setLayoutAttr.getOrdinal(); - setLayout.flags = setLayoutAttr.getFlags().value_or( - IREE::HAL::DescriptorSetLayoutFlags::None); - auto bindingAttrs = setLayoutAttr.getBindings(); - setLayout.bindings.resize(bindingAttrs.size()); - for (auto bindingAttr : bindingAttrs) { - DescriptorSetLayoutBinding setBinding; - setBinding.ordinal = bindingAttr.getOrdinal(); - setBinding.type = bindingAttr.getType(); - setBinding.flags = bindingAttr.getFlags(); - setLayout.bindings[setBinding.ordinal] = setBinding; - pipelineLayout.resourceMap.emplace_back(setLayout.ordinal, - setBinding.ordinal); - } - pipelineLayout.setLayouts[setLayout.ordinal] = setLayout; + for (auto [i, bindingAttr] : llvm::enumerate(layoutAttr.getBindings())) { + PipelineLayoutBinding binding; + binding.ordinal = i; + binding.type = bindingAttr.getType(); + binding.flags = bindingAttr.getFlags(); + pipelineLayout.bindings[binding.ordinal] = binding; + pipelineLayout.resourceMap[i] = binding.ordinal; } return pipelineLayout; @@ -174,30 +154,25 @@ deriveStreamExportLayout(IREE::Stream::ExecutableExportOp exportOp, } PipelineLayout pipelineLayout; - pipelineLayout.pushConstantCount = operandCount; + pipelineLayout.constantCount = operandCount; pipelineLayout.resourceMap.resize(bindingCount); - // TODO(#18154): simplify binding setup. 
- DescriptorSetLayout setLayout; - setLayout.ordinal = 0; - setLayout.flags = IREE::HAL::DescriptorSetLayoutFlags::None; - setLayout.bindings.reserve(bindingCount); + IREE::HAL::PipelineLayoutFlags layoutFlags = + IREE::HAL::PipelineLayoutFlags::None; for (unsigned i = 0; i < bindingCount; ++i) { const auto &descriptorInfo = descriptorInfos[i]; if (allEnumBitsSet(descriptorInfo.flags, IREE::HAL::DescriptorFlags::Indirect)) { - setLayout.flags = - setLayout.flags | IREE::HAL::DescriptorSetLayoutFlags::Indirect; + layoutFlags = layoutFlags | IREE::HAL::PipelineLayoutFlags::Indirect; } - DescriptorSetLayoutBinding setBinding; - setBinding.ordinal = setLayout.bindings.size(); - setBinding.type = IREE::HAL::DescriptorType::StorageBuffer; - setBinding.flags = descriptorInfo.flags; - setLayout.bindings.push_back(setBinding); - pipelineLayout.resourceMap[i] = - std::make_pair(setLayout.ordinal, setBinding.ordinal); + PipelineLayoutBinding binding; + binding.ordinal = i; + binding.type = IREE::HAL::DescriptorType::StorageBuffer; + binding.flags = descriptorInfo.flags; + pipelineLayout.bindings.push_back(binding); + pipelineLayout.resourceMap[i] = binding.ordinal; } - pipelineLayout.setLayouts.push_back(setLayout); + pipelineLayout.flags = layoutFlags; LLVM_DEBUG({ auto executableOp = exportOp->getParentOfType(); diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h index 7d08959f24901..50f17d9be5a0d 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h +++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h @@ -16,35 +16,28 @@ namespace mlir::iree_compiler::IREE::HAL { -struct DescriptorSetLayoutBinding { +struct PipelineLayoutBinding { // Ordinal of the descriptor within its parent set layout. - unsigned ordinal; + unsigned ordinal = 0; // Storage type of the descriptor resource. 
- IREE::HAL::DescriptorType type; + IREE::HAL::DescriptorType type = IREE::HAL::DescriptorType::StorageBuffer; // Flags defining how the descriptor behaves. - IREE::HAL::DescriptorFlags flags; + IREE::HAL::DescriptorFlags flags = IREE::HAL::DescriptorFlags::None; }; -struct DescriptorSetLayout { - // Ordinal of the set within the parent pipeline layout. - unsigned ordinal; - // Usage of the descriptor set (such as whether it is persistent or push). - IREE::HAL::DescriptorSetLayoutFlags flags; - // Bindings within the layout. Ordinals may be sparse. - SmallVector bindings; -}; - -using PipelineResourceMap = SmallVector>; +using PipelineResourceMap = SmallVector; struct PipelineLayout { // Total number of 32-bit push constants allocated. Not all dispatchable // functions using this layout will use all constants. - int64_t pushConstantCount; - // Sets bound in the layout. Ordinals may be sparse. - SmallVector setLayouts; - // Mapping of flattened source resource bindings into the descriptor sets. - // Matches 1:1 with the IREE::Stream::CmdDispatchOp::resources. + int64_t constantCount; + // Bindings within the layout. Ordinals may be sparse. + SmallVector bindings; + // Mapping of flattened source resource bindings into the descriptor set + // bindings. Matches 1:1 with the IREE::Stream::CmdDispatchOp::resources. PipelineResourceMap resourceMap; + // Flags defining behavior of the pipeline. 
+ IREE::HAL::PipelineLayoutFlags flags = IREE::HAL::PipelineLayoutFlags::None; void print(llvm::raw_ostream &os) const; }; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir index 8533479fae499..4cbccfba191d0 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir @@ -1,9 +1,7 @@ // RUN: iree-opt --split-input-file --iree-hal-conversion %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable private @ex { hal.executable.variant public @variant target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">) { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir index 7290cd163eff2..87fab3fe6a673 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir @@ -11,49 +11,6 @@ hal.executable @exe { } } -// CHECK-LABEL: @executableCreate -util.func public @executableCreate( - // CHECK-SAME: %[[DEV:.+]]: !vm.ref - %device: !hal.device, - // CHECK-SAME: %[[LAYOUT0:.+]]: !vm.ref, - %layout0: !hal.pipeline_layout, - // CHECK-SAME: %[[LAYOUT1:.+]]: !vm.ref - %layout1: !hal.pipeline_layout - ) -> (!hal.executable, !hal.executable) { - - // CHECK-DAG: %[[FORMAT1:.+]] = vm.rodata.inline "_utf8_format1_ - // CHECK-DAG: %[[BINARY1:.+]] = vm.rodata.inline "exe_binary1" {alignment = 16 : i64} : !vm.buffer = dense<[0, 1, 2, 3]> : vector<4xi8> - // CHECK-DAG: %[[NULL1:.+]] = vm.const.ref.zero : !vm.buffer - // CHECK: %[[EXE1:.+]] = vm.call.variadic @hal.executable.create( - // CHECK-SAME: %[[DEV]], %[[FORMAT1]], %[[BINARY1]], 
%[[NULL1]], [%[[LAYOUT0]], %[[LAYOUT1]]] - // CHECK-SAME: ) {nosideeffects} : (!vm.ref, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref ...) -> !vm.ref - %0 = hal.executable.create device(%device : !hal.device) target(@exe::@binary1) layouts([%layout0, %layout1]) : !hal.executable - - // CHECK-DAG: %[[FORMAT2:.+]] = vm.rodata.inline "_utf8_format2_ - // CHECK-DAG: %[[BINARY2:.+]] = vm.rodata.inline "exe_binary2" {alignment = 16 : i64} : !vm.buffer = dense<[4, 5, 6, 7]> : vector<4xi8> - // CHECK-DAG: %[[NULL2:.+]] = vm.const.ref.zero : !vm.buffer - // CHECK: %[[EXE2:.+]] = vm.call.variadic @hal.executable.create( - // CHECK-SAME: %[[DEV]], %[[FORMAT2]], %[[BINARY2]], %[[NULL2]], [%[[LAYOUT1]], %[[LAYOUT0]]] - // CHECK-SAME: ) {nosideeffects} : (!vm.ref, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref ...) -> !vm.ref - %1 = hal.executable.create device(%device : !hal.device) target(@exe::@binary2) layouts([%layout1, %layout0]) : !hal.executable - - // CHECK: vm.return %[[EXE1]], %[[EXE2]] - util.return %0, %1 : !hal.executable, !hal.executable -} - -// ----- - -hal.executable @exe { - hal.executable.binary @binary1 attributes { - data = dense<[0, 1, 2, 3]> : vector<4xi8>, - format = "format1" - } - hal.executable.binary @binary2 attributes { - data = dense<[4, 5, 6, 7]> : vector<4xi8>, - format = "format2" - } -} - // CHECK-LABEL: @executableCreate util.func public @executableCreate( // CHECK-SAME: %[[DEV:.+]]: !vm.ref @@ -97,16 +54,14 @@ hal.executable @exe2 { // CHECK-LABEL: @multipleExecutables util.func public @multipleExecutables( - %device: !hal.device, - %layout0: !hal.pipeline_layout, - %layout1: !hal.pipeline_layout + %device: !hal.device ) -> (!hal.executable, !hal.executable) { // CHECK-DAG: %[[FORMAT1:.+]] = vm.rodata.inline "_utf8_format_ // CHECK-DAG: %[[BINARY1:.+]] = vm.rodata.inline "exe1_binary1" {alignment = 16 : i64} : !vm.buffer = dense<[0, 1, 2, 3]> : vector<4xi8> - %0 = hal.executable.create device(%device : !hal.device) target(@exe1::@binary1) 
layouts([%layout0, %layout1]) : !hal.executable + %0 = hal.executable.create device(%device : !hal.device) target(@exe1::@binary1) : !hal.executable // CHECK-DAG: %[[FORMAT2:.+]] = vm.rodata.inline "_utf8_format_ // CHECK-DAG: %[[BINARY2:.+]] = vm.rodata.inline "exe2_binary2" {alignment = 16 : i64} : !vm.buffer = dense<[4, 5, 6, 7]> : vector<4xi8> - %1 = hal.executable.create device(%device : !hal.device) target(@exe2::@binary2) layouts([%layout1, %layout0]) : !hal.executable + %1 = hal.executable.create device(%device : !hal.device) target(@exe2::@binary2) : !hal.executable util.return %0, %1 : !hal.executable, !hal.executable } @@ -123,8 +78,6 @@ hal.executable @exe { util.func public @executableConstants( // CHECK-SAME: %[[DEV:.+]]: !vm.ref %device: !hal.device, - // CHECK-SAME: %[[LAYOUT:.+]]: !vm.ref - %layout: !hal.pipeline_layout, // CHECK-SAME: %[[CONSTANT0:.+]]: i32, %[[CONSTANT1:.+]]: i32 %constant0: i32, %constant1: i32 ) -> !hal.executable { @@ -141,13 +94,12 @@ util.func public @executableConstants( // CHECK-DAG: %[[INDEX2:.+]] = vm.const.i64 2 // CHECK-DAG: vm.buffer.store.i32 %[[CONSTANT1]], %[[CONSTANTS]][%[[INDEX2]]] : i32 -> !vm.buffer - // CHECK: %[[EXE:.+]] = vm.call.variadic @hal.executable.create( - // CHECK-SAME: %[[DEV]], %[[FORMAT]], %[[BINARY]], %[[CONSTANTS]], [%[[LAYOUT]]] - // CHECK-SAME: ) {nosideeffects} : (!vm.ref, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref ...) 
-> !vm.ref + // CHECK: %[[EXE:.+]] = vm.call @hal.executable.create( + // CHECK-SAME: %[[DEV]], %[[FORMAT]], %[[BINARY]], %[[CONSTANTS]] + // CHECK-SAME: ) {nosideeffects} : (!vm.ref, !vm.buffer, !vm.buffer, !vm.buffer) -> !vm.ref %0 = hal.executable.create device(%device : !hal.device) target(@exe::@binary) - layouts([%layout]) constants([%constant0, %c0, %constant1]) : !hal.executable // CHECK: vm.return %[[EXE]] diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp index 25567af0a4f2c..8270b21cd5524 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp @@ -762,15 +762,10 @@ struct CmdDispatchOpPattern Value ordinal = builder.create( loc, builder.getIndexType(), entryPointAttr); - // TODO(#18154): simplify bindings by removing descriptor sets. auto layoutAttr = exportOp.getLayout(); - auto bindingAttrs = IREE::HAL::getInterfaceBindingAttrs( - exportOp, dispatchOp.getResources().size()); SmallVector bindings; - for (auto [i, bindingAttr] : llvm::enumerate(bindingAttrs)) { - auto descriptorFlags = layoutAttr.getSetLayout(bindingAttr.getSet()) - .getBinding(bindingAttr.getBinding()) - .getFlags(); + for (auto [i, bindingAttr] : llvm::enumerate(layoutAttr.getBindings())) { + auto descriptorFlags = bindingAttr.getFlags(); IREE::HAL::BindingValue binding; if (bitEnumContainsAll(descriptorFlags, IREE::HAL::DescriptorFlags::Indirect)) { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir index c764fa8aab9b6..ece8202f74bfc 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir @@ -1,4 +1,4 @@ -// RUN: 
iree-opt --split-input-file --allow-unregistered-dialect --iree-hal-conversion %s | FileCheck %s +// RUN: iree-opt --split-input-file --allow-unregistered-dialect --iree-hal-conversion --cse --iree-hal-indirect-command-buffers=true %s | FileCheck %s // Today all memory control operations are ignored and we're just left with // the normal sequential execution barriers. @@ -32,7 +32,7 @@ util.func public @cmdFill(%arg0: !stream.resource, %arg1: index) -> ! %c128 = arith.constant 128 : index %c255_i32 = arith.constant 255 : i32 // CHECK: %[[CMD:.+]] = hal.command_buffer.create - %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg2: !stream.resource{%arg1}) { + %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg2: !stream.resource{%arg1}) { // CHECK-NEXT: hal.command_buffer.fill_buffer<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%arg0 : !hal.buffer)[%c0, %c128] // CHECK-SAME: pattern(%c255_i32 : i32) @@ -52,7 +52,7 @@ util.func public @cmdCopy(%arg0: !stream.resource, %arg1: index, %arg %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index // CHECK: %[[CMD:.+]] = hal.command_buffer.create - %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg4: !stream.resource{%arg1}, %arg2 as %arg5: !stream.resource{%arg3}) { + %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg4: !stream.resource{%arg1}, %arg2 as %arg5: !stream.resource{%arg3}) { // CHECK-NEXT: hal.command_buffer.copy_buffer<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: source(%arg0 : !hal.buffer)[%c0] // CHECK-SAME: target(%arg2 : !hal.buffer)[%c0] @@ -73,7 +73,7 @@ util.func public @cmdCollective(%arg0: !stream.resource, %arg1: index %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index // CHECK: %[[CMD:.+]] = hal.command_buffer.create - %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg5: !stream.resource{%arg1}, %arg2 as %arg6: 
!stream.resource{%arg3}) { + %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg5: !stream.resource{%arg1}, %arg2 as %arg6: !stream.resource{%arg3}) { // Out-of-place all-reduce: // CHECK-NEXT: hal.command_buffer.collective @@ -142,7 +142,7 @@ util.func public @cmdExecute(%arg0: !stream.resource, %arg1: index, % %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index // CHECK: %[[CMD:.+]] = hal.command_buffer.create - %0 = stream.cmd.execute on(#hal.device.affinity<@device>) await(%arg4) => with(%arg0 as %arg5: !stream.resource{%arg1}, %arg2 as %arg6: !stream.resource{%arg3}) { + %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) await(%arg4) => with(%arg0 as %arg5: !stream.resource{%arg1}, %arg2 as %arg6: !stream.resource{%arg3}) { stream.cmd.concurrent { // CHECK-NEXT: hal.command_buffer.copy_buffer<%[[CMD]] stream.cmd.copy %arg5[%c0], %arg6[%c0], %c128 : !stream.resource{%arg1} -> !stream.resource{%arg3} @@ -176,11 +176,9 @@ util.func public @cmdExecute(%arg0: !stream.resource, %arg1: index, % #executable_target_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64"> #executable_target_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer, Indirect> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @ex { hal.executable.variant public @aarch64 target(#executable_target_aarch64) { @@ -328,7 +326,7 @@ util.func public @cmdExecuteAffinities(%arg0: !stream.resource, %arg1 %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index // CHECK: %[[CMD:.+]] = hal.command_buffer.create - %0 = stream.cmd.execute on(#hal.device.affinity<@device, [0, 1]>) await(%arg4) => with(%arg0 as %arg5: !stream.resource{%arg1}, %arg2 as %arg6: !stream.resource{%arg3}) { + %0 = stream.cmd.execute once on(#hal.device.affinity<@device, [0, 1]>) await(%arg4) => with(%arg0 as 
%arg5: !stream.resource{%arg1}, %arg2 as %arg6: !stream.resource{%arg3}) { stream.cmd.copy %arg5[%c0], %arg6[%c0], %c128 : !stream.resource{%arg1} -> !stream.resource{%arg3} } => !stream.timepoint // CHECK: hal.device.queue.execute diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp index 4869771d22e47..95cf53eec3bb2 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp @@ -86,45 +86,13 @@ uint32_t CollectiveAttr::getEncodedValue() const { } //===----------------------------------------------------------------------===// -// hal.descriptor_set.layout<*> +// #hal.pipeline.layout<*> //===----------------------------------------------------------------------===// -DescriptorSetBindingAttr -DescriptorSetLayoutAttr::getBinding(int64_t ordinal) const { - for (auto binding : getBindings()) { - if (binding.getOrdinal() == ordinal) { - return binding; - } - } - return {}; -} - -//===----------------------------------------------------------------------===// -// hal.pipeline.layout<*> -//===----------------------------------------------------------------------===// - -DescriptorSetLayoutAttr -PipelineLayoutAttr::getSetLayout(int64_t ordinal) const { - for (auto setLayout : getSetLayouts()) { - if (setLayout.getOrdinal() == ordinal) { - return setLayout; - } - } - return {}; -} - -int64_t PipelineLayoutAttr::getFlatBindingIndex(int64_t set, - int64_t binding) const { - int64_t flatIndex = 0; - for (auto setLayoutAttr : getSetLayouts()) { - if (setLayoutAttr.getOrdinal() == set) { - flatIndex += binding; - break; - } else { - flatIndex += setLayoutAttr.getBindings().size(); - } - } - return flatIndex; +PipelineBindingAttr PipelineLayoutAttr::getBinding(int64_t ordinal) const { + assert(ordinal >= 0 && ordinal < getBindings().size() && + "binding ordinal out of bounds"); + return getBindings()[ordinal]; } 
//===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td index 8d463aae8745a..976a4cabccd1d 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td @@ -178,12 +178,12 @@ def HAL_DescriptorFlagsAttr : let cppNamespace = "::mlir::iree_compiler::IREE::HAL"; } -def HAL_DescriptorSetLayoutFlags_None : I32BitEnumAttrCase<"None", 0x0000>; -def HAL_DescriptorSetLayoutFlags_Indirect : I32BitEnumAttrCase<"Indirect", 0x0001>; -def HAL_DescriptorSetLayoutFlagsAttr : - I32BitEnumAttr<"DescriptorSetLayoutFlags", "valid DescriptorSetLayout flags", [ - HAL_DescriptorSetLayoutFlags_None, - HAL_DescriptorSetLayoutFlags_Indirect, +def HAL_PipelineLayoutFlags_None : I32BitEnumAttrCase<"None", 0x0000>; +def HAL_PipelineLayoutFlags_Indirect : I32BitEnumAttrCase<"Indirect", 0x0001>; +def HAL_PipelineLayoutFlagsAttr : + I32BitEnumAttr<"PipelineLayoutFlags", "valid PipelineLayout flags", [ + HAL_PipelineLayoutFlags_None, + HAL_PipelineLayoutFlags_Indirect, ]> { let cppNamespace = "::mlir::iree_compiler::IREE::HAL"; } @@ -376,57 +376,22 @@ def HAL_CollectiveAttr : } //===----------------------------------------------------------------------===// -// hal.descriptor_set.binding<*> +// hal.pipeline.binding<*> //===----------------------------------------------------------------------===// -def HAL_DescriptorSetBindingAttr : - AttrDef { - let mnemonic = "descriptor_set.binding"; - let summary = [{descriptor set binding specification}]; +def HAL_PipelineBindingAttr : + AttrDef { + let mnemonic = "pipeline.binding"; + let summary = [{pipeline binding specification}]; let description = [{ - Specifies a single binding within a descriptor set layout. + Specifies a single binding within a pipeline layout. 
}]; let parameters = (ins - AttrParameter<"int64_t", "">:$ordinal, AttrParameter<"DescriptorType", "">:$type, OptionalParameter<"DescriptorFlags", "DescriptorFlags::None">:$flags ); let assemblyFormat = [{ - `<` $ordinal `,` $type (`,` $flags^)? `>` - }]; -} - -def HAL_DescriptorSetLayoutBindingArrayAttr : - TypedArrayAttrBase; - -//===----------------------------------------------------------------------===// -// hal.descriptor_set.layout<*> -//===----------------------------------------------------------------------===// - -def HAL_DescriptorSetLayoutAttr : - AttrDef { - let mnemonic = "descriptor_set.layout"; - let summary = [{descriptor set layout specification}]; - let description = [{ - Specifies the layout information of a single set of descriptors used within - an pipeline layout. Multiple of these sets may be used by a single entry - point to allow for bindings with similar update frequencies to be grouped. - }]; - let parameters = (ins - AttrParameter<"int64_t", "">:$ordinal, - ArrayRefParameter<"DescriptorSetBindingAttr", "">:$bindings, - OptionalParameter<"std::optional">:$flags - ); - let assemblyFormat = [{ - `<` - $ordinal `,` - `bindings` `=` `[` $bindings `]` - (`,` `flags` `=` $flags^)? - `>` - }]; - let extraClassDeclaration = [{ - DescriptorSetBindingAttr getBinding(int64_t ordinal) const; + `<` $type (`,` $flags^)? `>` }]; } @@ -444,55 +409,25 @@ def HAL_PipelineLayoutAttr : lower-level target-specific argument passing behavior. }]; let parameters = (ins - AttrParameter<"int64_t", "">:$pushConstants, - ArrayRefParameter<"DescriptorSetLayoutAttr", "">:$setLayouts + ArrayRefParameter<"PipelineBindingAttr", "">:$bindings, + OptionalParameter<"int64_t", "0">:$constants, + OptionalParameter<"std::optional">:$flags ); let assemblyFormat = [{ `<` - `push_constants` `=` $pushConstants `,` - `sets` `=` `[` $setLayouts `]` + (`constants` `=` $constants^ `,` ` `)? + `bindings` `=` `[` qualified($bindings) `]` + (`,` `flags` `=` $flags^)? 
`>` }]; let extraClassDeclaration = [{ - DescriptorSetLayoutAttr getSetLayout(int64_t ordinal) const; - - // Returns the binding index in a flattened list of all sets and bindings. - // For example, if the layout is [set(bindings[4]), set(bindings[2])] then - // a query for set 1 binding 0 would return 4. - int64_t getFlatBindingIndex(int64_t set, int64_t binding) const; + IREE::HAL::PipelineBindingAttr getBinding(int64_t ordinal) const; + IREE::HAL::PipelineBindingAttr getBinding(APInt ordinal) const { + return getBinding(ordinal.getSExtValue()); + } }]; } -//===----------------------------------------------------------------------===// -// hal.interface.binding<*> -//===----------------------------------------------------------------------===// - -def HAL_InterfaceBindingAttr : - AttrDef { - let mnemonic = "interface.binding"; - let summary = [{interface binding specification}]; - let description = [{ - Specifies the descriptor set and binding ordinal of a particular layout - binding. - - Example: - ```mlir - #hal.interface.binding<0, 1> - ``` - }]; - let parameters = (ins - AttrParameter<"int64_t", "">:$set, - AttrParameter<"int64_t", "">:$binding - ); - let assemblyFormat = [{ - `<` $set `,` $binding `>` - }]; -} - -def HAL_InterfaceBindingArrayAttr : - TypedArrayAttrBase; - //===----------------------------------------------------------------------===// // #hal.executable.target<*> //===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp index 811789ccc7824..00c2c6ebebc8d 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp @@ -100,9 +100,7 @@ class HALToVMConversionInterface : public VMConversionDialectInterface { MLIRContext *context = attr.getContext(); // TODO(benvanik): remove this interface or make it an attr interface. 
if (auto bindingAttr = - llvm::dyn_cast(attr)) { - fn(IntegerAttr::get(IndexType::get(context), - APInt(64, bindingAttr.getOrdinal()))); + llvm::dyn_cast(attr)) { fn(IREE::HAL::DescriptorTypeAttr::get(context, bindingAttr.getType())); fn(IREE::HAL::DescriptorFlagsAttr::get(context, bindingAttr.getFlags())); return success(); diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp index 36eaac47f0df2..45dbb15a225b6 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp @@ -188,7 +188,7 @@ namespace { /// the same scope. struct SkipCommandBufferDeviceOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(CommandBufferDeviceOp op, PatternRewriter &rewriter) const override { @@ -340,6 +340,96 @@ void CommandBufferCopyBufferOp::getCanonicalizationPatterns( results.insert(context); } +namespace { + +/// Folds hal.buffer.subspans into dispatch bindings. +/// The binding range is always equal to or a subset of the subspan. 
+template +struct FoldCommandBufferDispatchBufferSubspan : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(OpT op, + PatternRewriter &rewriter) const override { + auto ip = rewriter.saveInsertionPoint(); + rewriter.setInsertionPoint(op); + bool needsUpdate = false; + auto bindingBuffers = llvm::to_vector(op.getBindingBuffers()); + auto bindingOffsets = llvm::to_vector(op.getBindingOffsets()); + for (size_t i = 0; i < bindingBuffers.size(); ++i) { + auto *definingOp = bindingBuffers[i].getDefiningOp(); + if (!definingOp) + continue; + if (auto subspanOp = dyn_cast(definingOp)) { + needsUpdate = true; + bindingBuffers[i] = subspanOp.getSourceBuffer(); + bindingOffsets[i] = rewriter.createOrFold( + subspanOp.getLoc(), subspanOp.getSourceOffset(), bindingOffsets[i]); + } + } + rewriter.restoreInsertionPoint(ip); + if (!needsUpdate) + return failure(); + rewriter.modifyOpInPlace(op, [&]() { + auto mutableBindingBuffers = op.getBindingBuffersMutable(); + mutableBindingBuffers.clear(); + mutableBindingBuffers.append(bindingBuffers); + auto mutableBindingOffsets = op.getBindingOffsetsMutable(); + mutableBindingOffsets.clear(); + mutableBindingOffsets.append(bindingOffsets); + }); + return success(); + } +}; + +} // namespace + +void CommandBufferDispatchOp::getCanonicalizationPatterns( + RewritePatternSet &results, MLIRContext *context) { + results + .insert>( + context); +} + +namespace { + +/// Folds hal.buffer.subspans into the indirect dispatch workgroup count. +/// The binding range is always equal to or a subset of the subspan. 
+struct FoldCommandBufferDispatchIndirectBufferSubspan + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(CommandBufferDispatchIndirectOp op, + PatternRewriter &rewriter) const override { + Value workgroupsBuffer = op.getWorkgroupsBuffer(); + auto *definingOp = workgroupsBuffer.getDefiningOp(); + if (!definingOp) + return failure(); + Value workgroupsOffset = op.getWorkgroupsOffset(); + if (auto subspanOp = dyn_cast(definingOp)) { + workgroupsBuffer = subspanOp.getSourceBuffer(); + workgroupsOffset = rewriter.createOrFold( + subspanOp.getLoc(), subspanOp.getSourceOffset(), workgroupsOffset); + } else { + return failure(); + } + rewriter.modifyOpInPlace(op, [&]() { + op.getWorkgroupsBufferMutable().set(workgroupsBuffer); + op.getWorkgroupsOffsetMutable().set(workgroupsOffset); + }); + return success(); + } +}; + +} // namespace + +void CommandBufferDispatchIndirectOp::getCanonicalizationPatterns( + RewritePatternSet &results, MLIRContext *context) { + results.insert(context); + results.insert< + FoldCommandBufferDispatchBufferSubspan>( + context); +} + //===----------------------------------------------------------------------===// // hal.device.queue.execute //===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp index 09581e830f735..4485011711878 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp @@ -47,14 +47,14 @@ static void printDescriptorType(OpAsmPrinter &p, Operation *, } //===----------------------------------------------------------------------===// -// custom($binding_ordinals, +// custom($binding_ordinals, // $binding_buffers, // type($binding_buffers), // $binding_offsets, // $binding_lengths) //===----------------------------------------------------------------------===// -static 
ParseResult parseDescriptorSetBindings( +static ParseResult parsePipelineBindings( OpAsmParser &parser, SmallVectorImpl &ordinals, SmallVectorImpl &buffers, @@ -86,11 +86,11 @@ static ParseResult parseDescriptorSetBindings( return success(); } -static void printDescriptorSetBindings(OpAsmPrinter &p, Operation *op, - ValueRange ordinals, ValueRange buffers, - TypeRange bufferTypes, - ValueRange bufferOffsets, - ValueRange bufferLengths) { +static void printPipelineBindings(OpAsmPrinter &p, Operation *op, + ValueRange ordinals, ValueRange buffers, + TypeRange bufferTypes, + ValueRange bufferOffsets, + ValueRange bufferLengths) { llvm::interleaveComma(llvm::zip_equal(ordinals, buffers, bufferTypes, bufferOffsets, bufferLengths), p, @@ -1887,7 +1887,7 @@ void ExecutableExportOrdinalOp::getAsmResultNames( LogicalResult InterfaceConstantLoadOp::verify() { InterfaceConstantLoadOp op = *this; auto layoutAttr = op.getLayout(); - if (op.getOrdinal().getZExtValue() >= layoutAttr.getPushConstants()) { + if (op.getOrdinal().getZExtValue() >= layoutAttr.getConstants()) { return op.emitOpError("push constant ordinal out of bounds"); } return success(); @@ -1897,18 +1897,20 @@ LogicalResult InterfaceConstantLoadOp::verify() { // hal.interface.binding.subspan //===----------------------------------------------------------------------===// -void InterfaceBindingSubspanOp::build( - OpBuilder &builder, OperationState &result, Type resultType, - IREE::HAL::PipelineLayoutAttr layout, APInt set, APInt binding, - Value byte_offset, ValueRange dynamic_dims, IntegerAttr alignment, - std::optional flags) { +void InterfaceBindingSubspanOp::build(OpBuilder &builder, + OperationState &result, Type resultType, + IREE::HAL::PipelineLayoutAttr layout, + APInt binding, Value byte_offset, + ValueRange dynamic_dims, + IntegerAttr alignment, + std::optional flags) { IREE::HAL::DescriptorFlagsAttr descriptorAttr; if (flags.has_value()) { descriptorAttr = 
IREE::HAL::DescriptorFlagsAttr::get(builder.getContext(), flags.value()); } - build(builder, result, resultType, layout, set, binding, byte_offset, - dynamic_dims, alignment, descriptorAttr); + build(builder, result, resultType, layout, binding, byte_offset, dynamic_dims, + alignment, descriptorAttr); } LogicalResult InterfaceBindingSubspanOp::verify() { @@ -1921,58 +1923,24 @@ LogicalResult InterfaceBindingSubspanOp::verify() { << " associated dimension SSA values"; } } - int64_t set = op.getSet().getSExtValue(); - int64_t binding = op.getBinding().getSExtValue(); - bool foundSet = false; - bool foundBinding = false; - for (auto setLayoutAttr : op.getLayout().getSetLayouts()) { - if (setLayoutAttr.getOrdinal() == set) { - foundSet = true; - for (auto bindingAttr : setLayoutAttr.getBindings()) { - if (bindingAttr.getOrdinal() == binding) { - foundBinding = true; - break; - } - } - } - } - if (!foundSet) { - return op.emitOpError("set ordinal ") - << set << " not present in pipeline layout"; - } else if (!foundBinding) { + uint64_t binding = op.getBinding().getZExtValue(); + if (binding >= op.getLayout().getBindings().size()) { return op.emitOpError("binding ordinal ") - << binding << " not present in descriptor set layout"; + << binding << " out of bounds in layout " << op.getLayout(); } return success(); } -IREE::HAL::DescriptorSetBindingAttr -InterfaceBindingSubspanOp::getDescriptorSetBindingAttr() { - int64_t set = getSet().getSExtValue(); - int64_t binding = getBinding().getSExtValue(); - for (auto setLayoutAttr : getLayout().getSetLayouts()) { - if (setLayoutAttr.getOrdinal() == set) { - for (auto bindingAttr : setLayoutAttr.getBindings()) { - if (bindingAttr.getOrdinal() == binding) { - return bindingAttr; - } - } - } - } - return {}; +IREE::HAL::PipelineBindingAttr +InterfaceBindingSubspanOp::getPipelineBindingAttr() { + return getLayout().getBinding(getBinding()); } IREE::HAL::DescriptorType InterfaceBindingSubspanOp::getDescriptorType() { - auto 
bindingAttr = getDescriptorSetBindingAttr(); + auto bindingAttr = getPipelineBindingAttr(); return bindingAttr.getType(); } -int64_t InterfaceBindingSubspanOp::getFlatBindingIndex() { - int64_t set = getSet().getSExtValue(); - int64_t binding = getBinding().getSExtValue(); - return getLayout().getFlatBindingIndex(set, binding); -} - llvm::MaybeAlign InterfaceBindingSubspanOp::getBaseAlignment() { if (auto baseAlignmentInt = getAlignment()) { return llvm::MaybeAlign(baseAlignmentInt.value().getZExtValue()); diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td index d4494678deec2..fdd43b7a5e728 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td @@ -429,7 +429,6 @@ def HAL_DispatchExternOp : HAL_PureOp<"dispatch.extern", [ OptionalAttr:$workgroup_size, OptionalAttr:$subgroup_size, OptionalAttr:$workgroup_local_memory, - OptionalAttr:$bindings, OptionalAttr:$tied_operands ); let results = (outs @@ -451,7 +450,6 @@ def HAL_DispatchExternOp : HAL_PureOp<"dispatch.extern", [ $tied_operands) `count` `` custom($workgroup_count) `layout` `(` $layout `)` - (`bindings` `(` $bindings^ `)`)? 
`objects` `(` `{` custom($targets, $target_ordinals, $target_objects, @@ -1532,6 +1530,7 @@ def HAL_CommandBufferDispatchOp : HAL_Op<"command_buffer.dispatch", [ )>, ]; + let hasCanonicalizer = 1; let hasVerifier = 1; } @@ -1599,6 +1598,7 @@ def HAL_CommandBufferDispatchIndirectOp : HAL_Op<"command_buffer.dispatch.indire )>, ]; + let hasCanonicalizer = 1; let hasVerifier = 1; } @@ -3059,7 +3059,6 @@ def HAL_InterfaceBindingSubspanOp : HAL_PureOp<"interface.binding.subspan", [ let arguments = (ins HAL_PipelineLayoutAttr:$layout, - IndexAttr:$set, IndexAttr:$binding, Optional:$byte_offset, HAL_ShapeDynamicDims:$dynamic_dims, @@ -3072,7 +3071,6 @@ def HAL_InterfaceBindingSubspanOp : HAL_PureOp<"interface.binding.subspan", [ let assemblyFormat = [{ `layout` `(` $layout `)` - `set` `(` $set `)` `binding` `(` $binding `)` (`alignment` `(` $alignment^ `)`)? (`offset` `(` $byte_offset^ `)`)? @@ -3084,7 +3082,6 @@ def HAL_InterfaceBindingSubspanOp : HAL_PureOp<"interface.binding.subspan", [ OpBuilder<(ins "Type":$resultType, "IREE::HAL::PipelineLayoutAttr":$layout, - "APInt":$set, "APInt":$binding, "Value":$byte_offset, "ValueRange":$dynamic_dims, @@ -3100,16 +3097,11 @@ def HAL_InterfaceBindingSubspanOp : HAL_PureOp<"interface.binding.subspan", [ ValueRange getResultDynamicDims(unsigned idx) { return getDynamicDims(); } // Returns the descriptor set binding metadata for the given set/binding. - IREE::HAL::DescriptorSetBindingAttr getDescriptorSetBindingAttr(); + IREE::HAL::PipelineBindingAttr getPipelineBindingAttr(); // Returns the type of the descriptor this binding references. IREE::HAL::DescriptorType getDescriptorType(); - // Returns the binding index in a flattened list of all sets and bindings. - // For example, if the layout is [set(bindings[4]), set(bindings[2])] then - // a query for set 1 binding 0 would return 4. - int64_t getFlatBindingIndex(); - // Returns the alignment of the base buffer pointer (before offset). 
llvm::MaybeAlign getBaseAlignment(); diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp index 1903bc800638b..9e31af790e8b3 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp @@ -90,26 +90,6 @@ Value DeviceType::resolveAny(Location loc, OpBuilder &builder) { // Utilities //===----------------------------------------------------------------------===// -SmallVector -getInterfaceBindingAttrs(Operation *op, size_t resourceCount) { - // It'd be nice if we had something typed here but this is just used for - // spooky action at a distance or user overrides. If the attribute is not - // found (not set by MaterializeInterfaces or the user) we construct one by - // convention (dense set 0 bindings for each resource). - auto bindingAttrs = op->getAttrOfType("hal.interface.bindings"); - if (bindingAttrs) { - return llvm::to_vector( - bindingAttrs.getAsRange()); - } - SmallVector bindings; - for (size_t i = 0; i < resourceCount; ++i) { - bindings.push_back(IREE::HAL::InterfaceBindingAttr::get(op->getContext(), - /*set=*/0, - /*binding=*/i)); - } - return bindings; -} - //===----------------------------------------------------------------------===// // Dialect registration //===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h index 543c26d947dbc..fefe68fd38f9d 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h @@ -164,7 +164,7 @@ struct SemaphoreType : public Type::TypeBase { // A tuple containing runtime values for a descriptor set binding. // The buffer specified may be either a !hal.buffer or an index of a binding // table slot to source the buffer from. 
-struct DescriptorSetBindingValue { +struct PipelineBindingValue { Value ordinal; Value buffer; Value byteOffset; @@ -220,14 +220,14 @@ operator<<(AsmPrinter &printer, template <> struct FieldParser< - std::optional> { - static FailureOr + std::optional> { + static FailureOr parse(AsmParser &parser) { std::string value; if (parser.parseKeywordOrString(&value)) return failure(); auto result = mlir::iree_compiler::IREE::HAL::symbolizeEnum< - mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlags>(value); + mlir::iree_compiler::IREE::HAL::PipelineLayoutFlags>(value); if (!result.has_value()) return failure(); return result.value(); @@ -235,8 +235,7 @@ struct FieldParser< }; static inline AsmPrinter &operator<<( AsmPrinter &printer, - std::optional - param) { + std::optional param) { printer << (param.has_value() ? mlir::iree_compiler::IREE::HAL::stringifyEnum(param.value()) : StringRef{""}); @@ -287,11 +286,6 @@ operator<<(AsmPrinter &printer, namespace mlir::iree_compiler::IREE::HAL { -// Returns the assigned bindings via the `hal.interface.bindings` attribute -// on the operation or default bindings in set 0 with bindings [0, count). 
-SmallVector -getInterfaceBindingAttrs(Operation *op, size_t resourceCount); - } // namespace mlir::iree_compiler::IREE::HAL #endif // IREE_COMPILER_DIALECT_HAL_IR_HALTYPES_H_ diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel index 8c88e205f708c..a111296749b00 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel @@ -24,7 +24,6 @@ iree_lit_test_suite( "channel_ops.mlir", "command_buffer_folding.mlir", "command_buffer_ops.mlir", - "descriptor_set_ops.mlir", "device_folding.mlir", "device_ops.mlir", "devices_ops.mlir", diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt index f1fb3494728d3..e2d654bb595c0 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt @@ -22,7 +22,6 @@ iree_lit_test_suite( "channel_ops.mlir" "command_buffer_folding.mlir" "command_buffer_ops.mlir" - "descriptor_set_ops.mlir" "device_folding.mlir" "device_ops.mlir" "devices_ops.mlir" diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir index d04c7d6814074..0e7dd9efbe30c 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir @@ -3,30 +3,26 @@ // CHECK-LABEL: descriptor_set_layout_binding.basic "descriptor_set_layout_binding.basic"() { - // CHECK: dslb0 = #hal.descriptor_set.binding<0, uniform_buffer> - dslb0 = #hal.descriptor_set.binding<0, uniform_buffer>, - // CHECK: dslb1 = #hal.descriptor_set.binding<1, storage_buffer, "ReadOnly|Indirect"> - dslb1 = #hal.descriptor_set.binding<1, storage_buffer, "ReadOnly|Indirect"> + // CHECK: dslb0 = #hal.pipeline.binding + dslb0 = 
#hal.pipeline.binding, + // CHECK: dslb1 = #hal.pipeline.binding + dslb1 = #hal.pipeline.binding } : () -> () // ----- // CHECK-LABEL: pipeline_layout.basic "pipeline_layout.basic"() { - // CHECK: layout0 = #hal.pipeline.layout - // CHECK-SAME: <1, storage_buffer> - // CHECK-SAME: <1, bindings = [ - // CHECK-SAME: <0, uniform_buffer> - layout0 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<0, uniform_buffer> - ]> + // CHECK: layout0 = #hal.pipeline.layout< + // CHECK-SAME: constants = 4 + // CHECK-SAME: bindings = [ + // CHECK-SAME: #hal.pipeline.binding, + // CHECK-SAME: #hal.pipeline.binding, + // CHECK-SAME: #hal.pipeline.binding + layout0 = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> } : () -> () diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir index 3adbce807fb79..279511e94e14f 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir @@ -98,13 +98,13 @@ util.func public @fold_buffer_subspan_into_copy_buffer( // ----- -// CHECK-LABEL: @fold_buffer_subspan_into_push_descriptor_set +// CHECK-LABEL: @fold_buffer_subspan_into_dispatch // CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer, -// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout, +// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, // CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer -util.func public @fold_buffer_subspan_into_push_descriptor_set( +util.func public @fold_buffer_subspan_into_dispatch( %cmd: !hal.command_buffer, - %layout: !hal.pipeline_layout, + %executable: !hal.executable, %buffer: !hal.buffer ) { %c0 = arith.constant 0 : index @@ -116,20 +116,52 @@ util.func public @fold_buffer_subspan_into_push_descriptor_set( %c262140 = arith.constant 
262140 : index %c262144 = arith.constant 262144 : index %subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c4096, %c262144] : !hal.buffer - // CHECK: hal.command_buffer.push_descriptor_set + // CHECK: hal.command_buffer.dispatch // CHECK-SAME: bindings([ - hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> - layout(%layout : !hal.pipeline_layout)[%c0] + hal.command_buffer.dispatch<%cmd : !hal.command_buffer> + target(%executable: !hal.executable)[%c0] + workgroups([%c1, %c1, %c1]) bindings([ // 0 + 4096: - // CHECK-NEXT: %c0 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c8000] - %c0 = (%subspan : !hal.buffer)[%c0, %c8000], + // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c8000] + (%subspan : !hal.buffer)[%c0, %c8000], // 4096 + 4: - // CHECK-NEXT: %c1 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4100, %c262140] - %c1 = (%subspan : !hal.buffer)[%c4, %c262140], + // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4100, %c262140] + (%subspan : !hal.buffer)[%c4, %c262140], // No change: - // CHECK-NEXT: %c2 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c262144] - %c2 = (%buffer : !hal.buffer)[%c4096, %c262144] + // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c262144] + (%buffer : !hal.buffer)[%c4096, %c262144] ]) + flags("None") + util.return +} + +// ----- + +// CHECK-LABEL: @fold_buffer_subspan_into_dispatch_indirect +// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer, +// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, +// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer +util.func public @fold_buffer_subspan_into_dispatch_indirect( + %cmd: !hal.command_buffer, + %executable: !hal.executable, + %buffer: !hal.buffer + ) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c4096 = arith.constant 4096 : index + %c262144 = arith.constant 262144 : index + %subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c4096, %c262144] : !hal.buffer + // CHECK: hal.command_buffer.dispatch.indirect + 
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer> + target(%executable: !hal.executable)[%c0] + // 4096 + 4: + // CHECK-SAME: workgroups(%[[BASE_BUFFER]] : !hal.buffer)[%c4100] + workgroups(%subspan : !hal.buffer)[%c4] + bindings([ + (%buffer : !hal.buffer)[%c0, %c1] + ]) + flags("None") util.return } diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir index 77d56e362d36d..2e141410fb0a2 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir @@ -213,44 +213,11 @@ util.func public @command_buffer_collective( // ----- -// CHECK-LABEL: @command_buffer_push_descriptor_set -// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, -// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout, -// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, -// CHECK-SAME: %[[SLOT:.+]]: index) -util.func public @command_buffer_push_descriptor_set( - %cmd: !hal.command_buffer, - %layout: !hal.pipeline_layout, - %buffer: !hal.buffer, - %slot: index) { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c4 = arith.constant 4 : index - %c4096 = arith.constant 4096 : index - %c8000 = arith.constant 8000 : index - // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> - hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> - // CHECK-SAME: layout(%[[LAYOUT]] : !hal.pipeline_layout)[%c1] - layout(%layout : !hal.pipeline_layout)[%c1] - // CHECK-SAME: bindings([ - bindings([ - // CHECK-NEXT: %c0 = (%[[BUFFER]] : !hal.buffer)[%c4096, %c8000] - %c0 = (%buffer : !hal.buffer)[%c4096, %c8000], - // CHECK-NEXT: %c1 = (%[[SLOT]] : index)[%c4, %c4096] - %c1 = (%slot : index)[%c4, %c4096] - ]) - util.return -} - -// ----- - hal.executable @ex { hal.executable.variant @backend target(<"backend", "format">) { - hal.executable.export @entry0 ordinal(0) 
layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) } } @@ -258,18 +225,34 @@ hal.executable @ex { // CHECK-LABEL: @command_buffer_dispatch // CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, // CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index, -// CHECK-SAME: %[[X:[a-z0-9]+]]: index, %[[Y:[a-z0-9]+]]: index, %[[Z:[a-z0-9]+]]: index) +// CHECK-SAME: %[[X:[a-z0-9]+]]: index, %[[Y:[a-z0-9]+]]: index, %[[Z:[a-z0-9]+]]: index, +// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, +// CHECK-SAME: %[[SLOT:.+]]: index) util.func public @command_buffer_dispatch( %cmd: !hal.command_buffer, %executable: !hal.executable, %ordinal: index, - %x: index, %y: index, %z: index) { - // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> - // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]] - // CHECK-SAME: workgroups([%[[X]], %[[Y]], %[[Z]]]) - // CHECK-SAME: flags("None") + %x: index, %y: index, %z: index, + %buffer: !hal.buffer, + %slot: index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c4096 = arith.constant 4096 : index + %c8000 = arith.constant 8000 : index + // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> hal.command_buffer.dispatch<%cmd : !hal.command_buffer> + // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]] target(%executable: !hal.executable)[%ordinal] + // CHECK-SAME: workgroups([%[[X]], %[[Y]], %[[Z]]]) workgroups([%x, %y, %z]) + // CHECK-SAME: bindings([ + bindings([ + // CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c4096, %c8000] + (%buffer : !hal.buffer)[%c4096, %c8000], + // CHECK-NEXT: (%[[SLOT]] : index)[%c4, %c4096] + (%slot : index)[%c4, %c4096] + ]) + // CHECK-NEXT: flags("None") flags("None") util.return } @@ -278,30 +261,33 @@ util.func public @command_buffer_dispatch( 
hal.executable @ex { hal.executable.variant @backend target(<"backend", "format">) { - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) } } // CHECK-LABEL: @command_buffer_dispatch_indirect // CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, -// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:.+]]: index, -// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, %[[OFFSET:.+]]: index) +// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index, +// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index) util.func public @command_buffer_dispatch_indirect( %cmd: !hal.command_buffer, %executable: !hal.executable, %ordinal: index, - %buffer: !hal.buffer, %offset: index) { + %buffer: !hal.buffer, %offset: index, %length: index) { // CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]] // CHECK-SAME: workgroups(%[[BUFFER]] : !hal.buffer)[%[[OFFSET]]] - // CHECK-SAME: flags("None") + // CHECK-SAME: bindings([ + // CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%[[OFFSET]], %[[LENGTH]]] + // CHECK-NEXT: ]) flags("None") hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer> target(%executable: !hal.executable)[%ordinal] workgroups(%buffer : !hal.buffer)[%offset] + bindings([ + (%buffer : !hal.buffer)[%offset, %length] + ]) flags("None") util.return } @@ -310,11 +296,9 @@ util.func public @command_buffer_dispatch_indirect( hal.executable @ex { hal.executable.variant @backend target(<"backend", "format">) { - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) } } @@ -322,18 +306,23 @@ 
hal.executable @ex { // CHECK-LABEL: @command_buffer_dispatch_indirect_indirect // CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, // CHECK-SAME: %[[EXECUTABLE:[a-z0-9]+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index, -// CHECK-SAME: %[[BUFFER_SLOT:[a-z0-9]+]]: index, %[[OFFSET:[a-z0-9]+]]: index) +// CHECK-SAME: %[[BUFFER_SLOT:[a-z0-9]+]]: index, %[[OFFSET:[a-z0-9]+]]: index, %[[LENGTH:[a-z0-9]+]]: index) util.func public @command_buffer_dispatch_indirect_indirect( %cmd: !hal.command_buffer, %executable: !hal.executable, %ordinal: index, - %buffer_slot: index, %offset: index) { + %buffer_slot: index, %offset: index, %length: index) { // CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]] // CHECK-SAME: workgroups(%[[BUFFER_SLOT]] : index)[%[[OFFSET]]] - // CHECK-SAME: flags("None") + // CHECK-SAME: bindings([ + // CHECK-NEXT: (%[[BUFFER_SLOT]] : index)[%[[OFFSET]], %[[LENGTH]]] + // CHECK-NEXT: ]) flags("None") hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer> target(%executable: !hal.executable)[%ordinal] workgroups(%buffer_slot : index)[%offset] + bindings([ + (%buffer_slot : index)[%offset, %length] + ]) flags("None") util.return } diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir deleted file mode 100644 index 86180ac169324..0000000000000 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s - -// CHECK-LABEL: @descriptor_set_layout_create -// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device) -util.func public @descriptor_set_layout_create(%device: !hal.device) { - // CHECK: = hal.descriptor_set_layout.create - // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: flags("None") - // CHECK-SAME: bindings([ 
- // CHECK-SAME: #hal.descriptor_set.binding<0, storage_buffer>, - // CHECK-SAME: #hal.descriptor_set.binding<1, storage_buffer> - // CHECK-SAME: ]) : !hal.descriptor_set_layout - %0 = hal.descriptor_set_layout.create device(%device : !hal.device) - flags("None") - bindings([ - #hal.descriptor_set.binding<0, storage_buffer>, - #hal.descriptor_set.binding<1, storage_buffer> - ]) : !hal.descriptor_set_layout - util.return -} diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir index 5d9874c132748..3e69574ee4cd5 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir @@ -13,11 +13,9 @@ hal.executable @ex { ]) { // CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes { // CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) attributes { workgroup_size = [4 : index, 1 : index, 1 : index] } @@ -42,11 +40,9 @@ hal.executable @ex_with_workgroup_count_region { // CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes { // CHECK-SAME: subgroup_size = 64 : index // CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) attributes { subgroup_size = 64 : index, workgroup_size = [4 : index, 1 : index, 1 : index] @@ -83,11 +79,9 @@ hal.executable @ex_with_condition { // CHECK-DAG: hal.executable.export public @entry0 ordinal(0) 
layout(#pipeline_layout) attributes { // CHECK-SAME: subgroup_size = 64 : index // CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) attributes { subgroup_size = 64 : index, workgroup_size = [4 : index, 1 : index, 1 : index] @@ -140,26 +134,6 @@ hal.executable @ex_with_constants { // ----- -// CHECK-LABEL: @executable_create -// CHECK-SAME: %[[DEVICE:.+]]: !hal.device, -// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.pipeline_layout, -// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.pipeline_layout -util.func public @executable_create( - %device: !hal.device, - %layout0: !hal.pipeline_layout, - %layout1: !hal.pipeline_layout) { - // CHECK: = hal.executable.create - // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: target(@exe::@binary1) - // CHECK-SAME: layouts([%[[LAYOUT0]], %[[LAYOUT1]]]) : !hal.executable - %0 = hal.executable.create device(%device : !hal.device) - target(@exe::@binary1) - layouts([%layout0, %layout1]) : !hal.executable - util.return -} - -// ----- - // CHECK-LABEL: @executable_create // CHECK-SAME: %[[DEVICE:.+]]: !hal.device util.func public @executable_create(%device: !hal.device) { @@ -173,36 +147,14 @@ util.func public @executable_create(%device: !hal.device) { // ----- -// CHECK-LABEL: @pipeline_layout_create -// CHECK-SAME: %[[DEVICE:.+]]: !hal.device, -// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.descriptor_set_layout, -// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.descriptor_set_layout -util.func public @pipeline_layout_create( - %device: !hal.device, - %layout0: !hal.descriptor_set_layout, - %layout1: !hal.descriptor_set_layout) { - // CHECK: hal.pipeline_layout.create - // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: push_constants(1) - // CHECK-SAME: layouts([%[[LAYOUT0]], %[[LAYOUT1]]]) : 
!hal.pipeline_layout - %0 = hal.pipeline_layout.create device(%device : !hal.device) - push_constants(1) - layouts([%layout0, %layout1]) : !hal.pipeline_layout - util.return -} - -// ----- - // CHECK-LABEL: @unresolved_workload_ex hal.executable @unresolved_workload_ex { // CHECK: hal.executable.variant public @backend hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) { // CHECK: hal.executable.export public @entry0 - hal.executable.export public @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export public @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) { ^bb0(%device: !hal.device, %arg0: index): hal.return %arg0, %arg0, %arg0 : index, index, index diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir index 7800b2ac47968..b87c6bc23b23f 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir @@ -13,8 +13,8 @@ func.func @interface_workgroup_info() { // ----- -#pipeline_layout = #hal.pipeline.layout]> +#pipeline_layout = #hal.pipeline.layout ]> // CHECK-LABEL: @interface_io_constant @@ -26,9 +26,11 @@ func.func @interface_io_constant() { // ----- -#pipeline_layout = #hal.pipeline.layout]>, - <1, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // CHECK-LABEL: @interface_io_subspan @@ -36,26 +38,26 @@ func.func @interface_io_constant() { func.func @interface_io_subspan(%dim0: index, %dim2: index) { %c8 = arith.constant 8 : index - // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref{%[[DIM0]], %[[DIM2]]} - %0 = 
hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref{%dim0, %dim2} + // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref{%[[DIM0]], %[[DIM2]]} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref{%dim0, %dim2} - // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) alignment(16) : memref<16xi8> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) alignment(16) : memref<16xi8> + // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(16) : memref<16xi8> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(16) : memref<16xi8> return } // ----- -#pipeline_layout = #hal.pipeline.layout]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @interface_io_subspan_wrong_dynamic_dim(%dim: index) { %c8 = arith.constant 8 : index // expected-error @+1{{result type 'memref' has 2 dynamic dimensions but 1 associated dimension SSA values}} - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref{%dim} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref{%dim} return } diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir index 5dd1ea7110534..59145c52cd3b8 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir @@ -80,19 +80,11 @@ util.func public @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %ar hal.return %x_capture, %y_capture, %z : index, index, index } // Must match the external definition. - // CHECK: layout(, - <1, storage_buffer> - ]> + // CHECK: layout(, + #hal.pipeline.binding ]>) - // Optional, automatically inferred if omitted. 
- // CHECK: bindings([#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>]) - bindings([ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1> - ]) // Can have object references for multiple targets or configurations. // CHECK: objects({ objects({ diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp index eedb427a3f15f..decde187caf01 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp @@ -91,15 +91,6 @@ struct ConvertToHALPass // Cleanup conversion attributes used for spooky action at a distance. moduleOp->removeAttr("stream.affinity.default"); - for (auto executableOp : moduleOp.getOps()) { - for (auto variantOp : - executableOp.getOps()) { - for (auto exportOp : - variantOp.getOps()) { - exportOp->removeAttr("hal.interface.bindings"); - } - } - } } }; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp index c5cc1c03a2604..9d7cc63165b97 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp @@ -41,7 +41,6 @@ static const int64_t kBufferAlignment = 256; using Vec3 = std::tuple; struct Binding { - unsigned set = 0; unsigned binding = 0; int64_t size = 0; }; @@ -119,15 +118,9 @@ static DispatchParamsMap gatherDispatchParams(mlir::ModuleOp moduleOp, // Work around needing a mutable key for the set; C++ was a mistake. 
dispatchOp.forEachEntryPointAttr([&](SymbolRefAttr entryPointAttr) { - auto exportOp = - symbolTable.lookupNearestSymbolFrom( - dispatchOp, entryPointAttr); - auto bindingAttrs = IREE::HAL::getInterfaceBindingAttrs( - exportOp, dispatchOp.getResources().size()); - SmallVector bindings; - for (auto [bindingAttr, resourceLength] : - llvm::zip_equal(bindingAttrs, dispatchOp.getResourceLengths())) { + for (auto [i, resourceLength] : + llvm::enumerate(dispatchOp.getResourceLengths())) { APInt resourceLengthInt; if (!matchPattern(resourceLength, m_ConstantInt(&resourceLengthInt))) { @@ -136,9 +129,7 @@ static DispatchParamsMap gatherDispatchParams(mlir::ModuleOp moduleOp, << "` (non-constant resource length)\n";); return; } - bindings.push_back({(unsigned)bindingAttr.getSet(), - (unsigned)bindingAttr.getBinding(), - resourceLengthInt.getSExtValue()}); + bindings.push_back({(unsigned)i, resourceLengthInt.getSExtValue()}); } auto &dispatchParamsSet = map[entryPointAttr]; @@ -300,7 +291,7 @@ static void appendDispatchBenchmark(IREE::Stream::AffinityAttr affinityAttr, // Constant values. 
auto layoutAttr = exportOp.getLayoutAttr(); SmallVector constantValues; - if (int64_t pushConstantCount = layoutAttr.getPushConstants()) { + if (int64_t pushConstantCount = layoutAttr.getConstants()) { constantValues.reserve(pushConstantCount); for (int64_t i = 0; i < pushConstantCount; ++i) { constantValues.push_back(funcBuilder.create( diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp index 37d960307d215..9f3bee7d529a9 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp @@ -263,19 +263,14 @@ static IREE::HAL::PipelineLayoutAttr makePipelineLayoutAttr(const PipelineLayout &pipelineLayout, IREE::HAL::ExecutableTargetAttr targetAttr, OpBuilder &builder) { - SmallVector setLayoutAttrs; - for (const auto &setLayout : pipelineLayout.setLayouts) { - SmallVector bindingAttrs; - for (const auto &binding : setLayout.bindings) { - bindingAttrs.push_back(IREE::HAL::DescriptorSetBindingAttr::get( - builder.getContext(), binding.ordinal, binding.type, binding.flags)); - } - setLayoutAttrs.push_back(IREE::HAL::DescriptorSetLayoutAttr::get( - builder.getContext(), setLayout.ordinal, bindingAttrs, - setLayout.flags)); + SmallVector bindingAttrs; + for (const auto &binding : pipelineLayout.bindings) { + bindingAttrs.push_back(IREE::HAL::PipelineBindingAttr::get( + builder.getContext(), binding.type, binding.flags)); } - return IREE::HAL::PipelineLayoutAttr::get( - builder.getContext(), pipelineLayout.pushConstantCount, setLayoutAttrs); + return IREE::HAL::PipelineLayoutAttr::get(builder.getContext(), bindingAttrs, + pipelineLayout.constantCount, + pipelineLayout.flags); } // Converts the usage of the given primitive |arg| to interface methods. 
@@ -297,8 +292,8 @@ convertOperandUsage(mlir::FunctionOpInterface sourceFuncOp, BlockArgument arg, static void convertBindingUsage(mlir::FunctionOpInterface sourceFuncOp, BlockArgument arg, IREE::HAL::PipelineLayoutAttr pipelineLayoutAttr, - IREE::HAL::DescriptorSetLayoutAttr setLayoutAttr, - IREE::HAL::DescriptorSetBindingAttr bindingAttr) { + int64_t bindingOrdinal, + IREE::HAL::PipelineBindingAttr bindingAttr) { if (arg.use_empty()) return; // no-op for (auto &use : llvm::make_early_inc_range(arg.getUses())) { @@ -309,8 +304,7 @@ convertBindingUsage(mlir::FunctionOpInterface sourceFuncOp, BlockArgument arg, arg.getArgNumber(), "stream.alignment"); auto newOp = builder.create( oldOp.getLoc(), oldOp.getType(), pipelineLayoutAttr, - APInt(64, setLayoutAttr.getOrdinal()), - APInt(64, bindingAttr.getOrdinal()), oldOp.getByteOffset(), + APInt(64, bindingOrdinal), oldOp.getByteOffset(), oldOp.getDynamicDims(), alignmentAttr, bindingAttr.getFlags()); oldOp.replaceAllUsesWith(newOp.getResult()); oldOp.erase(); @@ -347,13 +341,10 @@ cloneFuncWithInterface(mlir::func::FuncOp sourceFuncOp, if (!llvm::isa(arg.getType())) { continue; // unhandled arg type (primitive/etc) } - auto setBinding = resourceMap[resourceIdx++]; - auto setLayoutAttr = layoutAttr.getSetLayout(setBinding.first); - assert(setLayoutAttr && "layout must be consistent"); - auto bindingAttr = setLayoutAttr.getBinding(setBinding.second); + auto binding = resourceMap[resourceIdx++]; + auto bindingAttr = layoutAttr.getBinding(binding); assert(bindingAttr && "layout must be consistent"); - convertBindingUsage(sourceFuncOp, arg, layoutAttr, setLayoutAttr, - bindingAttr); + convertBindingUsage(sourceFuncOp, arg, layoutAttr, binding, bindingAttr); } // Remove all arguments now that we've turned them into lookup ops. 
@@ -396,7 +387,7 @@ declareEntryPointOps(IREE::Stream::ExecutableOp sourceExecutableOp, exportOp->getAttrOfType( "hal.interface.layout"); const auto &pipelineLayout = layoutAnalysis.getPipelineLayout(exportOp); - const PipelineResourceMap &resourceMap = pipelineLayout.resourceMap; + const auto &resourceMap = pipelineLayout.resourceMap; // Clone the updated function declaration into each variant. ExportExpansions exportExpansions; @@ -444,17 +435,6 @@ declareEntryPointOps(IREE::Stream::ExecutableOp sourceExecutableOp, exportExpansions[oldRefAttr].push_back( std::make_pair(newRefAttr, variantOp.getTargetAttr())); - // Annotate the export with the a mapping of the resources to the - // interface bindings. This is used during conversion. - SmallVector bindingAttrs; - for (auto setBinding : resourceMap) { - bindingAttrs.push_back(IREE::HAL::InterfaceBindingAttr::get( - newExportOp.getContext(), setBinding.first, setBinding.second)); - } - newExportOp->setAttr( - "hal.interface.bindings", - ArrayAttr::get(newExportOp.getContext(), bindingAttrs)); - // Clone the workgroup count calculation function. 
if (!exportOp.getWorkgroupCount().empty()) { mlir::IRMapping mapper; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir index e7838d63c207b..c5e799ade1b63 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir @@ -1,12 +1,10 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-capture-executable-sources{stage=configured})' %s | FileCheck %s #executable_target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // CHECK-DAG: #[[EX0_VARIANT0_LOC:.+]] = loc("module_ex0_variant0.configured.mlir" diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir index 1de9b90632e84..6d443f511cdc3 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir @@ -7,35 +7,20 @@ util.global private @device : !hal.device #executable_target_embedded_elf_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64"> #executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> -// CHECK: #[[PIPELINE_LAYOUT_ATTR_0:.+]] = #hal.pipeline.layout -#pipeline_layout_0 = #hal.pipeline.layout - #hal.descriptor_set.binding<0, storage_buffer>, - // CHECK-SAME: <1, storage_buffer> - #hal.descriptor_set.binding<1, storage_buffer>, - // CHECK-SAME: <2, storage_buffer> - #hal.descriptor_set.binding<2, 
storage_buffer> - ]> -]> -// CHECK: #[[PIPELINE_LAYOUT_ATTR_1:.+]] = #hal.pipeline.layout -#pipeline_layout_1 = #hal.pipeline.layout - #hal.descriptor_set.binding<4, storage_buffer> - ]>, - #hal.descriptor_set.layout<1, bindings = [ - // CHECK-SAME: <5, storage_buffer> - #hal.descriptor_set.binding<5, storage_buffer>, - // CHECK-SAME: <6, storage_buffer> - #hal.descriptor_set.binding<6, storage_buffer> - ]> +// CHECK: #[[PIPELINE_LAYOUT_ATTR:.+]] = #hal.pipeline.layout +#pipeline_layout = #hal.pipeline.layout + #hal.pipeline.binding, + // CHECK-SAME: #hal.pipeline.binding + #hal.pipeline.binding, + // CHECK-SAME: #hal.pipeline.binding + #hal.pipeline.binding ]> // CHECK: hal.executable private @ex hal.executable private @ex { hal.executable.variant public @embedded_elf_aarch64 target(#executable_target_embedded_elf_aarch64) { - hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout_0) { + hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) { ^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors %c1 = arith.constant 1 : index %0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg0] @@ -46,15 +31,7 @@ hal.executable private @ex { } } hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) { - hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout_1) attributes { - // Override the bindings. The other variant uses the default ones. 
- // CHECK-NOT: hal.interface.bindings - hal.interface.bindings = [ - #hal.interface.binding<0, 4>, - #hal.interface.binding<1, 5>, - #hal.interface.binding<1, 6> - ] - } { + hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) { ^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors %c1 = arith.constant 1 : index %0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg0] @@ -108,9 +85,8 @@ util.func public @simpleDispatch(%arg0: !hal.buffer_view, %arg1: !hal.buffer_vie // CHECK: %[[CMD:.+]] = hal.command_buffer.create // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: mode("OneShot|AllowInlineExecution") // CHECK-SAME: categories("Transfer|Dispatch") - %timepoint = stream.cmd.execute + %timepoint = stream.cmd.execute once with(%arg0_resource as %arg0_capture: !stream.resource{%c16}, %arg1_resource as %arg1_capture: !stream.resource{%c16}, %result_resource as %result_capture: !stream.resource{%c16}) { @@ -121,42 +97,28 @@ util.func public @simpleDispatch(%arg0: !hal.buffer_view, %arg1: !hal.buffer_vie // CHECK-DAG: %[[SWITCH0:.+]] = arith.select %[[FORMAT_AARCH64]], %c0, %[[SWITCH1]] // CHECK: scf.index_switch %[[SWITCH0]] // CHECK: case 0 { - // CHECK: %[[PIPELINE_LAYOUT_0:.+]] = hal.pipeline_layout.lookup - // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: layout(#[[PIPELINE_LAYOUT_ATTR_0]]) : !hal.pipeline_layout - // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> - // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_0]] : !hal.pipeline_layout)[%c0] - // CHECK-SAME: bindings([ - // CHECK: %c0 = (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16], - // CHECK: %c1 = (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16], - // CHECK: %c2 = (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16] - // CHECK: ]) // CHECK-DAG: %[[EXECUTABLE_0:.+]] = hal.executable.lookup device(%[[DEVICE]] : !hal.device) executable(@ex) : !hal.executable // CHECK-DAG: %[[ORDINAL_0:.+]] = 
hal.executable.export.ordinal target(@ex::@embedded_elf_aarch64::@dispatch) : index // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%[[EXECUTABLE_0]] : !hal.executable)[%[[ORDINAL_0]]] // CHECK-SAME: workgroups([%c1, %c1, %c1]) + // CHECK-SAME: bindings([ + // CHECK-NEXT: (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16], + // CHECK-NEXT: (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16], + // CHECK-NEXT: (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16] + // CHECK-NEXT: ]) // CHECK: scf.yield // CHECK: } // CHECK: case 1 { - // CHECK: %[[PIPELINE_LAYOUT_1:.+]] = hal.pipeline_layout.lookup - // CHECK-SAME: device(%[[DEVICE]] : !hal.device) - // CHECK-SAME: layout(#[[PIPELINE_LAYOUT_ATTR_1]]) : !hal.pipeline_layout - // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> - // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_1]] : !hal.pipeline_layout)[%c0] - // CHECK-SAME: bindings([ - // CHECK: %c4 = (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16] - // CHECK: ]) - // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> - // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_1]] : !hal.pipeline_layout)[%c1] - // CHECK-SAME: bindings([ - // CHECK: %c5 = (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16], - // CHECK: %c6 = (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16] - // CHECK: ]) // CHECK-DAG: %[[EXECUTABLE_1:.+]] = hal.executable.lookup device(%[[DEVICE]] : !hal.device) executable(@ex) : !hal.executable // CHECK-DAG: %[[ORDINAL_1:.+]] = hal.executable.export.ordinal target(@ex::@embedded_elf_x86_64::@dispatch) : index // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%[[EXECUTABLE_1]] : !hal.executable)[%[[ORDINAL_1]]] + // CHECK-SAME: bindings([ + // CHECK-NEXT: (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16] + // CHECK-NEXT: (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16], + // CHECK-NEXT: (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16] + // CHECK-NEXT: ]) // CHECK: scf.yield // CHECK: } 
stream.cmd.dispatch { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir index 484adeed84f4c..d91f19efd4bc0 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir @@ -10,18 +10,14 @@ util.global private @device = #hal.device.target<"local", [ #executable_target_embedded_elf_x86_64 ]> : !hal.device -#pipeline_layout_0 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout_0 = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> -#pipeline_layout_1 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout_1 = #hal.pipeline.layout, + #hal.pipeline.binding ]> // Executable should be dumped: @@ -74,14 +70,7 @@ hal.executable private @ex0 { // Create command buffer: // CHECK: %[[CMD:.+]] = hal.command_buffer.create -// Setup dispatch constants and bindings: -// CHECK: hal.command_buffer.push_constants<%[[CMD]] : !hal.command_buffer> layout(%{{.+}} : !hal.pipeline_layout) offset(0) values([%c100_i32, %c200_i32]) : i32, i32 // CHECK: %[[BUFFER:.+]] = util.global.load @ex0_embedded_elf_x86_64_dispatch0_512_buffer -// CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> layout(%{{.+}} : !hal.pipeline_layout)[%c0] bindings([ -// CHECK-NEXT: %c0 = (%[[BUFFER]] : !hal.buffer)[%c0, %c32], -// CHECK-NEXT: %c1 = (%[[BUFFER]] : !hal.buffer)[%c256, %c32], -// CHECK-NEXT: %c2 = (%[[BUFFER]] : !hal.buffer)[%c512, %c32] -// CHECK-NEXT: ]) // Calculate the workgroup count, which we leave symbolic until after // translation: @@ -96,7 +85,15 @@ hal.executable private @ex0 { // Dispatch up to batch size dispatches: // CHECK: scf.for %{{.+}} = %c0 
to %[[BATCH_SIZE]] step %c1 { -// CHECK-NEXT: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> target(%[[EXECUTABLE:.+]] : !hal.executable)[%[[ORDINAL_0]]] workgroups([%[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]]) +// CHECK-NEXT: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> +// CHECK-SAME: target(%[[EXECUTABLE:.+]] : !hal.executable)[%[[ORDINAL_0]]] +// CHECK-SAME: workgroups([%[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]]) +// CHECK-SAME: constants([%c100_i32, %c200_i32]) +// CHECK-SAME: bindings([ +// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c0, %c32], +// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c256, %c32], +// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c512, %c32] +// CHECK-NEXT: ]) // CHECK-NEXT: hal.command_buffer.execution_barrier // CHECK-NEXT: } @@ -181,10 +178,8 @@ util.global private @device_b = #hal.device.target<"local", [ #executable_target_embedded_elf_x86_64 ]> : !hal.device -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable private @ex_0 { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir index 5de1c9c8686ca..d22f7a187ecfc 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir @@ -4,12 +4,10 @@ // but this is much easier to test with lit. 
#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // CHECK: hal.executable public @ex0 diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir index 861d4e0a962a1..1b9070d2b1165 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir @@ -3,127 +3,21 @@ // Tests that redundant barriers are elided but barriers gaurding ops are not. // CHECK-LABEL: @elideRedundantBarriers -// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout) -util.func public @elideRedundantBarriers(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) { +// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[BUFFER:.+]]: !hal.buffer) +util.func public @elideRedundantBarriers(%cmd: !hal.command_buffer, %buffer: !hal.buffer) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index - %c42_i32 = arith.constant 42 : i32 // CHECK: hal.command_buffer.execution_barrier hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") // CHECK-NOT: hal.command_buffer.execution_barrier hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") - // CHECK: hal.command_buffer.push_constants - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout) offset(0) 
values([%c42_i32]) : i32 + // CHECK: hal.command_buffer.copy_buffer + hal.command_buffer.copy_buffer<%cmd : !hal.command_buffer> + source(%buffer : !hal.buffer)[%c0] + target(%buffer : !hal.buffer)[%c0] + length(%c1) // CHECK: hal.command_buffer.execution_barrier hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") // CHECK: util.return util.return } - -// ----- - -// CHECK-LABEL: @elidePushConstants -util.func public @elidePushConstants(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) { - // CHECK-DAG: %[[C0:.+]] = arith.constant 0 - %c0 = arith.constant 0 : i32 - // CHECK-DAG: %[[C1:.+]] = arith.constant 1 - %c1 = arith.constant 1 : i32 - // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]], %[[C1]]]) - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(0) - values([%c0, %c1]) : i32, i32 - // CHECK-NOT: hal.command_buffer.push_constants - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(0) - values([%c0, %c1]) : i32, i32 - // CHECK-NOT: hal.command_buffer.push_constants - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(0) - values([%c0, %c1]) : i32, i32 - // CHECK: util.return - util.return -} - -// ----- - -// CHECK-LABEL: @elidePushConstantsPrefix -util.func public @elidePushConstantsPrefix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) { - // CHECK-DAG: %[[C0:.+]] = arith.constant 0 - %c0 = arith.constant 0 : i32 - // CHECK-DAG: %[[C1:.+]] = arith.constant 1 - %c1 = arith.constant 1 : i32 - // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]]]) - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - 
offset(0) - values([%c0]) : i32 - // CHECK: hal.command_buffer.push_constants{{.+}} offset(1) values([%[[C1]]]) - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(0) - values([%c0, %c1]) : i32, i32 - // CHECK-NOT: hal.command_buffer.push_constants - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(1) - values([%c1]) : i32 - // CHECK: util.return - util.return -} - -// ----- - -// CHECK-LABEL: @elidePushConstantsSuffix -util.func public @elidePushConstantsSuffix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) { - // CHECK-DAG: %[[C0:.+]] = arith.constant 0 - %c0 = arith.constant 0 : i32 - // CHECK-DAG: %[[C1:.+]] = arith.constant 1 - %c1 = arith.constant 1 : i32 - // CHECK-DAG: %[[C2:.+]] = arith.constant 2 - %c2 = arith.constant 2 : i32 - // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]], %[[C1]], %[[C2]]]) - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(0) - values([%c0, %c1, %c2]) : i32, i32, i32 - // CHECK: hal.command_buffer.push_constants{{.+}} offset(1) values([%[[C0]]]) - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> - layout(%pipeline_layout : !hal.pipeline_layout) - offset(1) - values([%c0, %c2]) : i32, i32 - // CHECK: util.return - util.return -} - -// ----- - -// NOTE: today we just check for complete equality. 
- -// CHECK-LABEL: @elidePushDescriptorSet -// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout, %[[BUFFER0:.+]]: !hal.buffer, %[[BUFFER1:.+]]: !hal.buffer) -util.func public @elidePushDescriptorSet(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout, %buffer0: !hal.buffer, %buffer1: !hal.buffer) { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - // CHECK-DAG: %[[SIZE0:.+]] = arith.constant 100 - %size0 = arith.constant 100 : index - // CHECK-DAG: %[[SIZE1:.+]] = arith.constant 101 - %size1 = arith.constant 101 : index - // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> layout(%[[LAYOUT]] : !hal.pipeline_layout)[%c0] bindings([ - // CHECK-NEXT: %c0 = (%[[BUFFER0]] : !hal.buffer)[%c0, %[[SIZE0]]], - // CHECK-NEXT: %c1 = (%[[BUFFER1]] : !hal.buffer)[%c0, %[[SIZE1]]] - // CHECK-NEXT: ]) - hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([ - %c0 = (%buffer0 : !hal.buffer)[%c0, %size0], - %c1 = (%buffer1 : !hal.buffer)[%c0, %size1] - ]) - // CHECK-NOT: hal.command_buffer.push_descriptor_set - hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([ - %c0 = (%buffer0 : !hal.buffer)[%c0, %size0], - %c1 = (%buffer1 : !hal.buffer)[%c0, %size1] - ]) - // CHECK: util.return - util.return -} diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir index 5623e7fccf0e5..2fb1a667eef15 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir @@ -10,17 +10,15 @@ util.global private @default_device = #hal.device.target<"cpu", [ ]> : !hal.device // CHECK: #pipeline_layout = 
#hal.pipeline.layout< -// CHECK-SAME: push_constants = 1 -// CHECK-SAME: sets = [ -// CHECK-SAME: <0, bindings = [ -// CHECK-SAME: <0, storage_buffer, "ReadOnly|Indirect"> -// CHECK-SAME: <1, storage_buffer, "ReadOnly|Indirect"> -// CHECK-SAME: <2, storage_buffer, Indirect> +// CHECK-SAME: constants = 1 +// CHECK-SAME: bindings = [ +// CHECK-SAME: #hal.pipeline.binding +// CHECK-SAME: #hal.pipeline.binding +// CHECK-SAME: #hal.pipeline.binding // CHECK: hal.executable private @ex // CHECK: hal.executable.variant public @arm_64 target(#executable_target_arm_64 // CHECK: hal.executable.export public @entry ordinal(0) layout(#pipeline_layout) -// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>] // CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index): // CHECK-NEXT: hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index // CHECK-NEXT: } @@ -29,7 +27,6 @@ util.global private @default_device = #hal.device.target<"cpu", [ // CHECK-NEXT: func.func @entry // CHECK: hal.executable.variant public @x86_64 target(#executable_target_x86_64 // CHECK: hal.executable.export public @entry ordinal(0) layout(#pipeline_layout) -// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>] // CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index): // CHECK-NEXT: hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index // CHECK-NEXT: } @@ -159,10 +156,8 @@ util.global private @riscv_device = #hal.device.target<"cpu", [ // CHECK: hal.executable.variant public @riscv_32 // CHECK: hal.executable.variant public @x86_64 hal.executable.source private @ex { - hal.executable.export public @entry layout(#hal.pipeline.layout - ]> + hal.executable.export public @entry layout(#hal.pipeline.layout ]>) builtin.module { func.func @entry() { @@ -227,10 +222,8 @@ module { // 
CHECK: hal.executable.variant public @riscv_32 // CHECK: hal.executable.variant public @x86_64 hal.executable.source public @ex { - hal.executable.export public @entry layout(#hal.pipeline.layout - ]> + hal.executable.export public @entry layout(#hal.pipeline.layout ]>) builtin.module { func.func @entry() { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir index 4e562f63f72c7..4a24e7862e93b 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir @@ -1,51 +1,13 @@ // RUN: iree-opt --split-input-file --iree-hal-materialize-resource-caches %s | FileCheck %s -// CHECK: util.global private @device = #hal.device.ordinal<0> -util.global private @device = #hal.device.ordinal<0> : !hal.device -// CHECK: util.global private @__device_pipeline_layout_0 : !hal.pipeline_layout -// CHECK-NEXT: util.initializer { -// CHECK-DAG: %[[DEVICE:.+]] = util.global.load @device -// CHECK-DAG: %[[SET_LAYOUT_0:.+]] = hal.descriptor_set_layout.create -// CHECK-SAME: device(%[[DEVICE]] : !hal.device) -// CHECK-SAME: flags("None") -// CHECK-SAME: bindings([ -// CHECK-SAME: #hal.descriptor_set.binding<0, storage_buffer>, -// CHECK-SAME: #hal.descriptor_set.binding<1, storage_buffer> -// CHECK-SAME: ]) : !hal.descriptor_set_layout -// CHECK-NEXT: %[[PIPELINE_LAYOUT:.+]] = hal.pipeline_layout.create -// CHECK-SAME: device(%[[DEVICE]] : !hal.device) -// CHECK-SAME: push_constants(1) -// CHECK-SAME: layouts([%[[SET_LAYOUT_0]]]) : !hal.pipeline_layout -// CHECK-NEXT: util.global.store %[[PIPELINE_LAYOUT]], @__device_pipeline_layout_0 : !hal.pipeline_layout - -// CHECK-LABEL: @exeLayoutLookup -util.func public @exeLayoutLookup() -> !hal.pipeline_layout { - %device = util.global.load @device : !hal.device - // CHECK: 
%[[LOADED_LAYOUT:.+]] = util.global.load @__device_pipeline_layout_0 : !hal.pipeline_layout - %0 = hal.pipeline_layout.lookup device(%device : !hal.device) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> - ]>) : !hal.pipeline_layout - // CHECK-NEXT: util.return %[[LOADED_LAYOUT]] - util.return %0 : !hal.pipeline_layout -} - -// ----- - -#pipeline_layout_0 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout_0 = #hal.pipeline.layout, + #hal.pipeline.binding ]> -#pipeline_layout_1 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout_1 = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // CHECK: hal.executable private @exe @@ -83,22 +45,12 @@ hal.executable private @exe { util.global private @device = #hal.device.ordinal<0> : !hal.device // Cached resources for the device. -// CHECK: util.global private @__device_pipeline_layout_0 : !hal.pipeline_layout -// CHECK: util.global private @__device_pipeline_layout_1 : !hal.pipeline_layout // CHECK: util.global private @__device_executable_0_exe : !hal.executable // Device initializer for all resources used with the device: // CHECK: util.initializer // CHECK: %[[DEVICE:.+]] = util.global.load @device -// Create pipeline layouts (and required descriptor set layouts): -// CHECK: %[[SET_LAYOUT_0:.+]] = hal.descriptor_set_layout.create device(%[[DEVICE]] : !hal.device) -// CHECK: %[[SET_LAYOUT_1:.+]] = hal.descriptor_set_layout.create device(%[[DEVICE]] : !hal.device) -// CHECK: %[[PIPELINE_LAYOUT_0:.+]] = hal.pipeline_layout.create device(%[[DEVICE]] : !hal.device) push_constants(0) layouts([%[[SET_LAYOUT_0]]]) : !hal.pipeline_layout -// CHECK: util.global.store %[[PIPELINE_LAYOUT_0]], @__device_pipeline_layout_0 -// CHECK: %[[PIPELINE_LAYOUT_1:.+]] = hal.pipeline_layout.create device(%device : !hal.device) push_constants(0) 
layouts([%[[SET_LAYOUT_1]]]) : !hal.pipeline_layout -// CHECK: util.global.store %[[PIPELINE_LAYOUT_1]], @__device_pipeline_layout_1 - // Switch on the supported formats: // CHECK: %{{.+}}, %[[FORMAT_VMVX:.+]] = hal.device.query<%[[DEVICE]] : !hal.device> key("hal.executable.format" :: "vmvx-bytecode-fb") // CHECK: %[[VMVX_CONDITION:.+]] = scf.execute_region -> i1 { @@ -120,7 +72,6 @@ util.global private @device = #hal.device.ordinal<0> : !hal.device // CHECK: %[[EXE:.+]] = hal.executable.create // CHECK-SAME: device(%[[DEVICE]] : !hal.device) // CHECK-SAME: target(@exe::@vmvx) -// CHECK-SAME: layouts([%[[PIPELINE_LAYOUT_0]], %[[PIPELINE_LAYOUT_0]], %[[PIPELINE_LAYOUT_1]]]) // CHECK-SAME: constants([%[[CONST_01]]#0, %[[CONST_01]]#1, %[[CONST_2]]]) // CHECK-SAME: : !hal.executable @@ -172,11 +123,9 @@ hal.executable private @exe { %ok, %selected = hal.device.query<%device : !hal.device> key("some" :: "feature") : i1, i1 hal.return %selected : i1 } - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) // CHECK-NOT: hal.executable.constant.block hal.executable.constant.block() -> (i32, i32) as ("foo", "bar") { @@ -189,13 +138,9 @@ hal.executable private @exe { // CHECK: util.global private @primary_device util.global private @primary_device = #hal.device.ordinal<0> : !hal.device -// CHECK-NEXT: util.global private @__primary_device_pipeline_layout_0 // CHECK-NEXT: util.global private @__primary_device_executable_0_exe // CHECK-NEXT: util.initializer // CHECK: util.global.load @primary_device -// CHECK: hal.descriptor_set_layout.create -// CHECK: hal.pipeline_layout.create -// CHECK: util.global.store {{.+}}, @__primary_device_pipeline_layout_0 // CHECK: hal.executable.create // CHECK: util.global.store {{.+}}, @__primary_device_executable_0_exe // CHECK: util.func private 
@__primary_device_executable_0_exe_constant_block_0 @@ -205,7 +150,6 @@ util.global private @optional_device = #hal.device.select<[ #hal.device.ordinal<1> : !hal.device, #hal.device.fallback<@primary_device> : !hal.device ]> : !hal.device -// CHECK-NEXT: util.global private @__optional_device_pipeline_layout_0 // CHECK-NEXT: util.global private @__optional_device_executable_0_exe // CHECK-NEXT: util.initializer // CHECK-DAG: %[[OPTIONAL_DEVICE:.+]] = util.global.load @optional_device @@ -214,14 +158,9 @@ util.global private @optional_device = #hal.device.select<[ // CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]] // CHECK-DAG: scf.index_switch %[[INDEX]] // CHECK: case 0 -// CHECK: %[[PRIMARY_LAYOUT:.+]] = util.global.load @__primary_device_pipeline_layout_0 -// CHECK: util.global.store %[[PRIMARY_LAYOUT]], @__optional_device_pipeline_layout_0 // CHECK: %[[PRIMARY_EXE:.+]] = util.global.load @__primary_device_executable_0_exe // CHECK: util.global.store %[[PRIMARY_EXE]], @__optional_device_executable_0_exe // CHECK: default -// CHECK: hal.descriptor_set_layout.create -// CHECK: hal.pipeline_layout.create -// CHECK: util.global.store {{.+}}, @__optional_device_pipeline_layout_0 // CHECK: hal.executable.create // CHECK: util.global.store {{.+}}, @__optional_device_executable_0_exe // CHECK: util.func private @__optional_device_executable_0_exe_constant_block_0 @@ -248,23 +187,17 @@ util.func public @fallbackLookup() -> (!hal.executable, !hal.executable) { hal.executable private @exe { hal.executable.variant @vmvx target(<"vmvx", "vmvx-bytecode-fb">) { - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout ]>) } } // CHECK-LABEL: util.global private @primary_device util.global private @primary_device = #hal.device.ordinal<0> : !hal.device -// CHECK-NEXT: util.global private @__primary_device_pipeline_layout_0 // CHECK-NEXT: util.global private 
@__primary_device_executable_0_exe // CHECK-NEXT: util.initializer // CHECK: util.global.load @primary_device -// CHECK: hal.descriptor_set_layout.create -// CHECK: hal.pipeline_layout.create -// CHECK: util.global.store {{.+}}, @__primary_device_pipeline_layout_0 // CHECK: hal.executable.create // CHECK: util.global.store {{.+}}, @__primary_device_executable_0_exe @@ -273,7 +206,6 @@ util.global private @optional_device_0 = #hal.device.select<[ #hal.device.ordinal<1> : !hal.device, #hal.device.fallback<@primary_device> : !hal.device ]> : !hal.device -// CHECK-NEXT: util.global private @__optional_device_0_pipeline_layout_0 // CHECK-NEXT: util.global private @__optional_device_0_executable_0_exe // CHECK-NEXT: util.initializer // CHECK-DAG: %[[OPTIONAL_DEVICE_0:.+]] = util.global.load @optional_device_0 @@ -281,8 +213,6 @@ util.global private @optional_device_0 = #hal.device.select<[ // CHECK-DAG: %[[DEVICE_EQ:.+]] = util.cmp.eq %[[OPTIONAL_DEVICE_0]], %[[PRIMARY_DEVICE]] // CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]] // CHECK-DAG: scf.index_switch %[[INDEX]] -// CHECK: util.global.load @__primary_device_pipeline_layout_0 -// CHECK: util.global.store {{.+}}, @__optional_device_0_pipeline_layout_0 // CHECK: util.global.load @__primary_device_executable_0_exe // CHECK: util.global.store {{.+}}, @__optional_device_0_executable_0_exe @@ -291,7 +221,6 @@ util.global private @optional_device_1 = #hal.device.select<[ #hal.device.ordinal<2> : !hal.device, #hal.device.fallback<@optional_device_0> : !hal.device ]> : !hal.device -// CHECK-NEXT: util.global private @__optional_device_1_pipeline_layout_0 // CHECK-NEXT: util.global private @__optional_device_1_executable_0_exe // CHECK-NEXT: util.initializer // CHECK-DAG: %[[OPTIONAL_DEVICE_1:.+]] = util.global.load @optional_device_1 @@ -299,8 +228,6 @@ util.global private @optional_device_1 = #hal.device.select<[ // CHECK-DAG: %[[DEVICE_EQ:.+]] = util.cmp.eq %[[OPTIONAL_DEVICE_1]], %[[OPTIONAL_DEVICE_0]] // 
CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]] // CHECK-DAG: scf.index_switch %[[INDEX]] -// CHECK: util.global.load @__optional_device_0_pipeline_layout_0 -// CHECK: util.global.store {{.+}}, @__optional_device_1_pipeline_layout_0 // CHECK: util.global.load @__optional_device_0_executable_0_exe // CHECK: util.global.store {{.+}}, @__optional_device_1_executable_0_exe @@ -322,34 +249,13 @@ util.func public @fallbackOnlyLookup() -> !hal.executable { // could rework the pass to support only materializing what's required based on // what resources are looked up. -#pipeline_layout_0 = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout_0 = #hal.pipeline.layout, + #hal.pipeline.binding ]> util.global private @device : !hal.device -util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout -util.initializer { - %c0 = arith.constant 0 : index - %device = hal.devices.get %c0 : !hal.device - %descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) flags("None") bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>]) : !hal.descriptor_set_layout - util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout - util.return -} - -util.global private @_pipeline_layout_0 : !hal.pipeline_layout -util.initializer { - %_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout - %c0 = arith.constant 0 : index - %device = hal.devices.get %c0 : !hal.device - %pipeline_layout = hal.pipeline_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.pipeline_layout - util.global.store %pipeline_layout, @_pipeline_layout_0 : !hal.pipeline_layout - util.return -} - util.global private @_executable_exe : !hal.executable util.initializer { %c0 = arith.constant 0 : index @@ -359,8 +265,7 @@ util.initializer { %variant = 
arith.select %format_supported, %c0, %c-1 : index %selected = scf.index_switch %variant -> !hal.executable case 0 { - %_pipeline_layout_0 = util.global.load @_pipeline_layout_0 : !hal.pipeline_layout - %exe = hal.executable.create device(%device : !hal.device) target(@exe0::@vmvx) layouts([%_pipeline_layout_0]) : !hal.executable + %exe = hal.executable.create device(%device : !hal.device) target(@exe0::@vmvx) : !hal.executable scf.yield %exe : !hal.executable } default { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir index 6f1f7999ec9e3..8dd0e55bce991 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir @@ -194,6 +194,9 @@ util.global private @buffer : !hal.buffer // CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer> // CHECK-SAME: target(%[[APPLY_EXECUTABLE]] : !hal.executable) // CHECK-SAME: workgroups(%[[APPLY_BUFFER]] : !hal.buffer) +// CHECK-SAME: bindings([ +// CHECK-NEXT: (%[[APPLY_BUFFER]] : !hal.buffer)[%c0, %c1] +// CHECK-NEXT: ]) // CHECK: hal.command_buffer.execution_barrier // CHECK: hal.command_buffer.finalize // CHECK: util.return %[[CMD]] @@ -216,6 +219,8 @@ util.func public @memoize_command_buffer() -> !hal.command_buffer { %affinity = arith.constant 2 : i64 %executable = util.global.load immutable @executable : !hal.executable %buffer = util.global.load immutable @buffer : !hal.buffer + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index // CHECK-NOT: hal.device.memoize // CHECK: %[[CMD:.+]] = util.call @__memoize_command_buffer_memoize_lookup %result = hal.device.memoize<%device : !hal.device> affinity(%affinity) -> !hal.command_buffer { @@ -225,6 +230,9 @@ util.func public @memoize_command_buffer() -> !hal.command_buffer { 
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer> target(%executable : !hal.executable)[%dispatch_ordinal] workgroups(%buffer : !hal.buffer)[%offset] + bindings([ + (%buffer : !hal.buffer)[%c0, %c1] + ]) flags(None) hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source(CommandIssue) target(CommandProcess) flags(None) diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir index 6742b24cce776..0f4a829c3a3a1 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir @@ -21,7 +21,9 @@ hal.executable private @executable_a { // CHECK: hal.executable.variant public @variant_a hal.executable.variant public @variant_a target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 123 : i64}>) { - hal.executable.export public @dispatch_a ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch_a ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device, %arg1: index): %c1 = arith.constant 1 : index hal.return %c1, %c1, %c1 : index, index, index @@ -37,7 +39,9 @@ hal.executable private @executable_a { } // CHECK: hal.executable.variant public @variant_unmodified hal.executable.variant public @variant_unmodified target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {}>) { - hal.executable.export public @dispatch_unmodified ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch_unmodified ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device, %arg1: index): %c1 = arith.constant 1 : index hal.return %c1, %c1, %c1 : index, index, index @@ -57,7 +61,9 @@ hal.executable private @executable_a { hal.executable private @executable_b { // CHECK: hal.executable.variant public @variant_b hal.executable.variant 
public @variant_b target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 456 : i64}>) { - hal.executable.export public @dispatch_b ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch_b ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index hal.return %c1, %c1, %c1 : index, index, index diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir index 8d2ea70f91a45..3c711ef39008b 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir @@ -5,10 +5,8 @@ // as part of this pass for consistency (after running no executables/variants/ // exports that are unused exist). -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> // Should be removed as there are no uses. @@ -57,10 +55,8 @@ util.func private @user() attributes { // Tests that an export with no references is dropped. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable private @exe { hal.executable.variant public @variant target(<"backend", "format">) { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir index a139ecedef038..164bcbf292fb5 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir @@ -1,24 +1,25 @@ // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-hal-repeat-dispatches{count=2}))' %s | FileCheck %s -util.global @_executable : !hal.executable +util.global @executable : !hal.executable // CHECK-LABEL: @duplicate_dispatches -// CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer, -// CHECK-SAME: %[[CMD2:.+]]: !hal.command_buffer) -util.func public @duplicate_dispatches(%cmd1 : !hal.command_buffer, %cmd2 : !hal.command_buffer) { - // CHECK: %[[EXE:.+]] = util.global.load @_executable - %exe = util.global.load @_executable : !hal.executable +// CHECK-SAME: (%[[CMD1:[a-z0-9]+]]: !hal.command_buffer, +// CHECK-SAME: %[[CMD2:[a-z0-9]+]]: !hal.command_buffer, +// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer) +util.func public @duplicate_dispatches(%cmd1: !hal.command_buffer, %cmd2: !hal.command_buffer, %buffer: !hal.buffer) { + // CHECK: %[[EXE:.+]] = util.global.load @executable + %exe = util.global.load @executable : !hal.executable %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index - hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None) 
hal.command_buffer.execution_barrier<%cmd1 : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None") - hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c1] workgroups([%c2, %c2, %c2]) flags(None) + hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c1] workgroups([%c2, %c2, %c2]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None) - hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c2] workgroups([%c1, %c1, %c1]) flags(None) - hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c3] workgroups([%c2, %c2, %c2]) flags(None) + hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c2] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None) + hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c3] workgroups([%c2, %c2, %c2]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None) hal.command_buffer.execution_barrier<%cmd2 : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None") util.return @@ -46,20 +47,21 @@ util.func public @duplicate_dispatches(%cmd1 : !hal.command_buffer, %cmd2 : !hal // ----- -util.global @_executable : !hal.executable +util.global @executable : !hal.executable // CHECK-LABEL: @nested_dispatch // CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer, +// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, // CHECK-SAME: %[[IDX:.+]]: index) -util.func public @nested_dispatch(%cmd1 : !hal.command_buffer, %idx : index) { - // CHECK: %[[EXE:.+]] = util.global.load @_executable - %exe = util.global.load @_executable : !hal.executable +util.func public @nested_dispatch(%cmd1: !hal.command_buffer, %buffer: !hal.buffer, %idx: index) { + // CHECK: %[[EXE:.+]] = util.global.load @executable + %exe = util.global.load @executable : 
!hal.executable %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index scf.index_switch %idx case 0 { - hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) flags(None) + hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None) scf.yield } default { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir index 5ce9f72a9c006..45658e944074b 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir @@ -2,19 +2,15 @@ hal.executable @exe0 { hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) { - hal.executable.export @entry123 ordinal(123) layout(#hal.pipeline.layout - ]> + hal.executable.export @entry123 ordinal(123) layout(#hal.pipeline.layout ]>) } } hal.executable @exe1 { hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) { - hal.executable.export @entry456 ordinal(456) layout(#hal.pipeline.layout - ]> + hal.executable.export @entry456 ordinal(456) layout(#hal.pipeline.layout ]>) } } diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir index ef9bff761119e..7cc2a58bb5694 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir @@ -5,11 +5,9 @@ hal.executable @ex { // CHECK: hal.executable.variant public @backend hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) { // CHECK: hal.executable.export 
public @entry0 - hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> + hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) // CHECK-NOT: builtin.module builtin.module { diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir index ba2baad435695..19b229b24db66 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir @@ -7,7 +7,9 @@ // CHECK: hal.executable private @executable0 hal.executable private @executable0 { hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): // CHECK: arith.constant 123 %c1 = arith.constant 1 : index @@ -34,7 +36,9 @@ hal.executable private @executable1 { // CHECK-SAME: path = "substitute_executables_replacement.obj", // CHECK-SAME: data = dense<[72, 69, 76, 76, 79, 33, hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @dispatch1 ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch1 ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): // CHECK: arith.constant 100 : index %c100 = arith.constant 100 : index diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir index 7c96db9720f5c..d12d3d17e4f75 100644 --- 
a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir @@ -1,7 +1,9 @@ // Replacement executable for substitute_executables.mlir. hal.executable private @executable0 { hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) { - hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout]>]>) { + hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout + ]>) { ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): %c123 = arith.constant 123 : index hal.return %c123, %c123, %c123 : index, index, index diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp index 7f171926cafcc..2f1aa4f4c0eca 100644 --- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp +++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp @@ -224,9 +224,7 @@ struct ConvertGetRawInterfaceBindingBufferOp .replaceOpWithNewOp( op, bindingType, bindingsArg, rewriter.createOrFold( - op.getLoc(), op.getLayout().getFlatBindingIndex( - op.getSet().getSExtValue(), - op.getBinding().getSExtValue()))) + op.getLoc(), op.getBinding().getSExtValue())) .getResult(); return success(); } @@ -249,12 +247,13 @@ struct ConvertHALInterfaceBindingSubspanOp IndexSet indexSet(op.getLoc(), rewriter); auto bindingType = llvm::cast(bindingsArg.getType()) .getElementType(); - auto sourceBuffer = rewriter - .create( - op.getLoc(), bindingType, bindingsArg, - rewriter.createOrFold( - op.getLoc(), op.getFlatBindingIndex())) - .getResult(); + auto sourceBuffer = + rewriter + .create( + op.getLoc(), bindingType, bindingsArg, + rewriter.createOrFold( + op.getLoc(), op.getBinding().getSExtValue())) + .getResult(); if (op.getByteOffset() && 
!matchPattern(op.getByteOffset(), m_Zero())) { // Offsetted binding: replace with a BufferSubspanOp. diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir index dabea3303bc12..91a8bb66c1c13 100644 --- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir +++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir @@ -1,10 +1,8 @@ // RUN: iree-opt --split-input-file --iree-vmvx-conversion --canonicalize %s | FileCheck %s -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> // CHECK: util.global private @__constant_5xi32 : !util.buffer @@ -33,9 +31,9 @@ func.func @entry() { %c1 = arith.constant 1 : index %0 = memref.get_global @__constant_5xi32 : memref<5xi32> // CHECK: %[[BINDING0:.+]] = util.list.get %[[BINDINGS]][%c0] : !util.list - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<5xf32> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<5xf32> // CHECK: %[[BINDING1:.+]] = util.list.get %[[BINDINGS]][%c1] : !util.list - %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xi32> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xi32> %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td index 218a38fe31496..6321e1cdae33b 100644 --- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td +++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td @@ -73,7 +73,6 @@ def 
VMVX_GetRawInterfaceBindingBufferOp : VMVX_PureOp< }]; let arguments = (ins HAL_PipelineLayoutAttr:$layout, - IndexAttr:$set, IndexAttr:$binding ); let results = (outs @@ -81,7 +80,6 @@ def VMVX_GetRawInterfaceBindingBufferOp : VMVX_PureOp< ); let assemblyFormat = [{ `layout` `(` $layout `)` - `set` `(` $set `)` `binding` `(` $binding `)` attr-dict }]; diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp index 5d38c4f9bce08..cc4032395f10b 100644 --- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp +++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp @@ -315,7 +315,7 @@ struct FromHalInterfaceBindingSubspan rewriter .create( loc, op.getBaseBuffer().getType(), binding.getLayout(), - binding.getSetAttr(), binding.getBindingAttr()) + binding.getBindingAttr()) .getResult()); rewriter.eraseOp(op); diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir index f2b1ffce4649b..e81000221ebc2 100644 --- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir +++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir @@ -60,15 +60,13 @@ func.func @resolve_subview_rankreducing_not_at_the_end(%arg0: memref<8x16x4xf32> // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_zero_offset() -> (!util.buffer, index, index, index, index, index) { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x384xf32> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x384xf32> 
%base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<512x384xf32> -> !util.buffer, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index } @@ -77,19 +75,17 @@ func.func @resolve_binding_subspan_zero_offset() -> (!util.buffer, index, index, // CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0) +// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) binding(0) // CHECK: return %[[CAST]], %[[C0]], %[[C512]], %[[C384]], %[[C384]], %[[C1]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_offset_index(%arg0 : index) -> (!util.buffer, index, index, index, index, index) { - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex> %base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<512x384xindex> -> !util.buffer, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index } @@ -100,26 +96,24 @@ func.func @resolve_binding_subspan_offset_index(%arg0 : index) -> (!util.buffer, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[INDEX_SIZE:.+]] = util.sizeof index // CHECK-DAG: %[[OFFSET:.+]] = affine.apply #map()[%arg0, %[[INDEX_SIZE]]] -// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0) +// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer 
layout({{.+}}) binding(0) // CHECK: return %[[CAST]], %[[OFFSET]], %[[C512]], %[[C384]], %[[C384]], %[[C1]] // ----- -#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> func.func @resolve_binding_subspan_dyn_dims(%arg0 : index, %arg1 : index) -> (!util.buffer, index, index, index, index, index) { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref{%arg0, %arg1} + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref{%arg0, %arg1} %base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref -> !util.buffer, index, index, index, index, index return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index } // CHECK: func @resolve_binding_subspan_dyn_dims( // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0) +// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) binding(0) // CHECK: return %[[CAST]], %{{.+}}, %arg0, %arg1, %arg1, %[[C1]] // ----- diff --git a/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp b/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp index 1db69fc438039..f0b0322f6b093 100644 --- a/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp @@ -739,7 +739,7 @@ static bool updateProducersFromConsumers( FailureOr consumerToProducerMap = getConsumerLoopToProducerLoopsMap(*operand); if (failed(consumerToProducerMap)) { - didChange |= producerInfo.getCollapsibleLoops().size(); + didChange |= !producerInfo.getCollapsibleLoops().empty(); producerInfo.clear(); continue; } diff --git a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp 
b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp index 7b950f7ff3ad8..de1f736038737 100644 --- a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp +++ b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp @@ -94,43 +94,34 @@ convertDescriptorFlags(std::optional src) { } } -static IREE::HAL::DescriptorSetBindingAttr -convertDescriptorSetBinding(IREE::Input::DescriptorSetBindingAttr src) { - return IREE::HAL::DescriptorSetBindingAttr::get( - src.getContext(), src.getOrdinal(), convertDescriptorType(src.getType()), +static IREE::HAL::PipelineBindingAttr +convertPipelineBinding(IREE::Input::PipelineBindingAttr src) { + return IREE::HAL::PipelineBindingAttr::get( + src.getContext(), convertDescriptorType(src.getType()), convertDescriptorFlags(src.getFlags())); } -static std::optional -convertDescriptorSetLayoutFlags( - std::optional src) { +static std::optional convertPipelineLayoutFlags( + std::optional src) { if (!src.has_value()) return std::nullopt; switch (*src) { - case IREE::Input::DescriptorSetLayoutFlags::None: - return IREE::HAL::DescriptorSetLayoutFlags::None; - case IREE::Input::DescriptorSetLayoutFlags::Indirect: - return IREE::HAL::DescriptorSetLayoutFlags::Indirect; + case IREE::Input::PipelineLayoutFlags::None: + return IREE::HAL::PipelineLayoutFlags::None; + case IREE::Input::PipelineLayoutFlags::Indirect: + return IREE::HAL::PipelineLayoutFlags::Indirect; default: return std::nullopt; } } -static IREE::HAL::DescriptorSetLayoutAttr -convertDescriptorSetLayout(IREE::Input::DescriptorSetLayoutAttr src) { - return IREE::HAL::DescriptorSetLayoutAttr::get( - src.getContext(), src.getOrdinal(), - convertAttributes( - src.getBindings(), convertDescriptorSetBinding), - convertDescriptorSetLayoutFlags(src.getFlags())); -} - static IREE::HAL::PipelineLayoutAttr convertPipelineLayout(IREE::Input::PipelineLayoutAttr src) { return IREE::HAL::PipelineLayoutAttr::get( - src.getContext(), 
src.getPushConstants(), - convertAttributes( - src.getSetLayouts(), convertDescriptorSetLayout)); + src.getContext(), + convertAttributes(src.getBindings(), + convertPipelineBinding), + src.getConstants(), convertPipelineLayoutFlags(src.getFlags())); } static IREE::HAL::ExecutableObjectAttr diff --git a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir index d9cff4410e75d..0c48dbc935380 100644 --- a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir +++ b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir @@ -385,11 +385,11 @@ func.func @optimization_barrier(%arg0 : tensor) -> tensor { // ----- // CHECK: #[[PTX:.*]] = #hal.executable.target<"cuda", "cuda-nvptx-fb"> -// CHECK: #[[LAYOUT:.*]] = #hal.pipeline.layout, -// CHECK-SAME: <1, storage_buffer> -// CHECK-SAME: ]>]> +// CHECK: #[[LAYOUT:.*]] = #hal.pipeline.layout, +// CHECK-SAME: #hal.pipeline.binding +// CHECK-SAME: ]> // CHECK: hal.executable.source private @executable // CHECK-SAME: {objects = #hal.executable.objects<{ @@ -409,11 +409,9 @@ builtin.module @executable_source { }> } { iree_input.executable.export public @add ordinal(0) - layout(#iree_input.pipeline.layout, - <1, storage_buffer> - ]> + layout(#iree_input.pipeline.layout, + #iree_input.pipeline.binding ]>) attributes { workgroup_size = [64 : index, 1 : index, 1 : index] } diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp index a048e7942bc08..a4f60efd07d91 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp +++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp @@ -104,17 +104,14 @@ class InlineExecutablesPass final // Build dispatch function signature that the stream.cmd.dispatch ops will // 
map to. auto layoutAttr = exportOp.getLayout(); - size_t totalBindingCount = 0; - for (auto setLayout : layoutAttr.getSetLayouts()) { - totalBindingCount += setLayout.getBindings().size(); - } + size_t bindingCount = layoutAttr.getBindings().size(); SmallVector inputTypes; inputTypes.append(exportOp.getWorkgroupCountBody()->getNumArguments() - 1, indexType); // workload - inputTypes.append(layoutAttr.getPushConstants(), i32Type); - inputTypes.append(totalBindingCount, bufferType); // buffers - inputTypes.append(totalBindingCount, indexType); // offsets - inputTypes.append(totalBindingCount, indexType); // lengths + inputTypes.append(layoutAttr.getConstants(), i32Type); + inputTypes.append(bindingCount, bufferType); // buffers + inputTypes.append(bindingCount, indexType); // offsets + inputTypes.append(bindingCount, indexType); // lengths auto dispatchFuncType = innerModuleBuilder.getFunctionType(inputTypes, {}); @@ -136,13 +133,13 @@ class InlineExecutablesPass final return exportOp.emitOpError("missing body function"); } if (bodyFuncOp.isPublic()) { - if (failed(rewriteWorkgroupSignature(layoutAttr, totalBindingCount, + if (failed(rewriteWorkgroupSignature(layoutAttr, bindingCount, bodyFuncOp))) { return failure(); } bodyFuncOp.setPrivate(); // so we only do it once } - buildDispatchFunc(exportOp, layoutAttr, totalBindingCount, bodyFuncOp, + buildDispatchFunc(exportOp, layoutAttr, bindingCount, bodyFuncOp, dispatchFuncOp); // Map from what the stream.cmd.dispatch ops is using to the new function. @@ -185,7 +182,7 @@ class InlineExecutablesPass final // about the function signatures. 
LogicalResult rewriteWorkgroupSignature(IREE::HAL::PipelineLayoutAttr layoutAttr, - size_t totalBindingCount, + size_t bindingCount, FunctionOpInterface bodyFuncOp) { auto *entryBlock = &bodyFuncOp.front(); auto builder = OpBuilder::atBlockBegin(entryBlock); @@ -209,10 +206,10 @@ class InlineExecutablesPass final // Expand push constants by replacing buffer accesses with the flattened // args. - newArgTypes.append(layoutAttr.getPushConstants(), i32Type); + newArgTypes.append(layoutAttr.getConstants(), i32Type); auto constantBuffer = entryBlock->getArgument(argOffset++); SmallVector constantArgs; - for (unsigned i = 0; i < layoutAttr.getPushConstants(); ++i) { + for (unsigned i = 0; i < layoutAttr.getConstants(); ++i) { constantArgs.push_back( entryBlock->addArgument(i32Type, constantBuffer.getLoc())); } @@ -221,10 +218,10 @@ class InlineExecutablesPass final } // Expand buffer list by replacing list accesses with the flattened args. - newArgTypes.append(totalBindingCount, bufferType); + newArgTypes.append(bindingCount, bufferType); auto bindingList = entryBlock->getArgument(argOffset++); SmallVector bindingArgs; - for (unsigned i = 0; i < totalBindingCount; ++i) { + for (unsigned i = 0; i < bindingCount; ++i) { bindingArgs.push_back( entryBlock->addArgument(bufferType, bindingList.getLoc())); } @@ -329,7 +326,7 @@ class InlineExecutablesPass final // Builds a function that calls a workgroup body and marshals arguments. // // Incoming: - // (workload..., push_constants..., + // (workload..., constants..., // binding_buffers..., binding_offsets..., binding_lengths...) 
// Body (as translated): // (local_memory, [constants], [bindings], @@ -338,8 +335,7 @@ class InlineExecutablesPass final // workgroup_count_x, workgroup_count_y, workgroup_count_z) void buildDispatchFunc(IREE::HAL::ExecutableExportOp exportOp, IREE::HAL::PipelineLayoutAttr layoutAttr, - size_t totalBindingCount, - FunctionOpInterface bodyFuncOp, + size_t bindingCount, FunctionOpInterface bodyFuncOp, FunctionOpInterface dispatchFuncOp) { auto loc = exportOp.getLoc(); auto builder = OpBuilder::atBlockBegin(dispatchFuncOp.addEntryBlock()); @@ -369,18 +365,18 @@ class InlineExecutablesPass final workgroupArgs.push_back(localMemory); // Pass all constants through. - for (int64_t i = 0; i < layoutAttr.getPushConstants(); ++i) { + for (int64_t i = 0; i < layoutAttr.getConstants(); ++i) { workgroupArgs.push_back(dispatchFuncOp.getArgument(argOffset++)); } // Pass all buffers through as subspans with the binding offset and length // factored in. IPO can propagate the subspans (hopefully). - for (size_t i = 0; i < totalBindingCount; ++i) { + for (size_t i = 0; i < bindingCount; ++i) { auto bindingBuffer = dispatchFuncOp.getArgument(argOffset + i); auto bindingOffset = - dispatchFuncOp.getArgument(argOffset + totalBindingCount + i); - auto bindingLength = dispatchFuncOp.getArgument( - argOffset + totalBindingCount + totalBindingCount + i); + dispatchFuncOp.getArgument(argOffset + bindingCount + i); + auto bindingLength = dispatchFuncOp.getArgument(argOffset + bindingCount + + bindingCount + i); Value bufferSize = builder.create(loc, bindingBuffer); Value bindingView = builder.create( diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir index 1ee7e25e82ee2..4dff570f25911 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir +++ 
b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir @@ -8,14 +8,11 @@ hal.executable private @ex { hal.executable.variant public @vmvx_ir target(<"vmvx-inline", "vmvx-ir">) { hal.executable.export public @dispatch_0 ordinal(0) layout( - #hal.pipeline.layout, - <1, storage_buffer>, - <2, storage_buffer> - ]> - ]>) { + #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding + ]>) { ^bb0(%arg0: !hal.device, %workload_x: index, %workload_y: index): %count_x = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%workload_x] %count_y = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%workload_y] diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp index 857410270693f..687830db72875 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp +++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp @@ -89,19 +89,19 @@ struct ExecutableDispatchOpConversion castToI32(adaptor.getWorkgroupY(), rewriter), castToI32(adaptor.getWorkgroupZ(), rewriter), }; - auto pushConstants = adaptor.getPushConstants(); + auto constants = adaptor.getConstants(); SmallVector segmentSizes = { /*executable=*/-1, /*entry_point=*/-1, /*workgroup_x=*/-1, /*workgroup_y=*/-1, /*workgroup_z=*/-1, - /*push_constants=*/ - static_cast(pushConstants.size()), + /*constants=*/ + static_cast(constants.size()), /*bindings=*/ static_cast(adaptor.getBindingBuffers().size()), }; - callOperands.append(pushConstants.begin(), pushConstants.end()); + callOperands.append(constants.begin(), constants.end()); for (auto [bindingBuffer, bindingOffset, bindingLength] : llvm::zip_equal( adaptor.getBindingBuffers(), adaptor.getBindingOffsets(), adaptor.getBindingLengths())) { diff --git 
a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir index a76749290f416..9370e097c7444 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir +++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir @@ -3,13 +3,9 @@ // NOTE: all other stream.cmd.* ops are handled by the hal_inline conversions. // Executables are required to translate the dispatch calls. -#pipeline_layout = #hal.pipeline.layout - ]>, - #hal.descriptor_set.layout<1, bindings = [ - #hal.descriptor_set.binding<5, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable private @ex { hal.executable.variant public @variant target(#hal.executable.target<"llvm", "embedded-elf-x86_64">) { diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td index aa045b5dc992f..ac7bb0ef68efa 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td +++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td @@ -144,7 +144,7 @@ def HALLoader_ExecutableDispatchOp : HALLoader_Op<"executable.dispatch", [ HAL_Dim:$workgroup_x, HAL_Dim:$workgroup_y, HAL_Dim:$workgroup_z, - Variadic:$push_constants, + Variadic:$constants, Variadic:$binding_buffers, Variadic:$binding_offsets, Variadic:$binding_lengths @@ -158,7 +158,7 @@ def HALLoader_ExecutableDispatchOp : HALLoader_Op<"executable.dispatch", [ $workgroup_y `,` $workgroup_z `]` `)` - (`constants` `(` `[` $push_constants^ `]` `)`)? + (`constants` `(` `[` $constants^ `]` `)`)? 
`bindings` `(` `[` custom($binding_buffers, type($binding_buffers), diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir index d811bc7bc8d75..a76e2d243f1b9 100644 --- a/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir +++ b/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir @@ -32,7 +32,7 @@ vm.import private @executable.dispatch( %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32, - %push_constants : i32 ..., + %constants : i32 ..., // %bindings : tuple... ) diff --git a/docs/website/docs/community/blog/posts/microkernels.md b/docs/website/docs/community/blog/posts/microkernels.md index a948807225dbe..7e14195b05551 100644 --- a/docs/website/docs/community/blog/posts/microkernels.md +++ b/docs/website/docs/community/blog/posts/microkernels.md @@ -467,9 +467,9 @@ module { %53 = arith.shli %52, %c32_i64 : i64 %54 = arith.ori %51, %53 : i64 %55 = arith.index_castui %54 : i64 to index - %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%30, %35} - %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor>{%40, %45} - %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : !flow.dispatch.tensor>{%50, %55} + %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>{%30, %35} + %57 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor>{%40, %45} + %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : !flow.dispatch.tensor>{%50, %55} %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : 
index %workgroup_id_y = hal.interface.workgroup.id[1] : index @@ -566,11 +566,11 @@ func.func @matmul_dynamic_dispatch_3_mmt4d_DxDxDx16x16x1_f32() { %53 = arith.shli %52, %c32_i64 : i64 %54 = arith.ori %51, %53 : i64 %55 = arith.index_castui %54 : i64 to index - %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%30, %35} + %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref>{%30, %35} memref.assume_alignment %56, 64 : memref> - %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref, #hal.descriptor_type>{%40, %45} + %57 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref, #hal.descriptor_type>{%40, %45} memref.assume_alignment %57, 1 : memref, #hal.descriptor_type> - %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : memref, #hal.descriptor_type>{%50, %55} + %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : memref, #hal.descriptor_type>{%50, %55} memref.assume_alignment %58, 1 : memref, #hal.descriptor_type> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index @@ -657,11 +657,11 @@ module { %53 = arith.shli %52, %c32_i64 : i64 %54 = arith.ori %51, %53 : i64 %55 = arith.index_castui %54 : i64 to index - %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref{%30, %35} + %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref{%30, %35} memref.assume_alignment %56, 64 : memref - %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref>{%40, %45} + %57 = 
hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref>{%40, %45} memref.assume_alignment %57, 1 : memref> - %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : memref>{%50, %55} + %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : memref>{%50, %55} memref.assume_alignment %58, 1 : memref> %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/experimental/webgpu/command_buffer.c b/experimental/webgpu/command_buffer.c index 1654beb8bd2e2..92403205f4d05 100644 --- a/experimental/webgpu/command_buffer.c +++ b/experimental/webgpu/command_buffer.c @@ -147,12 +147,12 @@ typedef struct iree_hal_webgpu_command_buffer_t { // Currently open pass - NULL if no open pass. WGPUComputePassEncoder compute_pass; - // All available push constants updated each time push_constants is called. + // All available push constants updated each time constants is called. // Reset only with the command buffer and otherwise will maintain its values - // during recording to allow for partial push_constants updates. - uint32_t push_constants[IREE_HAL_WEBGPU_MAX_PUSH_CONSTANT_COUNT]; + // during recording to allow for partial constants updates. + uint32_t constants[IREE_HAL_WEBGPU_MAX_PUSH_CONSTANT_COUNT]; - // TODO(benvanik): add a push_constants dirty bit so we know if we need to + // TODO(benvanik): add a constants dirty bit so we know if we need to // upload more. Today we'll stage the same values for each dispatch. // Snapshot of descriptor sets as populated by push_descriptor_set. 
@@ -750,7 +750,7 @@ static iree_status_t iree_hal_webgpu_command_buffer_copy_buffer( return iree_ok_status(); } -static iree_status_t iree_hal_webgpu_command_buffer_push_constants( +static iree_status_t iree_hal_webgpu_command_buffer_constants( iree_hal_command_buffer_t* base_command_buffer, iree_hal_pipeline_layout_t* pipeline_layout, iree_host_size_t offset, const void* values, iree_host_size_t values_length) { @@ -758,7 +758,7 @@ static iree_status_t iree_hal_webgpu_command_buffer_push_constants( iree_hal_webgpu_command_buffer_cast(base_command_buffer); if (IREE_UNLIKELY(offset + values_length >= - sizeof(command_buffer->state.push_constants))) { + sizeof(command_buffer->state.constants))) { return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "push constant range %" PRIhsz " (length=%" PRIhsz ") out of range", @@ -766,7 +766,7 @@ static iree_status_t iree_hal_webgpu_command_buffer_push_constants( } // NOTE: command buffer state change only; enqueues no tasks. - memcpy((uint8_t*)&command_buffer->state.push_constants + offset, values, + memcpy((uint8_t*)&command_buffer->state.constants + offset, values, values_length); return iree_ok_status(); @@ -819,14 +819,14 @@ static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch( // Upload push constant data - this may incur a segment flush if the staging // buffer is exhausted. 
- iree_host_size_t push_constant_count = - iree_hal_webgpu_pipeline_layout_push_constant_count(entry_point->layout); - iree_const_byte_span_t push_constant_data = iree_make_const_byte_span( - command_buffer->state.push_constants, - push_constant_count * sizeof(command_buffer->state.push_constants[0])); + iree_host_size_t constant_count = + iree_hal_webgpu_pipeline_layout_constant_count(entry_point->layout); + iree_const_byte_span_t constant_data = iree_make_const_byte_span( + command_buffer->state.constants, + constant_count * sizeof(command_buffer->state.constants[0])); uint32_t params_offset = 0; IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_append_parameters( - command_buffer, push_constant_data, &params_offset)); + command_buffer, constant_data, &params_offset)); // Acquire the compute pass we'll encode the dispatch into - this may be // fresh or reused from prior commands. @@ -835,7 +835,7 @@ static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch( command_buffer, &compute_pass)); wgpuComputePassEncoderSetPipeline(compute_pass, entry_point->pipeline); - if (push_constant_count > 0) { + if (constant_count > 0) { // Bind the push constant emulation bind group at the staging buffer // relative offset for this dispatch. wgpuComputePassEncoderSetBindGroup( @@ -872,7 +872,7 @@ static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch( command_buffer->state.bind_groups_empty &= ~(1ull << i); } - if (push_constant_count > 0) { + if (constant_count > 0) { // Pad up to IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX with empty bind groups.
WGPUBindGroup empty_handle = command_buffer->staging_buffer->empty_bind_group; @@ -1045,7 +1045,7 @@ const iree_hal_command_buffer_vtable_t iree_hal_webgpu_command_buffer_vtable = { .fill_buffer = iree_hal_webgpu_command_buffer_fill_buffer, .update_buffer = iree_hal_webgpu_command_buffer_update_buffer, .copy_buffer = iree_hal_webgpu_command_buffer_copy_buffer, - .push_constants = iree_hal_webgpu_command_buffer_push_constants, + .constants = iree_hal_webgpu_command_buffer_constants, .push_descriptor_set = iree_hal_webgpu_command_buffer_push_descriptor_set, .dispatch = iree_hal_webgpu_command_buffer_dispatch, .dispatch_indirect = iree_hal_webgpu_command_buffer_dispatch_indirect, diff --git a/experimental/webgpu/pipeline_layout.c b/experimental/webgpu/pipeline_layout.c index a5c940c8ddcff..c55fcbef31fb1 100644 --- a/experimental/webgpu/pipeline_layout.c +++ b/experimental/webgpu/pipeline_layout.c @@ -159,7 +159,7 @@ typedef struct iree_hal_webgpu_pipeline_layout_t { iree_hal_resource_t resource; iree_allocator_t host_allocator; WGPUPipelineLayout handle; - iree_host_size_t push_constant_count; + iree_host_size_t constant_count; iree_hal_webgpu_set_binding_info_t set_binding_info; iree_host_size_t set_layout_count; iree_hal_descriptor_set_layout_t* set_layouts[]; @@ -177,7 +177,7 @@ static iree_hal_webgpu_pipeline_layout_t* iree_hal_webgpu_pipeline_layout_cast( iree_status_t iree_hal_webgpu_pipeline_layout_create( WGPUDevice device, iree_host_size_t set_layout_count, iree_hal_descriptor_set_layout_t* const* set_layouts, - iree_host_size_t push_constant_count, + iree_host_size_t constant_count, iree_hal_webgpu_staging_buffer_t* staging_buffer, iree_allocator_t host_allocator, iree_hal_pipeline_layout_t** out_pipeline_layout) { @@ -198,8 +198,8 @@ iree_status_t iree_hal_webgpu_pipeline_layout_create( // Pad to IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX for push constant emulation. iree_host_size_t bind_group_layouts_count = - push_constant_count > 0 ? 
IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX + 1 - : set_layout_count; + constant_count > 0 ? IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX + 1 + : set_layout_count; // Populate a WGPUBindGroupLayout array with the provided set layouts, then // set the staging buffer's bind group layout at the right index, padding @@ -215,7 +215,7 @@ iree_status_t iree_hal_webgpu_pipeline_layout_create( *iree_inline_array_at(bind_group_layouts, i) = staging_buffer->empty_bind_group_layout; } - if (push_constant_count > 0) { + if (constant_count > 0) { *iree_inline_array_at(bind_group_layouts, IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX) = staging_buffer->bind_group_layout; @@ -247,7 +247,7 @@ iree_status_t iree_hal_webgpu_pipeline_layout_create( &pipeline_layout->resource); pipeline_layout->host_allocator = host_allocator; pipeline_layout->handle = handle; - pipeline_layout->push_constant_count = push_constant_count; + pipeline_layout->constant_count = constant_count; pipeline_layout->set_layout_count = set_layout_count; pipeline_layout->set_binding_info.set_count = set_layout_count; @@ -292,10 +292,10 @@ WGPUPipelineLayout iree_hal_webgpu_pipeline_layout_handle( return iree_hal_webgpu_pipeline_layout_cast(layout)->handle; } -iree_host_size_t iree_hal_webgpu_pipeline_layout_push_constant_count( +iree_host_size_t iree_hal_webgpu_pipeline_layout_constant_count( iree_hal_pipeline_layout_t* layout) { IREE_ASSERT_ARGUMENT(layout); - return iree_hal_webgpu_pipeline_layout_cast(layout)->push_constant_count; + return iree_hal_webgpu_pipeline_layout_cast(layout)->constant_count; } const iree_hal_webgpu_set_binding_info_t* diff --git a/experimental/webgpu/pipeline_layout.h b/experimental/webgpu/pipeline_layout.h index e15b3bf48268f..c620f9c42564a 100644 --- a/experimental/webgpu/pipeline_layout.h +++ b/experimental/webgpu/pipeline_layout.h @@ -61,7 +61,7 @@ typedef struct iree_hal_webgpu_set_binding_info_t { iree_status_t iree_hal_webgpu_pipeline_layout_create( WGPUDevice device, iree_host_size_t 
set_layout_count, iree_hal_descriptor_set_layout_t* const* set_layouts, - iree_host_size_t push_constant_count, + iree_host_size_t constant_count, iree_hal_webgpu_staging_buffer_t* staging_buffer, iree_allocator_t host_allocator, iree_hal_pipeline_layout_t** out_pipeline_layout); @@ -69,7 +69,7 @@ iree_status_t iree_hal_webgpu_pipeline_layout_create( WGPUPipelineLayout iree_hal_webgpu_pipeline_layout_handle( iree_hal_pipeline_layout_t* layout); -iree_host_size_t iree_hal_webgpu_pipeline_layout_push_constant_count( +iree_host_size_t iree_hal_webgpu_pipeline_layout_constant_count( iree_hal_pipeline_layout_t* layout); const iree_hal_webgpu_set_binding_info_t* diff --git a/experimental/webgpu/webgpu_device.c b/experimental/webgpu/webgpu_device.c index 165dee3beea78..5498caf011301 100644 --- a/experimental/webgpu/webgpu_device.c +++ b/experimental/webgpu/webgpu_device.c @@ -295,13 +295,13 @@ static iree_status_t iree_hal_webgpu_device_import_file( } static iree_status_t iree_hal_webgpu_device_create_pipeline_layout( - iree_hal_device_t* base_device, iree_host_size_t push_constants, + iree_hal_device_t* base_device, iree_host_size_t constants, iree_host_size_t set_layout_count, iree_hal_descriptor_set_layout_t* const* set_layouts, iree_hal_pipeline_layout_t** out_pipeline_layout) { iree_hal_webgpu_device_t* device = iree_hal_webgpu_device_cast(base_device); return iree_hal_webgpu_pipeline_layout_create( - device->handle, set_layout_count, set_layouts, push_constants, + device->handle, set_layout_count, set_layouts, constants, &device->staging_buffer, device->host_allocator, out_pipeline_layout); } diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td index 526cdd6480195..223dcdc505a3b 100644 --- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td +++ 
b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td @@ -265,68 +265,49 @@ def IREEInput_DescriptorFlagsAttr : let cppNamespace = "::mlir::iree_compiler::IREE::Input"; } -def IREEInput_DescriptorSetBindingAttr : - AttrDef { - let mnemonic = "descriptor_set.binding"; +def IREEInput_PipelineBindingAttr : + AttrDef { + let mnemonic = "pipeline.binding"; let summary = [{descriptor set binding specification}]; let parameters = (ins - AttrParameter<"int64_t", "">:$ordinal, AttrParameter<"DescriptorType", "">:$type, OptionalParameter<"std::optional">:$flags ); let assemblyFormat = [{ - `<` $ordinal `,` $type (`,` $flags^)? `>` + `<` $type (`,` $flags^)? `>` }]; } -def IREEInput_DescriptorSetLayoutFlags_None : +def IREEInput_PipelineLayoutFlags_None : I32BitEnumAttrCase<"None", 0x0000>; -def IREEInput_DescriptorSetLayoutFlags_Indirect : +def IREEInput_PipelineLayoutFlags_Indirect : I32BitEnumAttrCase<"Indirect", 0x0001>; -def IREEInput_DescriptorSetLayoutFlagsAttr : - I32BitEnumAttr<"DescriptorSetLayoutFlags", "valid DescriptorSetLayout flags", [ - IREEInput_DescriptorSetLayoutFlags_None, - IREEInput_DescriptorSetLayoutFlags_Indirect, +def IREEInput_PipelineLayoutFlagsAttr : + I32BitEnumAttr<"PipelineLayoutFlags", "valid PipelineLayout flags", [ + IREEInput_PipelineLayoutFlags_None, + IREEInput_PipelineLayoutFlags_Indirect, ]> { let cppNamespace = "::mlir::iree_compiler::IREE::Input"; } -def IREEInput_DescriptorSetLayoutAttr : - AttrDef { - let mnemonic = "descriptor_set.layout"; - let summary = [{descriptor set layout specification}]; - - let parameters = (ins - AttrParameter<"int64_t", "">:$ordinal, - ArrayRefParameter<"DescriptorSetBindingAttr", "">:$bindings, - OptionalParameter<"std::optional">:$flags - ); - - let assemblyFormat = [{ - `<` - $ordinal `,` - `bindings` `=` `[` $bindings `]` - (`,` `flags` `=` $flags^)? 
- `>` - }]; -} - def IREEInput_PipelineLayoutAttr : AttrDef { let mnemonic = "pipeline.layout"; let summary = [{executable entry point layout specification}]; let parameters = (ins - AttrParameter<"int64_t", "">:$pushConstants, - ArrayRefParameter<"DescriptorSetLayoutAttr", "">:$setLayouts + ArrayRefParameter<"PipelineBindingAttr", "">:$bindings, + OptionalParameter<"int64_t", "0">:$constants, + OptionalParameter<"std::optional">:$flags ); let assemblyFormat = [{ `<` - `push_constants` `=` $pushConstants `,` - `sets` `=` `[` $setLayouts `]` + (`constants` `=` $constants^ `,`)? + `bindings` `=` `[` qualified($bindings) `]` + (`,` `flags` `=` $flags^)? `>` }]; } diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h index 41ecf9fc1f54d..0ae1d30b986b3 100644 --- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h +++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h @@ -48,24 +48,24 @@ namespace mlir { template <> struct FieldParser< - std::optional> { - static FailureOr + std::optional> { + static FailureOr parse(AsmParser &parser) { std::string value; if (parser.parseKeywordOrString(&value)) return failure(); auto result = mlir::iree_compiler::IREE::Input::symbolizeEnum< - mlir::iree_compiler::IREE::Input::DescriptorSetLayoutFlags>(value); + mlir::iree_compiler::IREE::Input::PipelineLayoutFlags>(value); if (!result.has_value()) return failure(); return result.value(); } }; -static inline AsmPrinter &operator<<( - AsmPrinter &printer, - std::optional - param) { +static inline AsmPrinter & +operator<<(AsmPrinter &printer, + std::optional + param) { printer << (param.has_value() ? 
mlir::iree_compiler::IREE::Input::stringifyEnum( param.value()) diff --git a/runtime/src/iree/hal/cts/CMakeLists.txt b/runtime/src/iree/hal/cts/CMakeLists.txt index 715f02e3ff2d2..1e7ea8fc15194 100644 --- a/runtime/src/iree/hal/cts/CMakeLists.txt +++ b/runtime/src/iree/hal/cts/CMakeLists.txt @@ -10,8 +10,8 @@ set(IREE_ALL_CTS_TESTS "command_buffer" "command_buffer_copy_buffer" "command_buffer_dispatch" + "command_buffer_dispatch_constants" "command_buffer_fill_buffer" - "command_buffer_push_constants" "command_buffer_update_buffer" "driver" "event" @@ -27,7 +27,7 @@ set(IREE_ALL_CTS_TESTS # connected to a functional compiler target, these tests can be skipped. set(IREE_EXECUTABLE_CTS_TESTS "command_buffer_dispatch" - "command_buffer_push_constants" + "command_buffer_dispatch_constants" "executable_cache" PARENT_SCOPE ) @@ -35,7 +35,7 @@ set(IREE_EXECUTABLE_CTS_TESTS # List of testdata/{name}.mlir source files. set(IREE_ALL_CTS_EXECUTABLE_SOURCES "command_buffer_dispatch_test" - "command_buffer_push_constants_test" + "command_buffer_dispatch_constants_test" "executable_cache_test" PARENT_SCOPE ) @@ -120,9 +120,9 @@ iree_cc_library( iree_cc_library( NAME - command_buffer_fill_buffer_test_library + command_buffer_dispatch_constants_test_library HDRS - "command_buffer_fill_buffer_test.h" + "command_buffer_dispatch_constants_test.h" DEPS ::cts_test_base iree::base @@ -133,9 +133,9 @@ iree_cc_library( iree_cc_library( NAME - command_buffer_push_constants_test_library + command_buffer_fill_buffer_test_library HDRS - "command_buffer_push_constants_test.h" + "command_buffer_fill_buffer_test.h" DEPS ::cts_test_base iree::base diff --git a/runtime/src/iree/hal/cts/command_buffer_push_constants_test.h b/runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h similarity index 91% rename from runtime/src/iree/hal/cts/command_buffer_push_constants_test.h rename to runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h index 36520829dd42b..d53c7b6ab7609 100644 
--- a/runtime/src/iree/hal/cts/command_buffer_push_constants_test.h +++ b/runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h @@ -4,8 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_ -#define IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_ +#ifndef IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_ +#define IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_ #include "iree/base/api.h" #include "iree/base/string_view.h" @@ -18,7 +18,7 @@ namespace iree::hal::cts { using ::testing::ContainerEq; -class CommandBufferPushConstantsTest : public CTSTestBase<> { +class CommandBufferDispatchConstantsTest : public CTSTestBase<> { protected: void PrepareExecutable() { IREE_ASSERT_OK(iree_hal_executable_cache_create( @@ -32,7 +32,7 @@ class CommandBufferPushConstantsTest : public CTSTestBase<> { executable_params.executable_format = iree_make_cstring_view(get_test_executable_format()); executable_params.executable_data = get_test_executable_data( - iree_make_cstring_view("command_buffer_push_constants_test.bin")); + iree_make_cstring_view("command_buffer_dispatch_constants_test.bin")); // No executable-level "specialization constants" (not to be confused with // per-dispatch varying "push constants"). 
executable_params.constant_count = 0; @@ -53,7 +53,7 @@ class CommandBufferPushConstantsTest : public CTSTestBase<> { iree_hal_executable_t* executable_ = NULL; }; -TEST_F(CommandBufferPushConstantsTest, DispatchWithPushConstants) { +TEST_F(CommandBufferDispatchConstantsTest, DispatchWithDispatchConstants) { ASSERT_NO_FATAL_FAILURE(PrepareExecutable()); iree_hal_command_buffer_t* command_buffer = NULL; @@ -130,4 +130,4 @@ TEST_F(CommandBufferPushConstantsTest, DispatchWithPushConstants) { } // namespace iree::hal::cts -#endif // IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_ +#endif // IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_ diff --git a/runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir similarity index 72% rename from runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir rename to runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir index df041bdab9778..6369fc496b0c7 100644 --- a/runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir +++ b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir @@ -1,25 +1,23 @@ // This program writes push constant values into an output buffer. 
-#pipeline_layout = #hal.pipeline.layout - ]> +#pipeline_layout = #hal.pipeline.layout ]> hal.executable.source public @executable { - hal.executable.export public @write_push_constants ordinal(0) layout(#pipeline_layout) attributes {workgroup_size = [1 : index, 1 : index, 1 : index]} { + hal.executable.export public @write_constants ordinal(0) layout(#pipeline_layout) attributes {workgroup_size = [1 : index, 1 : index, 1 : index]} { ^bb0(%arg0: !hal.device): %c1 = arith.constant 1 : index hal.return %c1, %c1, %c1 : index, index, index } builtin.module { - func.func @write_push_constants() { + func.func @write_constants() { %input_0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 %input_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32 %input_2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : i32 %input_3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i32 - %out = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4xi32> + %out = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4xi32> %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir index 136879d604dc1..d816c155785db 100644 --- a/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir +++ b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir @@ -5,11 +5,9 @@ // return %result : tensor<2xf32> // } -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable.source public @executable { @@ -22,8 +20,8 @@ hal.executable.source public @executable { func.func @abs() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) 
binding(0) alignment(4) offset(%c0) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(4) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(4) offset(%c0) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(4) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [2], strides = [1] : !flow.dispatch.tensor> -> tensor<2xf32> %3 = tensor.empty() : tensor<2xf32> diff --git a/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir index 43e33be2f9a35..6553a446efac5 100644 --- a/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir +++ b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir @@ -5,11 +5,9 @@ // return %result : tensor // } -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding ]> hal.executable.source public @executable { @@ -22,8 +20,8 @@ hal.executable.source public @executable { func.func @abs() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor - %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor -> tensor %3 = tensor.empty() : tensor diff --git a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c 
b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c index 0da43bb370512..0e60669a61507 100644 --- a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c +++ b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c @@ -712,8 +712,8 @@ typedef struct iree_hal_task_cmd_dispatch_t { iree_hal_local_executable_t* executable; int32_t ordinal; - // Total number of available 4 byte push constant values in |push_constants|. - uint16_t push_constant_count; + // Total number of available 4 byte push constant values in |constants|. + uint16_t constant_count; // Total number of binding base pointers in |binding_ptrs| and // |binding_lengths|. The set is packed densely based on which bindings are @@ -721,7 +721,7 @@ typedef struct iree_hal_task_cmd_dispatch_t { uint16_t binding_count; // Following this structure in memory there are 3 tables: - // - const uint32_t push_constants[push_constant_count]; + // - const uint32_t constants[constant_count]; // - void* binding_ptrs[binding_count]; // - const size_t binding_lengths[binding_count]; } iree_hal_task_cmd_dispatch_t; @@ -742,7 +742,7 @@ static iree_status_t iree_hal_task_cmd_dispatch_tile( .workgroup_size_x = tile_context->workgroup_size[0], .workgroup_size_y = tile_context->workgroup_size[1], .workgroup_size_z = tile_context->workgroup_size[2], - .push_constant_count = cmd->push_constant_count, + .constant_count = cmd->constant_count, .workgroup_count_x = tile_context->workgroup_count[0], .workgroup_count_y = tile_context->workgroup_count[1], .workgroup_count_z = tile_context->workgroup_count[2], @@ -751,8 +751,8 @@ static iree_status_t iree_hal_task_cmd_dispatch_tile( .binding_count = cmd->binding_count, }; uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd); - dispatch_state.push_constants = (uint32_t*)cmd_ptr; - cmd_ptr += cmd->push_constant_count * sizeof(*dispatch_state.push_constants); + dispatch_state.constants = (uint32_t*)cmd_ptr; + cmd_ptr += cmd->constant_count * 
sizeof(*dispatch_state.constants); dispatch_state.binding_ptrs = (void**)cmd_ptr; cmd_ptr += cmd->binding_count * sizeof(*dispatch_state.binding_ptrs); dispatch_state.binding_lengths = (size_t*)cmd_ptr; @@ -802,7 +802,7 @@ static iree_status_t iree_hal_task_command_buffer_build_dispatch( cmd->executable = local_executable; cmd->ordinal = entry_point; - cmd->push_constant_count = dispatch_attrs.constant_count; + cmd->constant_count = dispatch_attrs.constant_count; cmd->binding_count = dispatch_attrs.binding_count; // TODO(benvanik): expose on API or keep fixed on executable. @@ -835,10 +835,10 @@ static iree_status_t iree_hal_task_command_buffer_build_dispatch( constants.data_length / sizeof(uint32_t)); } uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd); - uint32_t* push_constants = (uint32_t*)cmd_ptr; - memcpy(push_constants, constants.data, - dispatch_attrs.constant_count * sizeof(*push_constants)); - cmd_ptr += dispatch_attrs.constant_count * sizeof(*push_constants); + uint32_t* constants_ptr = (uint32_t*)cmd_ptr; + memcpy(constants_ptr, constants.data, + dispatch_attrs.constant_count * sizeof(*constants_ptr)); + cmd_ptr += dispatch_attrs.constant_count * sizeof(*constants_ptr); // Produce the dense binding list based on the declared bindings used. 
// diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir index fafa77b48c97a..07df2a2c87b17 100644 --- a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir +++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir @@ -19,12 +19,10 @@ // --binding=4xf32=0,0,0,0 // lhs * rhs => dst / s0b0 * s0b1 => s0b2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // A single executable source definition is allowed per translation in this mode @@ -47,9 +45,9 @@ hal.executable.source public @ex { // exported. builtin.module { func.func @elementwise_mul() { - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor> - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor> - %dst = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor> + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor> + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor> + %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor> %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_id_x = hal.interface.workgroup.id[0] : index %workgroup_count_x = hal.interface.workgroup.count[0] : index diff --git a/runtime/src/iree/hal/local/executable_library.h b/runtime/src/iree/hal/local/executable_library.h index 62b87d8a9710d..f9da2095d2543 100644 --- a/runtime/src/iree/hal/local/executable_library.h +++ b/runtime/src/iree/hal/local/executable_library.h @@ -279,8 
+279,8 @@ typedef struct iree_hal_executable_dispatch_state_v0_t { uint32_t workgroup_size_y; uint16_t workgroup_size_z; - // Total number of available 4 byte push constant values in |push_constants|. - uint16_t push_constant_count; + // Total number of available 4 byte push constant values in |constants|. + uint16_t constant_count; // Total workgroup count for the dispatch. This is sourced from either the // original dispatch call (for iree_hal_command_buffer_dispatch) or the @@ -299,8 +299,8 @@ typedef struct iree_hal_executable_dispatch_state_v0_t { // used (known at compile-time). uint8_t binding_count; - // |push_constant_count| values. - const uint32_t* push_constants; + // |constant_count| values. + const uint32_t* constants; // Base pointers to each binding buffer. void* const* binding_ptrs; // The length of each binding in bytes, 1:1 with |binding_ptrs|. diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.c b/runtime/src/iree/hal/local/executable_library_benchmark.c index 93134fe351948..8988b5dd6d6b5 100644 --- a/runtime/src/iree/hal/local/executable_library_benchmark.c +++ b/runtime/src/iree/hal/local/executable_library_benchmark.c @@ -51,45 +51,44 @@ IREE_FLAG(int32_t, max_concurrency, 1, // Parsed parameters from flags. // Used to construct the dispatch parameters for the benchmark invocation. 
struct { - int32_t push_constant_count; + int32_t constant_count; union { uint32_t ui32; - } push_constants[IREE_HAL_EXECUTABLE_MAX_CONSTANT_COUNT]; + } constants[IREE_HAL_EXECUTABLE_MAX_CONSTANT_COUNT]; int32_t binding_count; iree_string_view_t bindings[IREE_HAL_EXECUTABLE_MAX_BINDING_COUNT]; } dispatch_params = { - .push_constant_count = 0, + .constant_count = 0, .binding_count = 0, }; -static iree_status_t parse_push_constant(iree_string_view_t flag_name, - void* storage, - iree_string_view_t value) { - IREE_ASSERT_LE(dispatch_params.push_constant_count + 1, - IREE_ARRAYSIZE(dispatch_params.push_constants), +static iree_status_t parse_constant(iree_string_view_t flag_name, void* storage, + iree_string_view_t value) { + IREE_ASSERT_LE(dispatch_params.constant_count + 1, + IREE_ARRAYSIZE(dispatch_params.constants), "too many push constants"); - dispatch_params.push_constants[dispatch_params.push_constant_count++].ui32 = + dispatch_params.constants[dispatch_params.constant_count++].ui32 = atoi(value.data); return iree_ok_status(); } -static void print_push_constant(iree_string_view_t flag_name, void* storage, - FILE* file) { - if (dispatch_params.push_constant_count == 0) { +static void print_constant(iree_string_view_t flag_name, void* storage, + FILE* file) { + if (dispatch_params.constant_count == 0) { fprintf(file, "# --%.*s=[integer value]\n", (int)flag_name.size, flag_name.data); return; } - for (int32_t i = 0; i < dispatch_params.push_constant_count; ++i) { + for (int32_t i = 0; i < dispatch_params.constant_count; ++i) { fprintf(file, "--%.*s=%u", (int)flag_name.size, flag_name.data, - dispatch_params.push_constants[i].ui32); - if (i < dispatch_params.push_constant_count - 1) { + dispatch_params.constants[i].ui32); + if (i < dispatch_params.constant_count - 1) { fprintf(file, "\n"); } } } -IREE_FLAG_CALLBACK(parse_push_constant, print_push_constant, &dispatch_params, - push_constant_callback, +IREE_FLAG_CALLBACK(parse_constant, print_constant, 
&dispatch_params, + constant_callback, "Appends a uint32_t push constant value.\n"); static iree_status_t parse_binding(iree_string_view_t flag_name, void* storage, @@ -217,8 +216,8 @@ static iree_status_t iree_hal_executable_library_run( .workgroup_size_y = FLAG_workgroup_size_y, .workgroup_size_z = FLAG_workgroup_size_z, .max_concurrency = FLAG_max_concurrency, - .push_constant_count = dispatch_params.push_constant_count, - .push_constants = &dispatch_params.push_constants[0].ui32, + .constant_count = dispatch_params.constant_count, + .constants = &dispatch_params.constants[0].ui32, .binding_count = dispatch_params.binding_count, .binding_ptrs = binding_ptrs, .binding_lengths = binding_lengths, diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.md b/runtime/src/iree/hal/local/executable_library_benchmark.md index 1e7d59ac2cef2..07bfa312b63c4 100644 --- a/runtime/src/iree/hal/local/executable_library_benchmark.md +++ b/runtime/src/iree/hal/local/executable_library_benchmark.md @@ -205,7 +205,7 @@ good candidates for this tool. 5. Look up in the IR to see the values of push constants, if required: ```mlir - hal.command_buffer.push_constants<%cmd : !hal.command_buffer> + hal.command_buffer.constants<%cmd : !hal.command_buffer> layout(%0 : !hal.pipeline_layout) offset(0) values(%c1, %c2, %c3, %c4) : i32, i32, i32, i32 @@ -216,8 +216,8 @@ non-constant. 
This microbenchmarking approach is not generally suited for things like this but in cases where you know the meaning you can provide values: ``` ---push_constant=1 ---push_constant=2 ---push_constant=3 ---push_constant=4 +--constant=1 +--constant=2 +--constant=3 +--constant=4 ``` diff --git a/runtime/src/iree/hal/local/executable_library_demo.c b/runtime/src/iree/hal/local/executable_library_demo.c index 300d645120ae4..bb03027cca9af 100644 --- a/runtime/src/iree/hal/local/executable_library_demo.c +++ b/runtime/src/iree/hal/local/executable_library_demo.c @@ -22,17 +22,17 @@ // communication between invocations must use the buffer bindings for I/O. // // This is a simple scalar addition: -// binding[1] = binding[0] + push_constant[0] +// binding[1] = binding[0] + constant[0] static int dispatch_tile_a( const iree_hal_executable_environment_v0_t* environment, const iree_hal_executable_dispatch_state_v0_t* dispatch_state, const iree_hal_executable_workgroup_state_v0_t* workgroup_state) { - const dispatch_tile_a_push_constants_t* push_constants = - (const dispatch_tile_a_push_constants_t*)dispatch_state->push_constants; + const dispatch_tile_a_constants_t* constants = + (const dispatch_tile_a_constants_t*)dispatch_state->constants; const float* src = ((const float*)dispatch_state->binding_ptrs[0]); float* dst = ((float*)dispatch_state->binding_ptrs[1]); const uint32_t x = workgroup_state->workgroup_id_x; - dst[x] = src[x] + push_constants->f0; + dst[x] = src[x] + constants->f0; return 0; } diff --git a/runtime/src/iree/hal/local/executable_library_demo.h b/runtime/src/iree/hal/local/executable_library_demo.h index f458768cde046..1ebcfe9d16bda 100644 --- a/runtime/src/iree/hal/local/executable_library_demo.h +++ b/runtime/src/iree/hal/local/executable_library_demo.h @@ -26,14 +26,14 @@ typedef union { struct { float f0; }; -} dispatch_tile_a_push_constants_t; +} dispatch_tile_a_constants_t; // Returns a simple demo library with the following structure: // // Name: 
'demo_library' // // [0] 'dispatch_tile_a': matmul+div -// push constants: 1 (dispatch_tile_a_push_constants_t) +// push constants: 1 (dispatch_tile_a_constants_t) // bindings: 2 // [0] = R // [1] = W diff --git a/runtime/src/iree/hal/local/executable_library_test.c b/runtime/src/iree/hal/local/executable_library_test.c index cb354489d96dd..b6ffc363f5768 100644 --- a/runtime/src/iree/hal/local/executable_library_test.c +++ b/runtime/src/iree/hal/local/executable_library_test.c @@ -58,9 +58,9 @@ int main(int argc, char** argv) { // to specify (no buffer pointer indirection) and more efficient to access // (static struct offset address calculation, all fit in a few cache lines, // etc). They are limited in capacity, though, so only <=64(ish) are usable. - dispatch_tile_a_push_constants_t push_constants; - memset(&push_constants, 0, sizeof(push_constants)); - push_constants.f0 = 5.0f; + dispatch_tile_a_constants_t constants; + memset(&constants, 0, sizeof(constants)); + constants.f0 = 5.0f; // Setup the two buffer bindings the entry point is expecting. // They only need to remain valid for the duration of the invocation and all @@ -90,8 +90,8 @@ int main(int argc, char** argv) { .workgroup_size_y = 1, .workgroup_size_z = 1, .max_concurrency = 1, - .push_constant_count = IREE_ARRAYSIZE(push_constants.values), - .push_constants = push_constants.values, + .constant_count = IREE_ARRAYSIZE(constants.values), + .constants = constants.values, .binding_count = IREE_ARRAYSIZE(binding_ptrs), .binding_ptrs = binding_ptrs, .binding_lengths = binding_lengths, diff --git a/runtime/src/iree/hal/local/inline_command_buffer.c b/runtime/src/iree/hal/local/inline_command_buffer.c index c002d221f7c86..7ea85135ea84f 100644 --- a/runtime/src/iree/hal/local/inline_command_buffer.c +++ b/runtime/src/iree/hal/local/inline_command_buffer.c @@ -29,7 +29,7 @@ typedef struct iree_hal_inline_command_buffer_t { struct { // Cached and initialized dispatch state reused for all dispatches. 
// Individual dispatches must populate the dynamically changing fields like - // push_constant_count and binding_count. + // constant_count and binding_count. iree_alignas(64) iree_hal_executable_dispatch_state_v0_t dispatch_state; // Persistent storage for binding pointers used by dispatch_state. void* binding_ptr_storage[IREE_HAL_EXECUTABLE_MAX_BINDING_COUNT]; @@ -397,8 +397,8 @@ static iree_status_t iree_hal_inline_command_buffer_dispatch( (uint32_t)dispatch_attrs.constant_count, constants.data_length / sizeof(uint32_t)); } - dispatch_state->push_constant_count = dispatch_attrs.constant_count; - dispatch_state->push_constants = (const uint32_t*)constants.data; + dispatch_state->constant_count = dispatch_attrs.constant_count; + dispatch_state->constants = (const uint32_t*)constants.data; // Produce the dense binding list based on the declared bindings used. // diff --git a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c index bfdb1dbbdc5e2..78663ba266ee3 100644 --- a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c +++ b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c @@ -434,9 +434,8 @@ static iree_status_t iree_hal_vmvx_executable_issue_call( iree_vm_buffer_t constants_buffer; iree_vm_buffer_initialize( IREE_VM_BUFFER_ACCESS_ORIGIN_HOST, - iree_make_byte_span( - (void*)dispatch_state->push_constants, - sizeof(uint32_t) * dispatch_state->push_constant_count), + iree_make_byte_span((void*)dispatch_state->constants, + sizeof(uint32_t) * dispatch_state->constant_count), iree_allocator_null(), &constants_buffer); // Prepare call argument buffer. 
We've verified the signature on creation and diff --git a/runtime/src/iree/modules/hal/loader/module.c b/runtime/src/iree/modules/hal/loader/module.c index c88aaba82ce63..94baa951bb524 100644 --- a/runtime/src/iree/modules/hal/loader/module.c +++ b/runtime/src/iree/modules/hal/loader/module.c @@ -222,8 +222,8 @@ typedef struct { }; iree_vm_abi_riiii_t params; }; - iree_vm_size_t push_constant_count; - const uint32_t* push_constants; + iree_vm_size_t constant_count; + const uint32_t* constants; iree_vm_size_t binding_count; const iree_vm_abi_rII_t* bindings; } iree_hal_loader_dispatch_args_t; @@ -264,13 +264,13 @@ static iree_status_t iree_hal_loader_module_executable_dispatch( .workgroup_size_x = 1, .workgroup_size_y = 1, .workgroup_size_z = 1, - .push_constant_count = args->push_constant_count, + .constant_count = args->constant_count, .workgroup_count_x = args->workgroup_x, .workgroup_count_y = args->workgroup_y, .workgroup_count_z = args->workgroup_z, .max_concurrency = 1, .binding_count = args->binding_count, - .push_constants = args->push_constants, + .constants = args->constants, .binding_ptrs = binding_ptrs, .binding_lengths = binding_lengths, }; @@ -302,13 +302,12 @@ static iree_status_t iree_vm_shim_dispatch_v( .params = *(const iree_vm_abi_riiii_t*)args_storage.data, }; if (args_ok) { - const uint8_t* push_constants_ptr = args_storage.data + sizeof(args.params); - args.push_constant_count = *(const iree_vm_size_t*)push_constants_ptr; - args.push_constants = - (const uint32_t*)(push_constants_ptr + sizeof(iree_vm_size_t)); + const uint8_t* constants_ptr = args_storage.data + sizeof(args.params); + args.constant_count = *(const iree_vm_size_t*)constants_ptr; + args.constants = (const uint32_t*)(constants_ptr + sizeof(iree_vm_size_t)); const uint8_t* bindings_ptr = - push_constants_ptr + sizeof(iree_vm_size_t) + - args.push_constant_count * sizeof(args.push_constants[0]); + constants_ptr + sizeof(iree_vm_size_t) + + args.constant_count * 
sizeof(args.constants[0]); args.binding_count = *(const iree_vm_size_t*)bindings_ptr; args.bindings = (const iree_vm_abi_rII_t*)(bindings_ptr + sizeof(iree_vm_size_t)); diff --git a/samples/custom_dispatch/cpu/embedded/example_hal.mlir b/samples/custom_dispatch/cpu/embedded/example_hal.mlir index 5bb5299153b3e..64b246bdc4914 100644 --- a/samples/custom_dispatch/cpu/embedded/example_hal.mlir +++ b/samples/custom_dispatch/cpu/embedded/example_hal.mlir @@ -45,19 +45,15 @@ #x86_64_target ]> : !hal.device -#pipeline_layout_0 = #hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> +#pipeline_layout_0 = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> -#pipeline_layout_1 = #hal.pipeline.layout, - <1, storage_buffer> - ]> +#pipeline_layout_1 = #hal.pipeline.layout, + #hal.pipeline.binding ]> module @example attributes {hal.device.targets = [#cpu_target]} { @@ -96,19 +92,7 @@ module @example attributes {hal.device.targets = [#cpu_target]} { // The ordinal must be assigned by the user and unique for the executable. // The layout defines the required bindings and push constants and can be // thought of as the function signature. - hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout_0) attributes { - // Bindings are automatically inferred when possible as part of the - // ABI but can be overridden if the user wants to use features such - // as sparse bindings or multiple descriptor sets. 
To do so the - // `hal.interface.bindings` attribute can be added to a dispatch op - // as follows mapping tensor operands/results to the pipeline layout - // sets/bindings: - hal.interface.bindings = [ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1>, - #hal.interface.binding<0, 2> - ] - } { + hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout_0) { ^bb0(%device: !hal.device, %workload: index): // This host function is used to compute the XYZ workgroup count // dispatched at runtime. It can query the %device for capabilities @@ -186,9 +170,9 @@ module @example attributes {hal.device.targets = [#cpu_target]} { %tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] // Bindings are accessed by reference. - %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(0) alignment(64) offset(%c0) : memref{%dim} - %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(1) alignment(64) offset(%c0) : memref{%dim} - %binding2 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(2) alignment(64) offset(%c0) : memref{%dim} + %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(0) alignment(64) offset(%c0) : memref{%dim} + %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(1) alignment(64) offset(%c0) : memref{%dim} + %binding2 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(2) alignment(64) offset(%c0) : memref{%dim} // Call the externally defined C function with an (almost) plain C // calling convention (see above for details about the mess memrefs @@ -220,8 +204,8 @@ module @example attributes {hal.device.targets = [#cpu_target]} { %tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] // Same as above but note that we're treating %binding1 as read/write. 
- %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(0) alignment(64) offset(%c0) : memref{%dim} - %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(1) alignment(64) offset(%c0) : memref{%dim} + %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(0) alignment(64) offset(%c0) : memref{%dim} + %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(1) alignment(64) offset(%c0) : memref{%dim} func.call @simple_mul_inplace_workgroup(%binding0, %binding1, %dim, %tid) : (memref, memref, index, index) -> () @@ -273,10 +257,6 @@ module @example attributes {hal.device.targets = [#cpu_target]} { // to allow user-controlled overrides of the dispatches, custom selection // logic based on runtime parameters, etc. In general, though, the above // automatic selection should be used. - // - // Note that we don't declare the hal.interface.bindings and let them be - // inferred - this only works when either specifying the variant that has - // a pipeline layout defined or all variants have the same pipeline layouts. 
%2 = flow.dispatch @executable::@x86_64::@simple_mul_inplace[%dim](%dim_i32, %0, %1) : (i32, tensor{%dim}, tensor{%dim}) -> %1{%dim} // CHECK: 8xf32=96 96 96 96 96 96 96 96 diff --git a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir index b04e0755c4acb..159c95faa75ff 100644 --- a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir +++ b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir @@ -20,12 +20,10 @@ #x86_64_target ]> -#pipeline_layout = #hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> module attributes {transform.with_named_sequence} { @@ -56,9 +54,9 @@ module attributes {transform.with_named_sequence} { %workgroup_id_x = hal.interface.workgroup.id[0] : index %tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] - %binding0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref{%dim} - %binding1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref{%dim} - %binding2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref{%dim} + %binding0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref{%dim} + %binding1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref{%dim} + %binding2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref{%dim} func.call @simple_mul_abs_negate_workgroup(%binding0, %binding1, %binding2, %dim, %tid) : (memref, memref, memref, index, index) -> () return diff --git a/samples/custom_dispatch/cpu/embedded/functions.c b/samples/custom_dispatch/cpu/embedded/functions.c index 
a9626668a3407..495fd6df8c36f 100644 --- a/samples/custom_dispatch/cpu/embedded/functions.c +++ b/samples/custom_dispatch/cpu/embedded/functions.c @@ -36,12 +36,10 @@ // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. void simple_mul_workgroup( @@ -64,11 +62,9 @@ void simple_mul_workgroup( // `rhs *= lhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. void simple_mul_inplace_workgroup( @@ -89,12 +85,10 @@ void simple_mul_inplace_workgroup( // `ret = -|lhs * rhs|` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. void simple_mul_abs_negate_workgroup( diff --git a/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c b/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c index 88417a6df03e5..4fe4c4eb2d73c 100644 --- a/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c +++ b/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c @@ -32,12 +32,10 @@ static size_t get_index(size_t i, size_t j, size_t offset, size_t stride) { // `ret = mlp(lhs, rhs)` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. 
// diff --git a/samples/custom_dispatch/cpu/plugin/standalone_plugin.c b/samples/custom_dispatch/cpu/plugin/standalone_plugin.c index 038666f834931..0305fb12e427f 100644 --- a/samples/custom_dispatch/cpu/plugin/standalone_plugin.c +++ b/samples/custom_dispatch/cpu/plugin/standalone_plugin.c @@ -24,12 +24,10 @@ // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. // diff --git a/samples/custom_dispatch/cpu/plugin/system_plugin.c b/samples/custom_dispatch/cpu/plugin/system_plugin.c index 816daacf83492..86ac02f300a19 100644 --- a/samples/custom_dispatch/cpu/plugin/system_plugin.c +++ b/samples/custom_dispatch/cpu/plugin/system_plugin.c @@ -42,12 +42,10 @@ typedef struct { // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // With a workgroup size of 64x1x1. // diff --git a/samples/custom_dispatch/cuda/kernels/README.md b/samples/custom_dispatch/cuda/kernels/README.md index ac4e99d605b0a..51decc893629e 100644 --- a/samples/custom_dispatch/cuda/kernels/README.md +++ b/samples/custom_dispatch/cuda/kernels/README.md @@ -71,12 +71,10 @@ nvcc ... 
(TODO, see CMakeLists.txt) -o kernels_sm_80.ptx ] }> hal.executable.export public @simple_mul ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]>) attributes {workgroup_size = [64 : index, 1 : index, 1 : index]} { ^bb0(%device: !hal.device, %workload: index): %x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload] diff --git a/samples/custom_dispatch/cuda/kernels/example.mlir b/samples/custom_dispatch/cuda/kernels/example.mlir index 62e49c6a94e6a..aaba1afe95784 100644 --- a/samples/custom_dispatch/cuda/kernels/example.mlir +++ b/samples/custom_dispatch/cuda/kernels/example.mlir @@ -75,27 +75,14 @@ module @example attributes {hal.device.targets = [#cuda_target]} { // The layout defines the required bindings and push constants and can be // thought of as the function signature. hal.executable.export public @simple_mul ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]>) attributes { // Certain backends (like CUDA) require a workgroup size (aka block // size) to be defined ahead of time. - workgroup_size = [64 : index, 1 : index, 1 : index], - // Bindings are automatically inferred when possible as part of the ABI - // but can be overridden if the user wants to use features such as sparse - // bindings or multiple descriptor sets. 
To do so the - // `hal.interface.bindings` attribute can be added to a dispatch op as - // follows mapping tensor operands/results to the pipeline layout - // sets/bindings: - hal.interface.bindings = [ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1>, - #hal.interface.binding<0, 2> - ] + workgroup_size = [64 : index, 1 : index, 1 : index] } { ^bb0(%device: !hal.device, %workload: index): // This host function is used to compute the XYZ workgroup count @@ -110,11 +97,9 @@ module @example attributes {hal.device.targets = [#cuda_target]} { // Similar to the above but in-place by using a read/write binding. hal.executable.export public @simple_mul_inplace ordinal(1) - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) attributes { workgroup_size = [64 : index, 1 : index, 1 : index] } { @@ -153,10 +138,6 @@ module @example attributes {hal.device.targets = [#cuda_target]} { %1 = arith.addf %0, %arg1 : tensor // Dispatch an in-place `rhs *= lhs` kernel. - // - // Note that we don't declare the hal.interface.bindings and let them be - // inferred - this only works when either specifying the variant that has - // a pipeline layout defined or all variants have the same pipeline layouts. 
%2 = flow.dispatch @executable::@simple_mul_inplace[%dim](%dim_i32, %0, %1) : (i32, tensor{%dim}, tensor{%dim}) -> %1{%dim} // CHECK: 8xf32=96 96 96 96 96 96 96 96 diff --git a/samples/custom_dispatch/cuda/kernels/kernels.cu b/samples/custom_dispatch/cuda/kernels/kernels.cu index 250f676308ab0..8bca00e704f48 100644 --- a/samples/custom_dispatch/cuda/kernels/kernels.cu +++ b/samples/custom_dispatch/cuda/kernels/kernels.cu @@ -36,12 +36,10 @@ // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // workgroup_size = [64 : index, 1 : index, 1 : index] extern "C" __global__ void simple_mul(const float* __restrict__ binding0, @@ -56,11 +54,9 @@ extern "C" __global__ void simple_mul(const float* __restrict__ binding0, // `rhs *= lhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding // ]> // workgroup_size = [64 : index, 1 : index, 1 : index] extern "C" __global__ void simple_mul_inplace( diff --git a/samples/custom_dispatch/hip/kernels/example.mlir b/samples/custom_dispatch/hip/kernels/example.mlir index 6a2148c0d2e97..a24a9999adaaf 100644 --- a/samples/custom_dispatch/hip/kernels/example.mlir +++ b/samples/custom_dispatch/hip/kernels/example.mlir @@ -66,27 +66,14 @@ module @example attributes {hal.device.targets = [#rocm_target]} { // The layout defines the required bindings and push constants and can be // thought of as the function signature. hal.executable.export public @simple_mul ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]>) attributes { // Certain backends (like ROCM) require a workgroup size (aka block // size) to be defined ahead of time. 
- workgroup_size = [64 : index, 1 : index, 1 : index], - // Bindings are automatically inferred when possible as part of the ABI - // but can be overridden if the user wants to use features such as sparse - // bindings or multiple descriptor sets. To do so the - // `hal.interface.bindings` attribute can be added to a dispatch op as - // follows mapping tensor operands/results to the pipeline layout - // sets/bindings: - hal.interface.bindings = [ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1>, - #hal.interface.binding<0, 2> - ] + workgroup_size = [64 : index, 1 : index, 1 : index] } { ^bb0(%device: !hal.device, %workload: index): // This host function is used to compute the XYZ workgroup count @@ -101,11 +88,9 @@ module @example attributes {hal.device.targets = [#rocm_target]} { // Similar to the above but in-place by using a read/write binding. hal.executable.export public @simple_mul_inplace ordinal(1) - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) attributes { workgroup_size = [64 : index, 1 : index, 1 : index] } { diff --git a/samples/custom_dispatch/hip/kernels/kernels.cu b/samples/custom_dispatch/hip/kernels/kernels.cu index 87e29eead02a6..cc4762be27254 100644 --- a/samples/custom_dispatch/hip/kernels/kernels.cu +++ b/samples/custom_dispatch/hip/kernels/kernels.cu @@ -38,12 +38,10 @@ // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> // workgroup_size = [64 : index, 1 : index, 1 : index] extern "C" __global__ void simple_mul(const float* __restrict__ binding0, @@ -58,11 +56,9 @@ extern "C" __global__ void simple_mul(const float* __restrict__ binding0, // `rhs *= lhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding // ]> // 
workgroup_size = [64 : index, 1 : index, 1 : index] extern "C" __global__ void simple_mul_inplace( diff --git a/samples/custom_dispatch/vulkan/shaders/README.md b/samples/custom_dispatch/vulkan/shaders/README.md index fb7406b2f8d3b..7e4129f3eb3f5 100644 --- a/samples/custom_dispatch/vulkan/shaders/README.md +++ b/samples/custom_dispatch/vulkan/shaders/README.md @@ -75,12 +75,10 @@ glslc -fshader-stage=compute simple_mul.glsl -o simple_mul.spv ] }> hal.executable.export public @simple_mul ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]>) { ^bb0(%device: !hal.device, %workload: index): %x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload] diff --git a/samples/custom_dispatch/vulkan/shaders/example.mlir b/samples/custom_dispatch/vulkan/shaders/example.mlir index d9cb5e1d07b42..17e2e4522f1a1 100644 --- a/samples/custom_dispatch/vulkan/shaders/example.mlir +++ b/samples/custom_dispatch/vulkan/shaders/example.mlir @@ -76,25 +76,11 @@ module @example attributes {hal.device.targets = [#vulkan_target]} { // The layout defines the required bindings and push constants and can be // thought of as the function signature. hal.executable.export public @main ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> - ]>) attributes { - // Bindings are automatically inferred when possible as part of the - // ABI but can be overridden if the user wants to use features such as - // sparse bindings or multiple descriptor sets. 
To do so the - // `hal.interface.bindings` attribute can be added to an export op as - // follows mapping tensor operands/results to the pipeline layout - // sets/bindings: - hal.interface.bindings = [ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1>, - #hal.interface.binding<0, 2> - ] - } { + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding + ]>) { ^bb0(%device: !hal.device, %workload: index): // This host function is used to compute the XYZ workgroup count // dispatched at runtime. It can query the %device for capabilities @@ -119,11 +105,9 @@ module @example attributes {hal.device.targets = [#vulkan_target]} { } { // Similar to the above but in-place by using a read/write binding. hal.executable.export public @main ordinal(0) - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) { ^bb0(%device: !hal.device, %workload: index): %x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload] diff --git a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir index 2882134a05671..d6ef84f3804b9 100644 --- a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir +++ b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir @@ -67,24 +67,11 @@ module @example attributes {hal.device.targets = [#vulkan_target]} { } // The layout defines the required bindings and push constants and can be // thought of as the function signature. - layout(#hal.pipeline.layout, - <1, storage_buffer, ReadOnly>, - <2, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]>) - // Bindings are automatically inferred when possible as part of the ABI - // but can be overridden if the user wants to use features such as sparse - // bindings or multiple descriptor sets. 
To do so the - // `hal.interface.bindings` attribute can be added to a dispatch op as - // follows mapping tensor operands/results to the pipeline layout - // sets/bindings: - bindings([ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1>, - #hal.interface.binding<0, 2> - ]) // Object files linked into the executable. // Certain backends (today) support either wholesale definition or linking // of partial objects for imports used by generated code. Each compilation diff --git a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir index 8e232069fa153..6d2cba8202507 100644 --- a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir +++ b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir @@ -36,15 +36,13 @@ module attributes {transform.with_named_sequence} { %c1_0 = arith.constant 1 : index hal.return %c1_0, %c1_0, %c1_0 : index, index, index } - layout(#hal.pipeline.layout, - <1, storage_buffer> - ]> + layout(#hal.pipeline.layout, + #hal.pipeline.binding ]>) bindings([ - #hal.interface.binding<0, 0>, - #hal.interface.binding<0, 1> + #hal.interface.binding<0>, + #hal.interface.binding<1> ]) objects({ #spirv_target ordinal(0) = [ diff --git a/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl b/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl index 5a9c6f6751cc5..b5a563afe0877 100644 --- a/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl +++ b/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl @@ -7,11 +7,9 @@ // `ret = argmax(in)` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding // ]> #version 450 core diff --git a/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl b/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl index 
ec40146074e04..f2418a6d19143 100644 --- a/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl +++ b/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl @@ -7,12 +7,10 @@ // `ret = lhs * rhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer, ReadOnly>, -// <2, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding, +// #hal.pipeline.binding // ]> #version 450 diff --git a/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl b/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl index adc6d82035da9..b24d9a2b72935 100644 --- a/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl +++ b/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl @@ -7,11 +7,9 @@ // `rhs *= lhs` // // Conforms to ABI: -// #hal.pipeline.layout, -// <1, storage_buffer> -// ]> +// #hal.pipeline.layout, +// #hal.pipeline.binding // ]> #version 450 diff --git a/samples/transform_dialect/example_module.mlir b/samples/transform_dialect/example_module.mlir index 2fb3498a1b3e1..40f2fee9cb0e9 100644 --- a/samples/transform_dialect/example_module.mlir +++ b/samples/transform_dialect/example_module.mlir @@ -29,9 +29,19 @@ compute = fp32|int32, storage = b32, subgroup = none, dot = none, mma = [], subgroup_size_choices = [64, 64], max_workgroup_sizes = [128, 128, 64], max_thread_count_per_workgroup = 128, max_workgroup_memory_bytes = 16384, max_workgroup_counts = [65535, 65535, 65535]>> -#pipeline_layout_0 = #hal.pipeline.layout, <1, storage_buffer>]>]> -#pipeline_layout_1 = #hal.pipeline.layout, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]> -#pipeline_layout_2 = #hal.pipeline.layout, <1, storage_buffer>]>]> +#pipeline_layout_0 = #hal.pipeline.layout, + #hal.pipeline.binding +]> +#pipeline_layout_1 = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding +]> +#pipeline_layout_2 = #hal.pipeline.layout, + #hal.pipeline.binding +]> module attributes { hal.device.targets = [ @@ -52,8 +62,8 @@ module 
attributes { builtin.module { func.func @example_module_dispatch_0_generic_80_f32() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [80], strides = [1] : !flow.dispatch.tensor> -> tensor<80xf32> %3 = tensor.empty() : tensor<80xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%2 : tensor<80xf32>) outs(%3 : tensor<80xf32>) { @@ -77,9 +87,9 @@ module attributes { builtin.module { func.func @example_module_dispatch_1_matmul_16x16x5_f32() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %2 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %3 = 
flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 5], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x5xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [5, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<5x16xf32> %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> @@ -100,8 +110,8 @@ module attributes { builtin.module { func.func @example_module_dispatch_2_generic_16x16_f32() { %c0 = arith.constant 0 : index - %0 = hal.interface.binding.subspan layout(#pipeline_layout_2) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan layout(#pipeline_layout_2) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %0 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %3 = tensor.empty() : tensor<16xf32> %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor<16x16xf32>) outs(%3 : tensor<16xf32>) { diff --git a/tests/compiler_driver/executable_benchmarks.mlir b/tests/compiler_driver/executable_benchmarks.mlir index b1ee0ec5783cc..f1eb326703bb0 100644 --- a/tests/compiler_driver/executable_benchmarks.mlir +++ b/tests/compiler_driver/executable_benchmarks.mlir @@ -18,4 +18,4 @@ func.func @abs(%input : tensor) -> (tensor) { // CHECK: vm.rodata private @abs_dispatch_0_vmvx_bytecode_fb // CHECK: vm.func private @abs_dispatch_0_vmvx_bytecode_fb_abs_dispatch_0{{.+}}(%arg0: i32) // CHECK-SAME: iree.reflection = {iree.benchmark = 
"dispatch"} -// CHECK: vm.call @hal.command_buffer.dispatch +// CHECK: vm.call.variadic @hal.command_buffer.dispatch diff --git a/tests/compiler_driver/hal_executable.mlir b/tests/compiler_driver/hal_executable.mlir index d3e9ed0f9a940..2e7dc2c69cd20 100644 --- a/tests/compiler_driver/hal_executable.mlir +++ b/tests/compiler_driver/hal_executable.mlir @@ -7,12 +7,10 @@ // push constants available and the descriptor sets and their bindings. // Push constants are dense (0..N) while the sets/bindings are sparse and may // contain unused or omitted entries. -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> // A single executable source definition is allowed per translation in this mode @@ -39,9 +37,9 @@ hal.executable.source public @executable { // Bindings are dereferenced by their set/binding ordinal and may have a // byte offset from the base of the descriptor. Alignment information when // available can help code generation emit better loads/stores. - %s0b0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor> - %s0b1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%offset) : !flow.dispatch.tensor> - %s0b2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor> + %s0b0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor> + %s0b1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%offset) : !flow.dispatch.tensor> + %s0b2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor> // Workgroup information can be queried from the interface. 
%workgroup_id_x = hal.interface.workgroup.id[0] : index diff --git a/tests/compiler_driver/streams.mlir b/tests/compiler_driver/streams.mlir index b1fe335bd8b0a..03ebbc331527f 100644 --- a/tests/compiler_driver/streams.mlir +++ b/tests/compiler_driver/streams.mlir @@ -54,7 +54,7 @@ stream.executable private @executable_0 { // CHECK: vm.func private @simple_mul func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { %c4 = arith.constant 4 : index - // CHECK: vm.call @hal.command_buffer.dispatch + // CHECK: vm.call.variadic @hal.command_buffer.dispatch %ret0 = flow.dispatch @executable_0::@dispatch[%c4](%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> return %ret0 : tensor<4xf32> } @@ -101,7 +101,7 @@ stream.executable private @executable_1 { // CHECK: vm.func private @simple_mul_inplace func.func @simple_mul_inplace(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { %c4 = arith.constant 4 : index - // CHECK: vm.call @hal.command_buffer.dispatch + // CHECK: vm.call.variadic @hal.command_buffer.dispatch %ret0 = flow.dispatch @executable_1::@dispatch[%c4](%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> %arg0 return %ret0 : tensor<4xf32> } @@ -155,7 +155,7 @@ func.func @simple_mul_dynamic(%arg0: tensor, %arg1: tensor) -> ten %arg0_dim0 = tensor.dim %arg0, %c0 : tensor // CHECK: vm.call @hal.buffer_view.dim %arg1_dim0 = tensor.dim %arg1, %c0 : tensor - // CHECK: vm.call @hal.command_buffer.dispatch + // CHECK: vm.call.variadic @hal.command_buffer.dispatch %ret0 = flow.dispatch @executable_2::@dispatch[%arg0_dim0](%arg0, %arg0_dim0, %arg1, %arg1_dim0) : (tensor{%arg0_dim0}, index, tensor{%arg1_dim0}, index) -> tensor{%arg0_dim0} return %ret0 : tensor } diff --git a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt index a3076520ae0ff..d5e67dbc4b6a0 100644 --- a/tests/e2e/stablehlo_ops/CMakeLists.txt +++ b/tests/e2e/stablehlo_ops/CMakeLists.txt @@ -658,7 +658,7 @@ 
iree_check_single_backend_test_suite( "dot.mlir" "dot_general.mlir" # "dynamic_slice.mlir" # TODO(#13702): update WebGPU to simplified bindings. - "dynamic_update_slice.mlir" + # "dynamic_update_slice.mlir" # TODO(#13702): update WebGPU to simplified bindings. "exponential.mlir" "exponential_fp16.mlir" "exponential_minus_one.mlir" @@ -685,8 +685,8 @@ iree_check_single_backend_test_suite( "rng_uniform.mlir" "round.mlir" "rsqrt.mlir" - "scatter.mlir" - "scatter_dynamic.mlir" + # "scatter.mlir" # TODO(#13702): update WebGPU to simplified bindings. + # "scatter_dynamic.mlir" # TODO(#13702): update WebGPU to simplified bindings. "select.mlir" "shape_assertion.mlir" "sine.mlir" diff --git a/tools/test/compile_to_phase.mlir b/tools/test/compile_to_phase.mlir index f1861a0b36c09..db22ffa50686b 100644 --- a/tools/test/compile_to_phase.mlir +++ b/tools/test/compile_to_phase.mlir @@ -41,12 +41,12 @@ // RUN: iree-compile --compile-to=vm --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=VM-PHASE // VM-PHASE: vm.rodata private @abs_dispatch_0 -// VM-PHASE: vm.call @hal.command_buffer.dispatch +// VM-PHASE: vm.call.variadic @hal.command_buffer.dispatch // RUN: iree-compile --output-format=vm-asm --compile-to=end --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=END-PHASE // RUN: iree-compile --output-format=vm-asm --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=END-PHASE // END-PHASE: vm.rodata private @abs_dispatch_0 -// END-PHASE: vm.call @hal.command_buffer.dispatch +// END-PHASE: vm.call.variadic @hal.command_buffer.dispatch func.func @abs(%input : tensor) -> (tensor) { %result = math.absf %input : tensor diff --git a/tools/test/iree-benchmark-executable.mlir b/tools/test/iree-benchmark-executable.mlir index 29505143d54aa..be8e093eef817 100644 --- a/tools/test/iree-benchmark-executable.mlir +++ 
b/tools/test/iree-benchmark-executable.mlir @@ -34,12 +34,10 @@ // CHECK: BM_dispatch_512x1x1 // lhs * rhs => dst / s0b0 * s0b1 => s0b2 -#pipeline_layout = #hal.pipeline.layout, - #hal.descriptor_set.binding<1, storage_buffer>, - #hal.descriptor_set.binding<2, storage_buffer> - ]> +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding ]> hal.executable.source public @executable { hal.executable.export public @elementwise_mul ordinal(0) layout(#pipeline_layout) attributes { @@ -52,9 +50,9 @@ hal.executable.source public @executable { } builtin.module { func.func @elementwise_mul() { - %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor> - %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor> - %dst = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor> + %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor> + %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor> + %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor> // TODO(#16554): GPU/SPIR-V lowering doesn't handle workgroup size queries. // %workgroup_size_x = hal.interface.workgroup.size[0] : index %workgroup_size_x = arith.constant 1 : index