Skip to content

Commit

Permalink
[Codegen][SME] Recognize transposed output indexing map as transpose (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
MacDue authored Aug 30, 2024
1 parent 3ceffb6 commit a7ed2d7
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
3 changes: 2 additions & 1 deletion compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ bool isLinalgGeneric2DTranspose(linalg::GenericOp genericOp) {
// Check that the two indexing maps are a permutation of each other.
SmallVector<AffineMap> indexingMaps = genericOp.getIndexingMapsArray();
bool isTranspose =
indexingMaps[0].isPermutation() && indexingMaps[1].isIdentity();
(indexingMaps[0].isPermutation() && indexingMaps[1].isIdentity()) ||
(indexingMaps[1].isPermutation() && indexingMaps[0].isIdentity());
if (!isTranspose)
return false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,36 @@ func.func @transpose_f32() attributes {hal.executable.target = #executable_targe
#hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
// Regression test for recognizing a transpose expressed through the *output*
// indexing map: the input map is the identity (d0, d1) and the output map is
// the permutation (d1, d0). The Utils.cpp change in this commit makes
// isLinalgGeneric2DTranspose accept this form in addition to the
// input-permutation form, so this generic should get the SME transpose
// lowering configuration (checked by the CHECK lines below).
func.func @transpose_output_indexing_map_f32() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
// Bind the 32x32 f32 input (read-only) and output buffers.
%0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf32>>
%1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf32>> -> tensor<32x32xf32>
%3 = tensor.empty() : tensor<32x32xf32>
// Transpose via the output map: ins map is identity, outs map is (d1, d0).
// The body just yields the input element; the permutation lives entirely in
// the indexing maps.
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf32>) outs(%3 : tensor<32x32xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<32x32xf32>
flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : tensor<32x32xf32> -> !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
return
}

// CHECK: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[4, 16], {{\[}}[4], [4]], [0, 0], [0, 0]]>
// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: func.func @transpose_output_indexing_map_f32()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.generic
// CHECK-SAME: lowering_config = #[[CONFIG]]

// -----

#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
#hal.descriptor_set.layout<0, bindings = [
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<1, storage_buffer>
]>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @transpose_f64() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf64>>
Expand Down

0 comments on commit a7ed2d7

Please sign in to comment.