Merge 6809dc5 into 8c41e51
hanhanW authored May 30, 2023
2 parents 8c41e51 + 6809dc5 commit 04f6c7e
Showing 8 changed files with 83 additions and 8 deletions.
2 changes: 1 addition & 1 deletion compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPasses.h
@@ -69,7 +69,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTileAndFusePass(

/// Pass to tile TilingInterface ops with given tilingLevel.
std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTilePass(
int64_t tilingLevel = -1);
int64_t tilingLevel = -1, bool reductionOnly = false);

/// Replaces llvm.intr.fma with its unfused mul and add ops.
std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUUnfuseFMAOpsPass();
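For reference, a minimal sketch of the two ways the updated factory can now be called, assuming the defaults declared above (the local variable names are illustrative only):

// Default behavior: tile every tilable op at the given level
// (reductionOnly defaults to false).
std::unique_ptr<OperationPass<func::FuncOp>> tileAll =
    createLLVMCPUTilePass(/*tilingLevel=*/1);
// New opt-in behavior: tile only the ops that carry a reduction iterator.
std::unique_ptr<OperationPass<func::FuncOp>> tileReductions =
    createLLVMCPUTilePass(/*tilingLevel=*/1, /*reductionOnly=*/true);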
14 changes: 11 additions & 3 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTile.cpp
@@ -46,8 +46,9 @@ static SmallVector<Value> buildTileSizesForOp(OpBuilder &b, Operation *op,
/// be specified. It picks the `tilingLevel`-th list as tiling sizes from
/// lowering_config.
struct LLVMCPUTilePass : LLVMCPUTileBase<LLVMCPUTilePass> {
LLVMCPUTilePass(int64_t tilingLevel = -1) {
LLVMCPUTilePass(int64_t tilingLevel, bool reductionOnly) {
this->tilingLevel.setValue(tilingLevel);
this->reductionOnly.setValue(reductionOnly);
}
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<arith::ArithDialect, affine::AffineDialect,
@@ -89,6 +90,13 @@ void LLVMCPUTilePass::runOnOperation() {
// Need a better way for handling this, but this works for now.
if (isa<tensor::PadOp>(computeOp)) continue;

if (reductionOnly &&
llvm::none_of(op.getLoopIteratorTypes(), [](auto iterType) {
return iterType == utils::IteratorType::reduction;
})) {
continue;
}

LLVM_DEBUG(llvm::dbgs() << "candidate: " << op << "\n");

IRRewriter rewriter(context);
@@ -117,8 +125,8 @@ void LLVMCPUTilePass::runOnOperation() {
} // namespace

std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTilePass(
int64_t tilingLevel) {
return std::make_unique<LLVMCPUTilePass>(tilingLevel);
int64_t tilingLevel, bool reductionOnly) {
return std::make_unique<LLVMCPUTilePass>(tilingLevel, reductionOnly);
}

} // namespace iree_compiler
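The skip added above boils down to a predicate over the op's loop iterator types. A standalone sketch, to be read with the surrounding file's includes and assuming the candidate implements TilingInterface as in the pass:

// Returns true if the op has at least one reduction loop. With
// reduction-only=true, ops for which this returns false are skipped.
static bool hasReductionIterator(TilingInterface op) {
  return llvm::any_of(op.getLoopIteratorTypes(),
                      [](utils::IteratorType iterType) {
                        return iterType == utils::IteratorType::reduction;
                      });
}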
10 changes: 10 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
@@ -221,6 +221,16 @@ void LLVMCPUTileAndFusePass::runOnOperation() {
return;
}

auto iterTypes = consumerOp.getLoopIteratorTypes();
for (auto [idx, size] : llvm::enumerate(tilingSizes)) {
if (size == 0) continue;
if (iterTypes[idx] == utils::IteratorType::reduction) {
LLVM_DEBUG(llvm::dbgs()
<< "----- skip, can't tile and fuse reduction dims -----\n");
return;
}
}

auto options = scf::SCFTilingOptions().setTileSizes(tilingSizes);
IRRewriter rewriter(context);
if (failed(applyTileAndFuse(rewriter, consumerOp, options))) {
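Equivalently, the new early return above can be read as a predicate over the requested tile sizes; a sketch under the same assumptions (static int64_t tile sizes, surrounding file's includes):

// Returns true if a non-zero tile size is requested on a reduction dimension.
// In that case LLVMCPUTileAndFuse bails out and leaves the op to the
// reduction-only LLVMCPUTile run that precedes it in the pipeline.
static bool requestsReductionTiling(TilingInterface consumerOp,
                                    ArrayRef<int64_t> tileSizes) {
  auto iterTypes = consumerOp.getLoopIteratorTypes();
  for (auto [idx, size] : llvm::enumerate(tileSizes)) {
    if (size != 0 && iterTypes[idx] == utils::IteratorType::reduction)
      return true;
  }
  return false;
}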
15 changes: 12 additions & 3 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -442,12 +442,21 @@ void addMultiTilingExpertPassPipeline(OpPassManager &passManager,
nestedModulePM.addNestedPass<func::FuncOp>(
createConcretizePadResultShapePass());
}
// Run SplitReductionPass before the final reduction Fuse pass, because
// SplitReductionPass takes care of banked-tiling.

// Apply the vector level of tiling on the ops. We first tile the reduction
// ops, and then handle the consumer ops. There is no difference between
// LLVMCPUTile and LLVMCPUTileAndFuse when there is a single consumer op; both
// just tile the consumer op. The split matters when there is a chain of
// consumer ops, e.g., reduction + broadcast + tensor.pack/pad ops, where we
// want to tile and fuse the `broadcast + pack` ops.
// SplitReductionPass is run before the final reduction Fuse pass, because it
// takes care of banked-tiling.
nestedModulePM.addNestedPass<func::FuncOp>(
createLLVMCPUSplitReductionPass(clEnableReassociateFpReductions));
nestedModulePM.addNestedPass<func::FuncOp>(
createLLVMCPUTilePass(numLevels - 1));
createLLVMCPUTilePass(numLevels - 1, /*reductionOnly=*/true));
nestedModulePM.addNestedPass<func::FuncOp>(
createLLVMCPUTileAndFusePass(numLevels - 1));

nestedModulePM.addNestedPass<func::FuncOp>(
createFuseTensorPadWithConsumerPass());
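Condensed, the vector-level ordering set up above looks as follows; the wrapper function is illustrative only, while the three pass constructors and numLevels come from the surrounding code:

// Vector-level tiling order: 1) split the reduction, 2) tile only the
// reduction ops, 3) tile-and-fuse the remaining parallel consumers
// (e.g. broadcast + tensor.pack).
static void addVectorLevelTilingSketch(OpPassManager &nestedModulePM,
                                       int64_t numLevels,
                                       bool reassociateFpReductions) {
  nestedModulePM.addNestedPass<func::FuncOp>(
      createLLVMCPUSplitReductionPass(reassociateFpReductions));
  nestedModulePM.addNestedPass<func::FuncOp>(
      createLLVMCPUTilePass(numLevels - 1, /*reductionOnly=*/true));
  nestedModulePM.addNestedPass<func::FuncOp>(
      createLLVMCPUTileAndFusePass(numLevels - 1));
}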
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
@@ -50,6 +50,7 @@ iree_lit_test_suite(
"synchronize_symbol_visibility.mlir",
"tensor_pad.mlir",
"test_config_mmt4d.mlir",
"tile.mlir",
"tile_and_fuse.mlir",
"transform_dialect_bufferize.mlir",
"transform_dialect_iree_tile_to_forall.mlir",
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
@@ -45,6 +45,7 @@ iree_lit_test_suite(
"synchronize_symbol_visibility.mlir"
"tensor_pad.mlir"
"test_config_mmt4d.mlir"
"tile.mlir"
"tile_and_fuse.mlir"
"transform_dialect_bufferize.mlir"
"transform_dialect_iree_tile_to_forall.mlir"
44 changes: 44 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile.mlir
@@ -0,0 +1,44 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-tile{tiling-level=0}))" --split-input-file %s | FileCheck %s
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-tile{tiling-level=0 reduction-only=true}))" --split-input-file %s | FileCheck %s --check-prefix=CHECK-REDUCTION

func.func @matmul_bias_add(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?xf32>) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%0 = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[10, 20, 30]]>}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1)-> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%1, %arg2 : tensor<?x?xf32>, tensor<?xf32>)
outs(%init : tensor<?x?xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = arith.addf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<?x?xf32>
return %2 : tensor<?x?xf32>
}
// CHECK-LABEL: func.func @matmul_bias_add(
// CHECK: scf.for
// CHECK: scf.for
// CHECK: linalg.fill
// CHECK: scf.for
// CHECK: scf.for
// CHECK: scf.for
// CHECK: linalg.matmul
// CHECK: scf.for
// CHECK: scf.for
// CHECK: linalg.generic
// CHECK-REDUCTION-LABEL: func.func @matmul_bias_add(
// CHECK-REDUCTION: linalg.fill
// CHECK-REDUCTION: scf.for
// CHECK-REDUCTION: scf.for
// CHECK-REDUCTION: scf.for
// CHECK-REDUCTION: linalg.matmul
// CHECK-REDUCTION-NOT: scf.for
// CHECK-REDUCTION: linalg.generic
4 changes: 3 additions & 1 deletion compiler/src/iree/compiler/Codegen/Passes.td
@@ -527,7 +527,9 @@ def LLVMCPUTile :
"mlir::iree_compiler::createLLVMCPUTilePass()";
let options = [
Option<"tilingLevel", "tiling-level", "int64_t", /*default=*/"-1",
"Use default tiling level used to retrieve the configuration from lowering_config">
"Use default tiling level used to retrieve the configuration from lowering_config">,
Option<"reductionOnly", "reduction-only", "bool", /*default=*/"false",
"Only reduction ops are tiled if the option is true">,
];
}

