Fix bug in MobileBert with vectorization enabled. (iree-org#5211)
ThomasRaoux authored Mar 23, 2021
1 parent b7e2b09 commit f5804ec
Showing 2 changed files with 32 additions and 11 deletions.
24 changes: 14 additions & 10 deletions iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp
@@ -317,6 +317,14 @@ LogicalResult getOpLaunchConfig(linalg::GenericOp op,
const SPIRVCodegenOptions &options,
TileSizesListType &tileSizes,
LaunchConfigInfo &config) {
// Skip vectorization for non-minor identity inputs as it generates
// transfer_read ops with permutation maps that we currently cannot lower.
// TODO: Remove this restriction once the lowering of the permutation map is
// supported in core.
bool vectorize = options.enableVectorization &&
llvm::all_of(op.getIndexingMaps(), [](AffineMap &map) {
return map.isMinorIdentity();
});
int64_t subgroupSize =
targetEnv.getResourceLimits().subgroup_size().getValue().getSExtValue();
config.workgroupSize[0] = subgroupSize;
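
As a side note on the check introduced above, the following standalone sketch (assumed MLIR C++ API usage, not code from this commit) contrasts an indexing map that passes isMinorIdentity() with a permuted one; the permuted case is the kind of input that produces a transfer_read with a permutation map the current lowering cannot handle, so the new vectorize flag disables vectorization for it.

// Standalone illustration (assumption: MLIR headers and API roughly as of early 2021).
#include <cassert>
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"

void illustrateMinorIdentityCheck() {
  mlir::MLIRContext ctx;
  // (d0, d1, d2) -> (d1, d2): a minor identity, so vectorization stays on.
  auto minorId =
      mlir::AffineMap::getMinorIdentityMap(/*dims=*/3, /*results=*/2, &ctx);
  assert(minorId.isMinorIdentity());
  // (d0, d1) -> (d1, d0): a transpose, not a minor identity; the llvm::all_of
  // above returns false and the config falls back to the scalar path.
  auto transposed = mlir::AffineMap::getPermutationMap({1u, 0u}, &ctx);
  assert(!transposed.isMinorIdentity());
}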
@@ -330,9 +338,10 @@ LogicalResult getOpLaunchConfig(linalg::GenericOp op,
// avoid a mismatch in the number of workgroup dispatched, we pick a tile size
// to have one element per thread.
// TODO: Remove this once we switch to linalg on tensor path.
if (options.enableVectorization) {
if (vectorize) {
candidateTileSizes.append({4 * subgroupSize, 2 * subgroupSize});
}

candidateTileSizes.push_back(subgroupSize);
// Use the first tile size that can divide the shape. If the shape is not
// aligned on any of the tile sizes pick the smallest tile of one element per
@@ -351,21 +360,16 @@ LogicalResult getOpLaunchConfig(linalg::GenericOp op,
// If the shape is not exactly aligned on the tile size skip the second level
// of tiling as it expect the number of iteration to be exactly equal to the
// number of processors.
if (outputShape.getShape().back() % lowerTs != 0) return success();

// Skip vectorization for non-minor identity inputs as it generates
// transfer_read ops with permutation maps that we currently cannot lower.
// TODO: Remove this restriction once the lowering of the permutation map is
// supported in core.
for (unsigned i = 0, e = op.getNumInputs(); i < e; i++) {
if (!op.getInputIndexingMap(i).isMinorIdentity()) return success();
if (!vectorize || outputShape.getShape().back() % lowerTs != 0) {
config.vectorize = false;
return success();
}

tileSizes.emplace_back(); // Subgroup level.
ts.back() = lowerTs / subgroupSize;
tileSizes.emplace_back(ts); // Thread level.
// Vectorize only if we are processing more than one element per thread.
config.vectorize = options.enableVectorization && (ts.back() > 1);
config.vectorize = vectorize && (ts.back() > 1);
return success();
}

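To make the tile-size selection above concrete, here is a small worked sketch with assumed numbers (none of them from the commit): with a subgroup size of 32 and vectorization on, the candidate workgroup tile sizes are {128, 64, 32}. For an innermost output dimension of 256 the first candidate divides evenly, so lowerTs = 128, each thread covers 128 / 32 = 4 elements, and config.vectorize stays true. For an innermost dimension of 96 only 32 divides evenly, each thread covers a single element, and vectorization is switched off.

// Simplified sketch of the selection logic (assumed values, simplified
// fallback; not the pass itself).
#include <cstdint>
#include <vector>

struct Choice {
  int64_t workgroupTile;
  int64_t elementsPerThread;
  bool vectorize;
};

Choice pickLaunchConfig(int64_t subgroupSize, int64_t innerDim, bool vectorize) {
  std::vector<int64_t> candidates;
  if (vectorize) candidates = {4 * subgroupSize, 2 * subgroupSize};
  candidates.push_back(subgroupSize);
  for (int64_t ts : candidates) {
    if (innerDim % ts != 0) continue;  // use the first tile size dividing the shape
    int64_t perThread = ts / subgroupSize;
    // Vectorize only when each thread processes more than one element.
    return {ts, perThread, vectorize && perThread > 1};
  }
  // Shape not aligned on any candidate: one element per thread, no vectors.
  return {subgroupSize, 1, false};
}
// pickLaunchConfig(32, 256, true) -> {128, 4, true}
// pickLaunchConfig(32,  96, true) -> {32, 1, false}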
@@ -120,7 +120,24 @@ class LinalgTileAndDistributePass
llvm::dbgs() << "}\n";
}
});

// Annotate the linalg op with the original types.
for (linalg::LinalgOp op : linalgOps) {
const char inputTypeAttrName[] = "iree.codegen.original_input_types";
const char outputTypeAttrName[] = "iree.codegen.original_output_types";

SmallVector<Type> inputTypes;
SmallVector<Type> outputTypes;
for (Type type : op.getInputBufferTypes()) inputTypes.push_back(type);
for (Type type : op.getOutputBufferTypes()) outputTypes.push_back(type);
if (!inputTypes.empty()) {
op->setAttr(inputTypeAttrName,
Builder(op).getTypeArrayAttr(inputTypes));
}
if (!outputTypes.empty()) {
op->setAttr(outputTypeAttrName,
Builder(op).getTypeArrayAttr(outputTypes));
}
}
TileAndFuseOptions tileAndFuseOptions = {
getWorkgroupDistributionOptions(), allocateWorkgroupMemory};
if (failed(tileAndFuseLinalgBufferOps(funcOp, linalgOps, dependenceGraph,

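The hunk in the second changed file records each linalg op's pre-tiling buffer types as array attributes. A hypothetical consumer of that annotation (sketched below with assumed MLIR API usage; the reading side is not part of this commit) would recover the original input types like this:

// Hypothetical reader for the attribute written above; the attribute name is
// taken from the diff, everything else is an assumed sketch.
#include "llvm/ADT/SmallVector.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Types.h"

llvm::SmallVector<mlir::Type, 4> getOriginalInputTypes(mlir::Operation *op) {
  llvm::SmallVector<mlir::Type, 4> types;
  auto attr =
      op->getAttrOfType<mlir::ArrayAttr>("iree.codegen.original_input_types");
  if (!attr) return types;  // op was not annotated
  // getTypeArrayAttr wraps each Type in a TypeAttr, so unwrap it here.
  for (mlir::Attribute elt : attr)
    types.push_back(elt.cast<mlir::TypeAttr>().getType());
  return types;
}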