From cf8180ea8428e16e3d492c17be6af3bf56db7912 Mon Sep 17 00:00:00 2001
From: Thomas
Date: Tue, 23 Mar 2021 14:50:29 -0700
Subject: [PATCH] Increase K tile size for small matrices. (#5213)

This improves mobileBert by 6%
---
 .../Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp b/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp
index 5ffd3ac9be4b..67ef07c61d94 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp
+++ b/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp
@@ -122,12 +122,14 @@ static void getMaliBestMatMulTileSizes(
     int64_t dstSize) {
   const int64_t smallMatrixSizeThreshold = 512 * 512;
   if (elementType.isF16()) {
-    // When the destination is smaller than the threshold, we prefer smaller
-    // tiles to increase parallelism.
+    // For smaller destination size we cannot fill out the GPU with bigger tile
+    // sizes. Instead we pick smaller tiles along M and N to increase the number
+    // of workgroups and a larger K tile size since we have lower pressure and
+    // need extra instructions to hide latency.
     // TODO: The threshold needs to be fine tuned by doing exploration based on
     // matrix shapes.
     if (dstSize <= smallMatrixSizeThreshold) {
-      tileSizes.push_back(TileWorkgroupSizePair({{16, 32, 8}, {8, 2, 1}}));
+      tileSizes.push_back(TileWorkgroupSizePair({{16, 32, 16}, {8, 2, 1}}));
     } else {
       tileSizes.push_back(TileWorkgroupSizePair({{16, 64, 4}, {8, 2, 1}}));
       tileSizes.push_back(TileWorkgroupSizePair({{8, 128, 4}, {8, 2, 1}}));