Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code Generation for Matrix Multiplication #653

Merged
merged 41 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
aa65b45
Adaptations to build code on my laptop
resting-dove Nov 16, 2023
8624b6f
matmul tiled as blog
resting-dove Jan 14, 2024
1a6fefb
Set optimizer level 3
resting-dove Jan 14, 2024
2803904
Hand vectorized the matmul operation
resting-dove Jan 15, 2024
ac73661
SumAll could not be lowered for single floats. Thus rely on daphne ke…
resting-dove Jan 15, 2024
0be2141
Only lower MatMul, when size fits to the vector size
resting-dove Jan 15, 2024
a2ac409
Get vec_size for Matmul from UserConfig.json
resting-dove Jan 15, 2024
c1528eb
Multiple options for MatMul Lowerings
resting-dove Jan 15, 2024
68e7718
Add Lowering options and corresponding ifs inside the match and rewri…
resting-dove Jan 19, 2024
f896955
Added Matmul command line options
resting-dove Jan 20, 2024
acba555
Vector size depending on bit width of matrix type
resting-dove Jan 24, 2024
1749e6b
distinguish between fixed and adaptable tile sizes
resting-dove Jan 24, 2024
ddc0fff
Add command line option for fixed tile sizes
resting-dove Jan 24, 2024
6161030
Some Pass always destroyed the affine loop nest
resting-dove Jan 24, 2024
0dcbc6c
Get cache sizes programmatically
resting-dove Jan 24, 2024
7dc051c
Added some unit tests under the cli directory
resting-dove Jan 25, 2024
8b8155b
Enable lower-mm in daphne-opt
resting-dove Jan 28, 2024
aa4177a
Lit tests for tiled matmul lowering
resting-dove Jan 29, 2024
8167f02
Unit tests for Vectorization
resting-dove Jan 29, 2024
928c76a
Unit tests for Matmul accuracy
resting-dove Jan 29, 2024
3250ce8
MatmulAccuracyTest no longer prints for every entry in output
resting-dove Feb 4, 2024
725e69b
Add separate unroll-jam-factor option
resting-dove Feb 4, 2024
62213ac
Recreated matmul tile tests with current tiling strategy
resting-dove Feb 4, 2024
3b67afd
Remove adaptations for building on my laptop
resting-dove Feb 4, 2024
b4f72aa
Formatted the MatMulLowering
resting-dove Feb 12, 2024
a9a7636
removed reference to x86vector dialect since no effect
resting-dove Feb 15, 2024
70e82e0
removed fma comment and prepared for reenabling int types
resting-dove Feb 15, 2024
8ab1b2b
remove unnecessary scope
resting-dove Feb 15, 2024
a689d53
affine valid loop interchange checking seems to not support loops wit…
resting-dove Feb 15, 2024
9dfb83c
Enable sum() for integer matrices
resting-dove Feb 19, 2024
b23f02c
Fill Memrefs with signed integers of various types
resting-dove Feb 19, 2024
e682dc4
SumAgg tests for integer types
resting-dove Feb 19, 2024
861581f
Enable Matmul lowering for integer valued matrices
resting-dove Feb 19, 2024
a4bde1b
Change from abstract register_size option to more clear num_vec_regis…
resting-dove Feb 19, 2024
b528810
non square matmul tests
resting-dove Feb 19, 2024
6dc4dfa
Update documentation
resting-dove Feb 19, 2024
78e9047
Unroll before loop promotion caused segfaults
resting-dove Feb 22, 2024
6259fef
Added tests for previously breaking sumAll and unroll
resting-dove Feb 23, 2024
8b6f33a
SumAll handles single value type
resting-dove Feb 23, 2024
1b9b935
Invert loops as fall back option
resting-dove Feb 29, 2024
487745b
[minor] warnings, remove code duplication
philipportner Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions UserConfig.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
{
"matmul_vec_size_bits": 0,
"matmul_tile": false,
"matmul_use_fixed_tile_sizes": true,
"matmul_fixed_tile_sizes": [4, 4, 4, 4, 4],
"matmul_unroll_factor": 1,
"matmul_unroll_jam_factor": 4,
"matmul_num_vec_registers": 16,
"use_cuda": false,
"use_vectorized_exec": false,
"use_obj_ref_mgnt": true,
Expand Down
Empty file modified install-ubuntu-packages.sh
100644 → 100755
Empty file.
12 changes: 10 additions & 2 deletions src/api/cli/DaphneUserConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,22 @@ class DaphneLogger;
* Container to pass around user configuration
*/
struct DaphneUserConfig {
// Remember to update UserConfig.json accordingly!

// Remember to update UserConfig.json accordingly!
bool use_cuda = false;
bool use_vectorized_exec = false;
bool use_distributed = false;
bool use_obj_ref_mgnt = true;
bool use_ipa_const_propa = true;
bool use_phy_op_selection = true;
bool use_mlir_codegen = false;
int matmul_vec_size_bits = 0;
bool matmul_tile = false;
int matmul_unroll_factor = 1;
int matmul_unroll_jam_factor=4;
int matmul_num_vec_registers=16;
bool matmul_use_fixed_tile_sizes = false;
std::vector<unsigned> matmul_fixed_tile_sizes = {4, 4};
bool matmul_invert_loops = false;
bool use_mlir_hybrid_codegen = false;
bool cuda_fuse_any = false;
bool vectorized_single_queue = false;
Expand Down Expand Up @@ -74,6 +81,7 @@ struct DaphneUserConfig {
size_t max_distributed_serialization_chunk_size = std::numeric_limits<int>::max() - 1024; // 2GB (-1KB to make up for gRPC headers etc.) - which is the maximum size allowed by gRPC / MPI. TODO: Investigate what might be the optimal.
int numberOfThreads = -1;
int minimumTaskSize = 1;

// minimum considered log level (e.g., no logging below ERROR (essentially suppressing WARN, INFO, DEBUG and TRACE)
spdlog::level::level_enum log_level_limit = spdlog::level::err;
std::vector<LogConfig> loggers;
Expand Down
50 changes: 49 additions & 1 deletion src/api/internal/daphne_internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "runtime/local/datastructures/IAllocationDescriptor.h"
#include <vector>
#ifdef USE_MPI
#include "runtime/distributed/worker/MPIWorker.h"
#endif
Expand All @@ -31,7 +32,6 @@
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "llvm/Support/CommandLine.h"

#ifdef USE_CUDA
Expand Down Expand Up @@ -260,10 +260,46 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
"libdir", cat(daphneOptions),
desc("The directory containing kernel libraries")
);

static opt<bool> mlirCodegen(
"mlir-codegen", cat(daphneOptions),
desc("Enables lowering of certain DaphneIR operations on DenseMatrix to low-level MLIR operations.")
);
static opt<int> matmul_vec_size_bits(
"matmul-vec-size-bits", cat(daphneOptions),
desc("Set the vector size to be used in the lowering of the MatMul operation if possible. Value of 0 is interpreted as off switch."),
init(0)
);
static opt<bool> matmul_tile(
"matmul-tile", cat(daphneOptions),
desc("Enables loop tiling in the lowering of the MatMul operation.")
);
static opt<int> matmul_unroll_factor(
"matmul-unroll-factor", cat(daphneOptions),
desc("Factor by which to unroll the finally resulting inner most loop in the lowered MatMul if tiling is used."),
init(1)
);
static opt<int> matmul_unroll_jam_factor(
"matmul-unroll-jam-factor", cat(daphneOptions),
desc("Factor by which to unroll jam the two inner most loop in the lowered MatMul if tiling is used."),
init(4)
);
static opt<int> matmul_num_vec_registers(
"matmul-num-vec-registers", cat(daphneOptions),
desc("Number of vector registers. Used during automatic tiling in lowering of MatMulOp"),
init(16)
);
static llvm::cl::list<unsigned> matmul_fixed_tile_sizes(
"matmul-fixed-tile-sizes", cat(daphneOptions),
desc("Set fixed tile sizes to be used for the lowering of MatMul if tiling is used. This also enables tiling."),
CommaSeparated
);
static opt<bool> matmul_invert_loops(
"matmul-invert-loops", cat(daphneOptions),
desc("Enable inverting of the inner two loops in the matrix multiplication as a fallback option, if tiling is not possible or deactivated.")
);


static opt<bool> performHybridCodegen(
"mlir-hybrid-codegen", cat(daphneOptions),
desc("Enables prototypical hybrid code generation combining pre-compiled kernels and MLIR code generation.")
Expand Down Expand Up @@ -382,6 +418,18 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
user_config.use_ipa_const_propa = !noIPAConstPropa;
user_config.use_phy_op_selection = !noPhyOpSelection;
user_config.use_mlir_codegen = mlirCodegen;
user_config.matmul_vec_size_bits = matmul_vec_size_bits;
user_config.matmul_tile = matmul_tile;
user_config.matmul_unroll_factor = matmul_unroll_factor;
user_config.matmul_unroll_jam_factor = matmul_unroll_jam_factor;
user_config.matmul_num_vec_registers = matmul_num_vec_registers;
user_config.matmul_invert_loops = matmul_invert_loops;
if (matmul_fixed_tile_sizes.size() > 0) {
user_config.matmul_use_fixed_tile_sizes = true;
user_config.matmul_fixed_tile_sizes = matmul_fixed_tile_sizes;
// Specifying a fixed tile size will be interpreted as wanting to use tiling.
user_config.matmul_tile = true;
}
user_config.use_mlir_hybrid_codegen = performHybridCodegen;

if(!libDir.getValue().empty())
Expand Down
29 changes: 21 additions & 8 deletions src/compiler/execution/DaphneIrExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <ir/daphneir/Daphne.h>
#include <ir/daphneir/Passes.h>
#include <ir/daphneir/Passes.h.inc>
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/Dialect/LLVMIR/Transforms/Passes.h>

Expand All @@ -32,6 +33,7 @@
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
Expand Down Expand Up @@ -298,25 +300,36 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
mlir::daphne::createPrintIRPass("IR before codegen pipeline"));

pm.addPass(mlir::daphne::createDaphneOptPass());

if (!userConfig_.use_mlir_hybrid_codegen) {
pm.addPass(mlir::daphne::createMatMulOpLoweringPass());
}

pm.addPass(mlir::daphne::createEwOpLoweringPass());
pm.addPass(mlir::daphne::createAggAllOpLoweringPass());
pm.addPass(mlir::daphne::createMapOpLoweringPass());
pm.addPass(mlir::createInlinerPass());

pm.addPass(mlir::daphne::createEwOpLoweringPass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::createLoopFusionPass());

if (!userConfig_.use_mlir_hybrid_codegen) {
pm.addPass(mlir::daphne::createMatMulOpLoweringPass(
userConfig_.matmul_tile, userConfig_.matmul_vec_size_bits,
userConfig_.matmul_fixed_tile_sizes,
userConfig_.matmul_use_fixed_tile_sizes,
userConfig_.matmul_unroll_factor, userConfig_.matmul_unroll_jam_factor,
userConfig_.matmul_num_vec_registers,
userConfig_.matmul_invert_loops));
if (userConfig_.explain_mlir_codegen)
pm.addPass(
mlir::daphne::createPrintIRPass("IR directly after lowering MatMulOp."));
}

pm.addPass(mlir::createConvertMathToLLVMPass());
pm.addPass(mlir::daphne::createModOpLoweringPass());
pm.addPass(mlir::createCanonicalizerPass());
pm.addPass(mlir::createCSEPass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::createLoopFusionPass());
pm.addNestedPass<mlir::func::FuncOp>(
mlir::createAffineScalarReplacementPass());
pm.addPass(mlir::createLowerAffinePass());

mlir::LowerVectorToLLVMOptions lowerVectorToLLVMOptions;
pm.addPass(mlir::createConvertVectorToLLVMPass(lowerVectorToLLVMOptions));

if (userConfig_.explain_mlir_codegen)
pm.addPass(
mlir::daphne::createPrintIRPass("IR after codegen pipeline"));
Expand Down
Loading
Loading