Skip to content

Commit

Permalink
Recreated matmul tile tests with current tiling strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
resting-dove committed Feb 4, 2024
1 parent ab48bb4 commit 449c373
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 102 deletions.
55 changes: 20 additions & 35 deletions src/compiler/lowering/MatMulOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@
#include <utility>
#include <vector>

#include "api/cli/DaphneUserConfig.h"
#include "compiler/utils/LoweringUtils.h"
#include "hwloc.h"
#include "ir/daphneir/Daphne.h"
#include "ir/daphneir/Passes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
Expand All @@ -52,7 +50,6 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dominance.h"
Expand Down Expand Up @@ -93,6 +90,7 @@ struct LowerMatMulOpOptions {
LowerMatMulOpOptions &setTileSizes(std::vector<unsigned> sizes) {
tile_sizes.clear();
for (auto s : sizes) {
if (s <= 1) throw std::invalid_argument("Tile sizes must be an integer larger than 1.");
tile_sizes.push_back(s);
}
return *this;
Expand Down Expand Up @@ -232,12 +230,10 @@ llvm::SmallVector<AffineForOp, 3> vectorizedAffineMatMul(mlir::Value &lhs, mlir:

class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
const LowerMatMulOpOptions options;
std::shared_ptr<spdlog::logger> logger;
public:
using OpConversionPattern::OpConversionPattern;
explicit MatMulLowering(MLIRContext *context, LowerMatMulOpOptions const &options)
: OpConversionPattern(context, PatternBenefit(1)), options(options) {
logger = spdlog::get("compiler");
}

bool is_vectorizable(ArrayRef<int64_t> const rhsShape, Type const matrixElementType) const {
Expand Down Expand Up @@ -348,14 +344,20 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
// KC * NR ~ L1,
// MC * KC ~ L2,
// NC * MC ~ L3
// & NR divides NC & MR divides MC
SmallVector<unsigned, 5> getTileSizesFromCache(Type const matrixElementType, int64_t vec_size, int64_t loop_length) const {
SmallVector<unsigned, 5> tile_sizes;
int bitwidth = matrixElementType.getIntOrFloatBitWidth();
tile_sizes.push_back(std::max(1, (int)(std::sqrt(options.register_size / bitwidth))));
tile_sizes.push_back(tile_sizes.back());
if (options.cache_sizes.size() > 0) {
int idx = 0;
for (auto cache_size=options.cache_sizes.begin(); cache_size != options.cache_sizes.end(); cache_size++) {
tile_sizes.push_back(std::max(1, (int)(*cache_size / tile_sizes.back() / bitwidth)));
unsigned candidate = std::max(1, (int)(*cache_size / tile_sizes.back() / bitwidth));
if (idx == 3) candidate = candidate - (candidate % tile_sizes[0]);
if (idx == 4) candidate = candidate - (candidate % tile_sizes[1]);
tile_sizes.push_back(candidate);
idx++;
}
}
while (tile_sizes.size() < 5) {
Expand All @@ -381,11 +383,7 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
// tile i with MC, j with NC, k with KC
llvm::SmallVector<AffineForOp> tiledNest;
if (failed(tilePerfectlyNested(loopNest, {MC, NC, KC}, &tiledNest))) {
if(logger->should_log(spdlog::level::debug)) {
std::string s;
llvm::raw_string_ostream stream(s);
logger->debug("Could not tile the loop nest in MatMulLowering", s);
}
spdlog::warn("Could not tile the loop nest in MatMulLowering");
};
assert(tiledNest[0].getStep() == MC && "0 should have step size MC.");
assert(tiledNest[1].getStep() == NC * vec_size && "1 should have step size NC * vec_size.");
Expand All @@ -396,21 +394,13 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {

// Further tile the i mod MC loop with MR
if (failed(tilePerfectlyNested(tiledNest[3], {MR}))) {
if(logger->should_log(spdlog::level::debug)) {
std::string s;
llvm::raw_string_ostream stream(s);
logger->debug("Could not tile the second i loop in MatMulLowering", s);
}
spdlog::warn("Could not tile the second i loop in MatMulLowering");
};

// Further tile the j mod NC loop with NR
assert(tiledNest[4].getStep() == 1 * vec_size && "4 should have step size vec_size.");
if (failed(tilePerfectlyNested(tiledNest[4], {NR}))) {
if(logger->should_log(spdlog::level::debug)) {
std::string s;
llvm::raw_string_ostream stream(s);
logger->debug("Could not tile the second j loop in MatMulLowering", s);
}
spdlog::warn("Could not tile the second j loop in MatMulLowering");
};


Expand Down Expand Up @@ -443,26 +433,21 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
assert(blisTiledLoops[1].getStep() == KC && "blisTiled: 1 should have step size 1."); // k loops
assert(blisTiledLoops[5].getStep() == 1 && "blisTiled: 5 should have step size 1.");
// Unroll-and-jam causes a segfault if it is called on a loop that is not cleanly divided.
if (blisTiledLoops[5].getUpperBound().getMap().getNumResults() == 1) {
(void)loopUnrollJamUpToFactor(blisTiledLoops[5], options.unroll_jam_factor);
Block &block = blisTiledLoops[5].getRegion().front();
auto itForOp = block.getOps<AffineForOp>();
for (auto f : itForOp) {
if (f.getUpperBound().getMap().getNumResults() == 1) {
(void)loopUnrollJamUpToFactor(f,options.unroll_jam_factor);
if (blisTiledLoops[5].getUpperBound().getMap().getNumResults() == 1 &&
succeeded(loopUnrollJamUpToFactor(blisTiledLoops[5], options.unroll_jam_factor))) {
if (blisTiledLoops[6].getUpperBound().getMap().getNumResults() != 1 ||
failed(loopUnrollJamUpToFactor(blisTiledLoops[6],options.unroll_jam_factor))) {
spdlog::warn("Could not unroll the (j mod NC) mod NR loop in MatMulLowering");
}
}
} else {
spdlog::warn("Could not unroll the (i mod MC) mod MR loop in MatMulLowering");
}

llvm::SmallVector<AffineForOp> lastNest;
getPerfectlyNestedLoops(lastNest, blisTiledLoops.front());

if (failed(loopUnrollUpToFactor(lastNest.back(), KU))) {
if(logger->should_log(spdlog::level::debug)) {
std::string s;
llvm::raw_string_ostream stream(s);
logger->debug("Could not unroll the K loop in MatMulLowering", s);
}
spdlog::warn("Could not unroll the K loop in MatMulLowering");
}
int64_t i = 0;
while (succeeded(promoteIfSingleIteration(lastNest[i])) && i < 4) {
Expand Down Expand Up @@ -589,4 +574,4 @@ std::unique_ptr<OperationPass<ModuleOp>> mlir::daphne::createMatMulOpLoweringPas
// Option-free factory overload: returns a newly allocated MatMulLoweringPass constructed without arguments.
// This is used by daphne-opt and automatically inserts the options provided on the command line into the pass.
std::unique_ptr<OperationPass<ModuleOp>> mlir::daphne::createMatMulOpLoweringPass() {
return std::make_unique<MatMulLoweringPass>();
}
}
36 changes: 35 additions & 1 deletion test/api/cli/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ void compareDaphneToStringNumerically(const std::string & exp, const std::string
// Long double just to be sure
f_exp = std::stold(s_exp);
f_out = std::stold(s_out);
} catch (std::invalid_argument) {
} catch (std::invalid_argument const&) {
FAIL("The result does not have the right number of outputs.");
}
correct_so_far = std::norm(f_exp - f_out) < epsilon * std::norm(f_exp);
Expand All @@ -362,6 +362,40 @@ void compareDaphneToStringNumerically(const std::string & exp, const std::string
CHECK(err.str() == "");
}

/**
 * @brief Checks that the numerical values in the standard outputs of two DaphneDSL script
 * runs agree with each other up to a relative error.
 *
 * After skipping the leading lines, both streams are read as whitespace-separated tokens.
 * Each pair of tokens is parsed as a floating-point number and the squared difference is
 * compared against `epsilon` times the squared magnitude of the left value. The comparison
 * stops at the first pair that is too far apart.
 *
 * @tparam Args Unused; kept so that existing call sites remain valid.
 * @param left The output from a runDaphne().
 * @param right The output from a runDaphne().
 * @param ignore_lines How many lines at the beginning of each output do not contain numerical
 * values to compare (such as a leading header line) and are skipped.
 * @param epsilon The relative error that is acceptable (applied to the squared magnitudes).
 */
template<typename... Args>
void compareDaphneRunsNumerically(std::stringstream & left, std::stringstream & right, const int ignore_lines, const long double epsilon) {
    std::string s_left;
    std::string s_right;

    // Skip the non-numerical prefix of both outputs; `<` also terminates for a negative count.
    for (int i = 0; i < ignore_lines; i++) {
        std::getline(left, s_left);
        std::getline(right, s_right);
    }

    bool correct_so_far = true;
    while (correct_so_far) {
        // Extract whitespace-separated tokens so that values following a line break are
        // compared as well (splitting on ' ' only would glue "c\nd" into a single token).
        const bool has_left = static_cast<bool>(left >> s_left);
        const bool has_right = static_cast<bool>(right >> s_right);
        if (!has_left || !has_right) {
            // One run produced more values than the other.
            if (has_left != has_right)
                FAIL("The result does not have the right number of outputs.");
            break;
        }

        // Keep the parsed values in long double so no precision is lost before comparing.
        long double f_left = 0;
        long double f_right = 0;
        try {
            f_left = std::stold(s_left);
            f_right = std::stold(s_right);
        } catch (std::invalid_argument const&) {
            FAIL("The result does not have the right number of outputs.");
        }

        // `<=` rather than `<` so that two values that are both exactly zero count as equal.
        correct_so_far = std::norm(f_left - f_right) <= epsilon * std::norm(f_left);
    }
    CHECK(correct_so_far == true);
}

/**
* @brief Compares the standard output of executing the given DaphneDSL script
* with the command line interface of the DAPHNE Prototype to a reference text
Expand Down
28 changes: 9 additions & 19 deletions test/api/cli/codegen/MatMulTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

const std::string dirPath = "test/api/cli/codegen/";

TEST_CASE("matmul", "[codegen][matmul]") {
TEST_CASE("matmul", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, double)\n"
"45 45 45\n"
Expand All @@ -35,7 +35,7 @@ TEST_CASE("matmul", "[codegen][matmul]") {
compareDaphneToStr(result, dirPath + "matmul.daphne");
compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen");
}
TEST_CASE("matmul vectorized", "[codegen][matmul]") {
TEST_CASE("matmul vectorized", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, double)\n"
"45 45 45\n"
Expand All @@ -46,36 +46,36 @@ TEST_CASE("matmul vectorized", "[codegen][matmul]") {
compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--no-obj-ref-mgnt",
"--matmul-vec-size-bits=128");
}
TEST_CASE("matmul tiled", "[codegen][matmul]") {
TEST_CASE("matmul tiled", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, double)\n"
"45 45 45\n"
"45 45 45\n"
"45 45 45\n";

compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-fixed-tile-sizes=1,1,1");
compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-fixed-tile-sizes=2,2,2");
}
TEST_CASE("matmul tiled and vectorized", "[codegen][matmul]") {
TEST_CASE("matmul tiled and vectorized", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, double)\n"
"45 45 45\n"
"45 45 45\n"
"45 45 45\n";

compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-vec-size-bits=64",
"--matmul-fixed-tile-sizes=1,1,1");
"--matmul-fixed-tile-sizes=2,2,2");
}
TEST_CASE("matmul single", "[codegen][matmul]") {
TEST_CASE("matmul single", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, single)\n"
"DenseMatrix(3x3, float)\n"
"45 45 45\n"
"45 45 45\n"
"45 45 45\n";

compareDaphneToStr(result, dirPath + "matmul_single.daphne");
compareDaphneToStr(result, dirPath + "matmul_single.daphne", "--mlir-codegen");
}
TEST_CASE("matmul non square", "[codegen][matmul]") {
TEST_CASE("matmul non square", TAG_CODEGEN TAG_MATMUL) {
std::string result =
"DenseMatrix(3x3, double)\n"
"60 60 60\n"
Expand All @@ -85,16 +85,6 @@ TEST_CASE("matmul non square", "[codegen][matmul]") {
compareDaphneToStr(result, dirPath + "matmul_non_square.daphne");
compareDaphneToStr(result, dirPath + "matmul_non_square.daphne", "--mlir-codegen");
}
/* TEST_CASE("slightly larger matmul", "[codegen][matmul]") {
std::string result = readTextFile(dirPath + "matmul128.result");
compareDaphneToStr(result, dirPath + "matmul128.daphne");
compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen");
compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4,5,6");
compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4,5");
compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4");
}
*/


TEST_CASE("matvec", TAG_CODEGEN) {
Expand Down
7 changes: 7 additions & 0 deletions test/api/cli/codegen/matmul576.daphne
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Multiplies two random N x N matrices and prints the product.
# Run by the "matmul accuracy 576" test once without and once with --mlir-codegen.
N = 576;
# NOTE(review): the trailing arguments (42 / 44) look like fixed seeds, presumably so that
# separate runs produce the same inputs - confirm against the rand() documentation.
A = rand(N, N, -10.0, 10.0, 1.0, 42);
B = rand(N, N, -10.0, 10.0, 1.0, 44);

C = A@B;

print(C);
15 changes: 14 additions & 1 deletion test/codegen/MatMulAccuracyTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,17 @@ std::string result = readTextFile(dirPath + "matmul128.result");
compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-fixed-tile-sizes=2");
compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-vec-size-bits=64");
compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-fixed-tile-sizes=2,3,4", "--matmul-vec-size-bits=64");
}
}

// Runs matmul576.daphne once without and once with --mlir-codegen (fixed tile sizes) and
// checks that both runs print numerically matching results.
TEST_CASE("matmul accuracy 576", "[codegen][matmul]") {
    const auto scriptPath = dirPath + "matmul576.daphne";
    const double epsilon = std::numeric_limits<double>::epsilon();

    // Reference run without --mlir-codegen.
    std::stringstream kernel;
    std::stringstream err_kernel;
    const int kernelStatus = runDaphne(kernel, err_kernel, scriptPath.c_str());
    CHECK(kernelStatus == StatusCode::SUCCESS);

    // Run with MLIR code generation and explicitly fixed tile sizes.
    std::stringstream codegen;
    std::stringstream err_codegen;
    const int codegenStatus = runDaphne(codegen, err_codegen, "--mlir-codegen", "--matmul-fixed-tile-sizes=2,3,4,5,6", scriptPath.c_str());
    CHECK(codegenStatus == StatusCode::SUCCESS);

    // Skip the first output line of each run, then compare the values.
    compareDaphneRunsNumerically(kernel, codegen, 1, epsilon);
}
Loading

0 comments on commit 449c373

Please sign in to comment.