Recreated matmul tile tests with current tiling strategy

daphne-eu · Feb 4, 2024 · 449c373 · 449c373
1 parent ab48bb4
commit 449c373
Show file tree

Hide file tree

Showing 8 changed files with 128 additions and 102 deletions.
diff --git a/src/compiler/lowering/MatMulOpLowering.cpp b/src/compiler/lowering/MatMulOpLowering.cpp
@@ -23,13 +23,11 @@
 #include <utility>
 #include <vector>
 
-#include "api/cli/DaphneUserConfig.h"
 #include "compiler/utils/LoweringUtils.h"
 #include "hwloc.h"
 #include "ir/daphneir/Daphne.h"
 #include "ir/daphneir/Passes.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
@@ -52,7 +50,6 @@
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/IR/Block.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dominance.h"
@@ -93,6 +90,7 @@ struct LowerMatMulOpOptions {
     LowerMatMulOpOptions &setTileSizes(std::vector<unsigned> sizes) {
         tile_sizes.clear();
         for (auto s : sizes) {
+            if (s <= 1) throw std::invalid_argument("Tile sizes must be an integer larger than 1.");
             tile_sizes.push_back(s);
         }  
         return *this;
@@ -232,12 +230,10 @@ llvm::SmallVector<AffineForOp, 3> vectorizedAffineMatMul(mlir::Value &lhs, mlir:
 
 class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
     const LowerMatMulOpOptions options;
-    std::shared_ptr<spdlog::logger> logger;
    public:
     using OpConversionPattern::OpConversionPattern;
     explicit MatMulLowering(MLIRContext *context, LowerMatMulOpOptions const &options) 
         : OpConversionPattern(context, PatternBenefit(1)), options(options) {
-            logger = spdlog::get("compiler");
         } 
 
     bool is_vectorizable(ArrayRef<int64_t> const rhsShape, Type const matrixElementType) const {
@@ -348,14 +344,20 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
     //          KC * NR ~ L1,
     //          MC * KC ~ L2,
     //          NC * MC ~ L3 
+    //          & NR divides NC & MR divides MC
     SmallVector<unsigned, 5> getTileSizesFromCache(Type const matrixElementType, int64_t vec_size, int64_t loop_length) const {
         SmallVector<unsigned, 5> tile_sizes;
         int bitwidth = matrixElementType.getIntOrFloatBitWidth();
         tile_sizes.push_back(std::max(1, (int)(std::sqrt(options.register_size / bitwidth))));
         tile_sizes.push_back(tile_sizes.back());
         if (options.cache_sizes.size() > 0) {
+            int idx = 0;
             for (auto cache_size=options.cache_sizes.begin(); cache_size != options.cache_sizes.end(); cache_size++) {
-                tile_sizes.push_back(std::max(1, (int)(*cache_size / tile_sizes.back() / bitwidth)));
+                unsigned candidate = std::max(1, (int)(*cache_size / tile_sizes.back() / bitwidth));
+                if (idx == 3) candidate = candidate - (candidate % tile_sizes[0]);
+                if (idx == 4) candidate = candidate - (candidate % tile_sizes[1]);
+                tile_sizes.push_back(candidate);
+                idx++;
             }
         }
         while (tile_sizes.size() < 5) {
@@ -381,11 +383,7 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
         // tile i with MC, j with NC, k with KC
         llvm::SmallVector<AffineForOp> tiledNest;
         if (failed(tilePerfectlyNested(loopNest, {MC, NC, KC}, &tiledNest))) {
-            if(logger->should_log(spdlog::level::debug)) {
-                std::string s;
-                llvm::raw_string_ostream stream(s);
-                logger->debug("Could not tile the loop nest in MatMulLowering", s);
-            }
+            spdlog::warn("Could not tile the loop nest in MatMulLowering");
         };
         assert(tiledNest[0].getStep() == MC && "0 should have step size MC.");
         assert(tiledNest[1].getStep() == NC * vec_size && "1 should have step size NC * vec_size.");
@@ -396,21 +394,13 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
 
         // Further tile the i mod MC loop with MR
         if (failed(tilePerfectlyNested(tiledNest[3], {MR}))) {
-            if(logger->should_log(spdlog::level::debug)) {
-                std::string s;
-                llvm::raw_string_ostream stream(s);
-                logger->debug("Could not tile the second i loop in MatMulLowering", s);
-            }
+            spdlog::warn("Could not tile the second i loop in MatMulLowering");
         };
 
         // Further tile the j mod NC loop with NR
         assert(tiledNest[4].getStep() == 1 * vec_size && "4 should have step size vec_size.");
         if (failed(tilePerfectlyNested(tiledNest[4], {NR}))) {
-            if(logger->should_log(spdlog::level::debug)) {
-                std::string s;
-                llvm::raw_string_ostream stream(s);
-                logger->debug("Could not tile the second j loop in MatMulLowering", s);
-            }
+            spdlog::warn("Could not tile the second j loop in MatMulLowering");
         };
 
 
@@ -443,26 +433,21 @@ class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
         assert(blisTiledLoops[1].getStep() == KC && "blisTiled: 1 should have step size 1.");  // k loops
         assert(blisTiledLoops[5].getStep() == 1 && "blisTiled: 5 should have step size 1.");
         // Unroll jam causes Segfault, if called in a way where the loop is not cleanly divided.
-        if (blisTiledLoops[5].getUpperBound().getMap().getNumResults() == 1) {
-            (void)loopUnrollJamUpToFactor(blisTiledLoops[5], options.unroll_jam_factor);
-            Block &block = blisTiledLoops[5].getRegion().front();
-            auto itForOp = block.getOps<AffineForOp>();
-            for (auto f : itForOp) {
-                if (f.getUpperBound().getMap().getNumResults() == 1) {
-                    (void)loopUnrollJamUpToFactor(f,options.unroll_jam_factor);
+        if (blisTiledLoops[5].getUpperBound().getMap().getNumResults() == 1 &&
+            succeeded(loopUnrollJamUpToFactor(blisTiledLoops[5], options.unroll_jam_factor))) {
+                if (blisTiledLoops[6].getUpperBound().getMap().getNumResults() != 1 || 
+                failed(loopUnrollJamUpToFactor(blisTiledLoops[6],options.unroll_jam_factor))) {                     
+                    spdlog::warn("Could not unroll the (j mod NC) mod NR loop in MatMulLowering");
                 }
-            }
+        } else {
+            spdlog::warn("Could not unroll the (i mod MC) mod MR loop in MatMulLowering");
         }
 
         llvm::SmallVector<AffineForOp> lastNest;
         getPerfectlyNestedLoops(lastNest, blisTiledLoops.front()); 
 
         if (failed(loopUnrollUpToFactor(lastNest.back(), KU))) {
-            if(logger->should_log(spdlog::level::debug)) {
-                std::string s;
-                llvm::raw_string_ostream stream(s);
-                logger->debug("Could not unroll the K loop in MatMulLowering", s);
-            }
+            spdlog::warn("Could not unroll the K loop in MatMulLowering");
         }
         int64_t i = 0;
         while (succeeded(promoteIfSingleIteration(lastNest[i])) && i < 4) {
@@ -589,4 +574,4 @@ std::unique_ptr<OperationPass<ModuleOp>> mlir::daphne::createMatMulOpLoweringPas
 // This is used by daphne-opt and automatically inserts the options provided on the command line into the pass.
 std::unique_ptr<OperationPass<ModuleOp>>  mlir::daphne::createMatMulOpLoweringPass() {
     return std::make_unique<MatMulLoweringPass>();
-}
+}
diff --git a/test/api/cli/Utils.h b/test/api/cli/Utils.h
@@ -353,7 +353,7 @@ void compareDaphneToStringNumerically(const std::string & exp, const std::string
             // Long double just to be sure
             f_exp = std::stold(s_exp);
             f_out = std::stold(s_out);
-        } catch (std::invalid_argument) {
+        } catch (std::invalid_argument const&) {
             FAIL("The result does not have the right number of outputs.");
         }
         correct_so_far = std::norm(f_exp - f_out) < epsilon * std::norm(f_exp);
@@ -362,6 +362,40 @@ void compareDaphneToStringNumerically(const std::string & exp, const std::string
     CHECK(err.str() == "");
 }
 
+/**
+ * @brief Checks that the numerical values in the standard output of two DaphneDSL script
+ * runs agree with each other within a relative tolerance.
+ * 
+ * @param left The output from a runDaphne().
+ * @param right The output from a runDaphne().
+ * @param ignore_lines How many lines at the beginning of each output do not contain numerical values and are skipped.
+ * @param epsilon Tolerance on the squared values: a pair (l, r) passes if (l - r)^2 <= epsilon * l^2.
+ */
+template<typename... Args>
+void compareDaphneRunsNumerically(std::stringstream & left, std::stringstream & right, const int ignore_lines, const long double epsilon) {
+    std::string s_left;
+    std::string s_right;
+    for (auto i = 0; i != ignore_lines; i++) {
+        std::getline(left, s_left);
+        std::getline(right, s_right);
+    }
+    bool correct_so_far = true;
+    while (std::getline(left, s_left, ' ') && std::getline(right, s_right, ' ') && correct_so_far) {
+        // Keep the parsed values as long double; storing them in float would discard most of their precision.
+        long double f_left = 0;
+        long double f_right = 0;
+        try {
+            f_left = std::stold(s_left);
+            f_right = std::stold(s_right);
+        } catch (std::invalid_argument const&) {
+            FAIL("The result does not have the right number of outputs.");
+        }
+        // "<=" rather than "<" so that two exact zeros count as equal.
+        correct_so_far = std::norm(f_left - f_right) <= epsilon * std::norm(f_left);
+    }
+    CHECK(correct_so_far == true);
+}
+
 /**
  * @brief Compares the standard output of executing the given DaphneDSL script
  * with the command line interface of the DAPHNE Prototype to a reference text

diff --git a/test/api/cli/codegen/MatMulTest.cpp b/test/api/cli/codegen/MatMulTest.cpp
@@ -25,7 +25,7 @@
 
 const std::string dirPath = "test/api/cli/codegen/";
 
-TEST_CASE("matmul", "[codegen][matmul]") {
+TEST_CASE("matmul", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
         "DenseMatrix(3x3, double)\n"
         "45 45 45\n"
@@ -35,7 +35,7 @@ TEST_CASE("matmul", "[codegen][matmul]") {
     compareDaphneToStr(result, dirPath + "matmul.daphne");
     compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen");
 }
-TEST_CASE("matmul vectorized", "[codegen][matmul]") {
+TEST_CASE("matmul vectorized", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
         "DenseMatrix(3x3, double)\n"
         "45 45 45\n"
@@ -46,36 +46,36 @@ TEST_CASE("matmul vectorized", "[codegen][matmul]") {
     compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--no-obj-ref-mgnt", 
     "--matmul-vec-size-bits=128");
 }
-TEST_CASE("matmul tiled", "[codegen][matmul]") {
+TEST_CASE("matmul tiled", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
         "DenseMatrix(3x3, double)\n"
         "45 45 45\n"
         "45 45 45\n"
         "45 45 45\n";
 
-    compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-fixed-tile-sizes=1,1,1");
+    compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-fixed-tile-sizes=2,2,2");
 }
-TEST_CASE("matmul tiled and vectorized", "[codegen][matmul]") {
+TEST_CASE("matmul tiled and vectorized", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
         "DenseMatrix(3x3, double)\n"
         "45 45 45\n"
         "45 45 45\n"
         "45 45 45\n";
 
     compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen", "--matmul-vec-size-bits=64", 
-    "--matmul-fixed-tile-sizes=1,1,1");
+    "--matmul-fixed-tile-sizes=2,2,2");
 }
-TEST_CASE("matmul single", "[codegen][matmul]") {
+TEST_CASE("matmul single", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
-        "DenseMatrix(3x3, single)\n"
+        "DenseMatrix(3x3, float)\n"
         "45 45 45\n"
         "45 45 45\n"
         "45 45 45\n";
 
     compareDaphneToStr(result, dirPath + "matmul_single.daphne");
     compareDaphneToStr(result, dirPath + "matmul_single.daphne", "--mlir-codegen");
 }
-TEST_CASE("matmul non square", "[codegen][matmul]") {
+TEST_CASE("matmul non square", TAG_CODEGEN TAG_MATMUL) {
     std::string result =
         "DenseMatrix(3x3, double)\n"
         "60 60 60\n"
@@ -85,16 +85,6 @@ TEST_CASE("matmul non square", "[codegen][matmul]") {
     compareDaphneToStr(result, dirPath + "matmul_non_square.daphne");
     compareDaphneToStr(result, dirPath + "matmul_non_square.daphne", "--mlir-codegen");
 }
-/* TEST_CASE("slightly larger matmul", "[codegen][matmul]") {
-    std::string result = readTextFile(dirPath + "matmul128.result");
-    
-    compareDaphneToStr(result, dirPath + "matmul128.daphne");
-    compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen");
-    compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4,5,6");
-    compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4,5");
-    compareDaphneToStr(result, dirPath + "matmul128.daphne", "--mlir-codegen", "-matmul-fixed-tile-sizes=2,3,4");
-}
- */
 
 
 TEST_CASE("matvec", TAG_CODEGEN) {

diff --git a/test/api/cli/codegen/matmul576.daphne b/test/api/cli/codegen/matmul576.daphne
@@ -0,0 +1,7 @@
+N = 576;
+A = rand(N, N, -10.0, 10.0, 1.0, 42);
+B = rand(N, N, -10.0, 10.0, 1.0, 44);
+
+C = A@B;
+
+print(C);
diff --git a/test/codegen/MatMulAccuracyTest.cpp b/test/codegen/MatMulAccuracyTest.cpp
@@ -22,4 +22,17 @@ std::string result = readTextFile(dirPath + "matmul128.result");
     compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-fixed-tile-sizes=2");
     compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-vec-size-bits=64");
     compareDaphneToStringNumerically(result, dirPath + "matmul128.daphne", 1, epsilon, "--mlir-codegen", "--matmul-fixed-tile-sizes=2,3,4", "--matmul-vec-size-bits=64");
-}
+}
+
+TEST_CASE("matmul accuracy 576", "[codegen][matmul]") {
+    // Runs the same script once without flags and once with --mlir-codegen, then compares both outputs.
+    const std::string scriptPath = dirPath + "matmul576.daphne";
+    const double epsilon = std::numeric_limits<double>::epsilon();
+    std::stringstream kernel, err_kernel;
+    std::stringstream codegen, err_codegen;
+    int status = runDaphne(kernel, err_kernel, scriptPath.c_str());
+    CHECK(status == StatusCode::SUCCESS);
+    status = runDaphne(codegen, err_codegen, "--mlir-codegen", "--matmul-fixed-tile-sizes=2,3,4,5,6", scriptPath.c_str());
+    CHECK(status == StatusCode::SUCCESS);
+    compareDaphneRunsNumerically(kernel, codegen, 1, epsilon);
+}