From ad1c73bbad666cbbf3a8c8cbbd35e43450339392 Mon Sep 17 00:00:00 2001 From: codygunton Date: Sun, 5 May 2024 22:05:32 +0000 Subject: [PATCH 1/6] Fix failure at cost of regression. --- .../src/barretenberg/eccvm/msm_builder.hpp | 363 +++++++++--------- .../goblin/goblin_recursion.test.cpp | 8 +- 2 files changed, 192 insertions(+), 179 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index 3be74f357aa..1eff50b35e9 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -207,146 +207,154 @@ class ECCVMMSMMBuilder { // we start the accumulator at the point at infinity accumulator_trace[0] = (CycleGroup::affine_point_at_infinity); + // TODO(https://github.com/AztecProtocol/barretenberg/issues/973): Reinstate multitreading. + // For now, only multithread some parts of this builder for circuits that are larger than ones we see or test. + static constexpr uint32_t HACK_TO_FIX_VANILLA = uint32_t(1) << 31; // populate point trace data, and the components of the MSM execution trace that do not relate to affine point // operations - run_loop_in_parallel(msms.size(), [&](size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - Element accumulator = CycleGroup::affine_point_at_infinity; - const auto& msm = msms[i]; - size_t msm_row_index = msm_row_indices[i]; - const size_t msm_size = msm.size(); - const size_t rows_per_round = - (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); - size_t trace_index = (msm_row_indices[i] - 1) * 4; - - for (size_t j = 0; j < num_rounds; ++j) { - const uint32_t pc = static_cast(pc_indices[i]); - - for (size_t k = 0; k < rows_per_round; ++k) { - const size_t points_per_row = - (k + 1) * ADDITIONS_PER_ROW > msm_size ? msm_size % ADDITIONS_PER_ROW : ADDITIONS_PER_ROW; - auto& row = msm_state[msm_row_index]; - const size_t idx = k * ADDITIONS_PER_ROW; - row.msm_transition = (j == 0) && (k == 0); - for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - - auto& add_state = row.add_state[m]; - add_state.add = points_per_row > m; - int slice = add_state.add ? msm[idx + m].wnaf_slices[j] : 0; - // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row. - // if `row.add_state[m].add = 1`, this indicates that we want to add the `m`'th point in - // the MSM columns into the MSM accumulator `add_state.slice` = A 4-bit WNAF slice of - // the scalar multiplier associated with the point we are adding (the specific slice - // chosen depends on the value of msm_round) (WNAF = windowed-non-adjacent-form. Value - // range is `-15, -13, - // ..., 15`) If `add_state.add = 1`, we want `add_state.slice` to be the *compressed* - // form of the WNAF slice value. (compressed = no gaps in the value range. i.e. -15, - // -13, ..., 15 maps to 0, ... , 15) - add_state.slice = add_state.add ? (slice + 15) / 2 : 0; - add_state.point = add_state.add - ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] - : AffineElement{ 0, 0 }; + run_loop_in_parallel( + msms.size(), + [&](size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + Element accumulator = CycleGroup::affine_point_at_infinity; + const auto& msm = msms[i]; + size_t msm_row_index = msm_row_indices[i]; + const size_t msm_size = msm.size(); + const size_t rows_per_round = + (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); + size_t trace_index = (msm_row_indices[i] - 1) * 4; + + for (size_t j = 0; j < num_rounds; ++j) { + const uint32_t pc = static_cast(pc_indices[i]); - // predicate logic: - // add_predicate should normally equal add_state.add - // However! if j == 0 AND k == 0 AND m == 0 this implies we are examing the 1st point - // addition of a new MSM In this case, we do NOT add the 1st point into the accumulator, - // instead we SET the accumulator to equal the 1st point. add_predicate is used to - // determine whether we add the output of a point addition into the accumulator, - // therefore if j == 0 AND k == 0 AND m == 0, add_predicate = 0 even if add_state.add = - // true - bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); - - Element p1 = (m == 0) ? Element(add_state.point) : accumulator; - Element p2 = (m == 0) ? accumulator : Element(add_state.point); - - accumulator = add_predicate ? (accumulator + add_state.point) : Element(p1); - p1_trace[trace_index] = p1; - p2_trace[trace_index] = p2; - p3_trace[trace_index] = accumulator; - operation_trace[trace_index] = false; - trace_index++; - } - accumulator_trace[msm_row_index] = accumulator; - row.q_add = true; - row.q_double = false; - row.q_skew = false; - row.msm_round = static_cast(j); - row.msm_size = static_cast(msm_size); - row.msm_count = static_cast(idx); - row.pc = pc; - msm_row_index++; - } - // doubling - if (j < num_rounds - 1) { - auto& row = msm_state[msm_row_index]; - row.msm_transition = false; - row.msm_round = static_cast(j + 1); - row.msm_size = static_cast(msm_size); - row.msm_count = static_cast(0); - row.q_add = false; - row.q_double = true; - row.q_skew = false; - for (size_t m = 0; m < 4; ++m) { - - auto& add_state = row.add_state[m]; - add_state.add = false; - add_state.slice = 0; - add_state.point = { 0, 0 }; - add_state.collision_inverse = 0; - - p1_trace[trace_index] = accumulator; - p2_trace[trace_index] = accumulator; - accumulator = accumulator.dbl(); - p3_trace[trace_index] = accumulator; - operation_trace[trace_index] = true; - trace_index++; - } - accumulator_trace[msm_row_index] = accumulator; - msm_row_index++; - } else { for (size_t k = 0; k < rows_per_round; ++k) { - auto& row = msm_state[msm_row_index]; - const size_t points_per_row = (k + 1) * ADDITIONS_PER_ROW > msm_size ? msm_size % ADDITIONS_PER_ROW : ADDITIONS_PER_ROW; + auto& row = msm_state[msm_row_index]; const size_t idx = k * ADDITIONS_PER_ROW; - row.msm_transition = false; - - Element acc_expected = accumulator; + row.msm_transition = (j == 0) && (k == 0); + for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - for (size_t m = 0; m < 4; ++m) { auto& add_state = row.add_state[m]; add_state.add = points_per_row > m; - add_state.slice = add_state.add ? msm[idx + m].wnaf_skew ? 7 : 0 : 0; - + int slice = add_state.add ? msm[idx + m].wnaf_slices[j] : 0; + // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row. + // if `row.add_state[m].add = 1`, this indicates that we want to add the `m`'th point in + // the MSM columns into the MSM accumulator `add_state.slice` = A 4-bit WNAF slice of + // the scalar multiplier associated with the point we are adding (the specific slice + // chosen depends on the value of msm_round) (WNAF = windowed-non-adjacent-form. Value + // range is `-15, -13, + // ..., 15`) If `add_state.add = 1`, we want `add_state.slice` to be the *compressed* + // form of the WNAF slice value. (compressed = no gaps in the value range. i.e. -15, + // -13, ..., 15 maps to 0, ... , 15) + add_state.slice = add_state.add ? (slice + 15) / 2 : 0; add_state.point = add_state.add ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] : AffineElement{ 0, 0 }; - bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; - auto p1 = accumulator; - accumulator = add_predicate ? accumulator + add_state.point : accumulator; + + // predicate logic: + // add_predicate should normally equal add_state.add + // However! if j == 0 AND k == 0 AND m == 0 this implies we are examing the 1st point + // addition of a new MSM In this case, we do NOT add the 1st point into the accumulator, + // instead we SET the accumulator to equal the 1st point. add_predicate is used to + // determine whether we add the output of a point addition into the accumulator, + // therefore if j == 0 AND k == 0 AND m == 0, add_predicate = 0 even if add_state.add = + // true + bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); + + Element p1 = (m == 0) ? Element(add_state.point) : accumulator; + Element p2 = (m == 0) ? accumulator : Element(add_state.point); + + accumulator = add_predicate ? (accumulator + add_state.point) : Element(p1); p1_trace[trace_index] = p1; - p2_trace[trace_index] = add_state.point; + p2_trace[trace_index] = p2; p3_trace[trace_index] = accumulator; operation_trace[trace_index] = false; trace_index++; } - row.q_add = false; + accumulator_trace[msm_row_index] = accumulator; + row.q_add = true; row.q_double = false; - row.q_skew = true; - row.msm_round = static_cast(j + 1); + row.q_skew = false; + row.msm_round = static_cast(j); row.msm_size = static_cast(msm_size); row.msm_count = static_cast(idx); row.pc = pc; + msm_row_index++; + } + // doubling + if (j < num_rounds - 1) { + auto& row = msm_state[msm_row_index]; + row.msm_transition = false; + row.msm_round = static_cast(j + 1); + row.msm_size = static_cast(msm_size); + row.msm_count = static_cast(0); + row.q_add = false; + row.q_double = true; + row.q_skew = false; + for (size_t m = 0; m < 4; ++m) { + + auto& add_state = row.add_state[m]; + add_state.add = false; + add_state.slice = 0; + add_state.point = { 0, 0 }; + add_state.collision_inverse = 0; + + p1_trace[trace_index] = accumulator; + p2_trace[trace_index] = accumulator; + accumulator = accumulator.dbl(); + p3_trace[trace_index] = accumulator; + operation_trace[trace_index] = true; + trace_index++; + } accumulator_trace[msm_row_index] = accumulator; msm_row_index++; + } else { + for (size_t k = 0; k < rows_per_round; ++k) { + auto& row = msm_state[msm_row_index]; + + const size_t points_per_row = (k + 1) * ADDITIONS_PER_ROW > msm_size + ? msm_size % ADDITIONS_PER_ROW + : ADDITIONS_PER_ROW; + const size_t idx = k * ADDITIONS_PER_ROW; + row.msm_transition = false; + + Element acc_expected = accumulator; + + for (size_t m = 0; m < 4; ++m) { + auto& add_state = row.add_state[m]; + add_state.add = points_per_row > m; + add_state.slice = add_state.add ? msm[idx + m].wnaf_skew ? 7 : 0 : 0; + + add_state.point = + add_state.add + ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] + : AffineElement{ 0, 0 }; + bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; + auto p1 = accumulator; + accumulator = add_predicate ? accumulator + add_state.point : accumulator; + p1_trace[trace_index] = p1; + p2_trace[trace_index] = add_state.point; + p3_trace[trace_index] = accumulator; + operation_trace[trace_index] = false; + trace_index++; + } + row.q_add = false; + row.q_double = false; + row.q_skew = true; + row.msm_round = static_cast(j + 1); + row.msm_size = static_cast(msm_size); + row.msm_count = static_cast(idx); + row.pc = pc; + accumulator_trace[msm_row_index] = accumulator; + msm_row_index++; + } } } } - } - }); + }, + HACK_TO_FIX_VANILLA); // Normalize the points in the point trace run_loop_in_parallel(point_trace.size(), [&](size_t start, size_t end) { @@ -369,67 +377,24 @@ class ECCVMMSMMBuilder { // complete the computation of the ECCVM execution trace, by adding the affine intermediate point data // i.e. row.accumulator_x, row.accumulator_y, row.add_state[0...3].collision_inverse, // row.add_state[0...3].lambda - run_loop_in_parallel(msms.size(), [&](size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - const auto& msm = msms[i]; - size_t trace_index = ((msm_row_indices[i] - 1) * ADDITIONS_PER_ROW); - size_t msm_row_index = msm_row_indices[i]; - // 1st MSM row will have accumulator equal to the previous MSM output - // (or point at infinity for 1st MSM) - size_t accumulator_index = msm_row_indices[i] - 1; - const size_t msm_size = msm.size(); - const size_t rows_per_round = - (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); - - for (size_t j = 0; j < num_rounds; ++j) { - for (size_t k = 0; k < rows_per_round; ++k) { - auto& row = msm_state[msm_row_index]; - const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; - const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; - row.accumulator_x = acc_x; - row.accumulator_y = acc_y; - - for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - auto& add_state = row.add_state[m]; - bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); - - const auto& inverse = inverse_trace[trace_index]; - const auto& p1 = p1_trace[trace_index]; - const auto& p2 = p2_trace[trace_index]; - add_state.collision_inverse = add_predicate ? inverse : 0; - add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; - trace_index++; - } - accumulator_index++; - msm_row_index++; - } - - if (j < num_rounds - 1) { - MSMState& row = msm_state[msm_row_index]; - const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; - const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; - row.accumulator_x = acc_x; - row.accumulator_y = acc_y; - - for (size_t m = 0; m < 4; ++m) { - auto& add_state = row.add_state[m]; - add_state.collision_inverse = 0; - const FF& dx = p1_trace[trace_index].x; - const FF& inverse = inverse_trace[trace_index]; - add_state.lambda = ((dx + dx + dx) * dx) * inverse; - trace_index++; - } - accumulator_index++; - msm_row_index++; - } else { + run_loop_in_parallel( + msms.size(), + [&](size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + const auto& msm = msms[i]; + size_t trace_index = ((msm_row_indices[i] - 1) * ADDITIONS_PER_ROW); + size_t msm_row_index = msm_row_indices[i]; + // 1st MSM row will have accumulator equal to the previous MSM output + // (or point at infinity for 1st MSM) + size_t accumulator_index = msm_row_indices[i] - 1; + const size_t msm_size = msm.size(); + const size_t rows_per_round = + (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); + + for (size_t j = 0; j < num_rounds; ++j) { for (size_t k = 0; k < rows_per_round; ++k) { - MSMState& row = msm_state[msm_row_index]; + auto& row = msm_state[msm_row_index]; const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - - const size_t idx = k * ADDITIONS_PER_ROW; - const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; const FF& acc_y = @@ -439,7 +404,7 @@ class ECCVMMSMMBuilder { for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { auto& add_state = row.add_state[m]; - bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; + bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); const auto& inverse = inverse_trace[trace_index]; const auto& p1 = p1_trace[trace_index]; @@ -451,10 +416,60 @@ class ECCVMMSMMBuilder { accumulator_index++; msm_row_index++; } + + if (j < num_rounds - 1) { + MSMState& row = msm_state[msm_row_index]; + const Element& normalized_accumulator = accumulator_trace[accumulator_index]; + const FF& acc_x = + normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; + const FF& acc_y = + normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; + row.accumulator_x = acc_x; + row.accumulator_y = acc_y; + + for (size_t m = 0; m < 4; ++m) { + auto& add_state = row.add_state[m]; + add_state.collision_inverse = 0; + const FF& dx = p1_trace[trace_index].x; + const FF& inverse = inverse_trace[trace_index]; + add_state.lambda = ((dx + dx + dx) * dx) * inverse; + trace_index++; + } + accumulator_index++; + msm_row_index++; + } else { + for (size_t k = 0; k < rows_per_round; ++k) { + MSMState& row = msm_state[msm_row_index]; + const Element& normalized_accumulator = accumulator_trace[accumulator_index]; + + const size_t idx = k * ADDITIONS_PER_ROW; + + const FF& acc_x = + normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; + const FF& acc_y = + normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; + row.accumulator_x = acc_x; + row.accumulator_y = acc_y; + + for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { + auto& add_state = row.add_state[m]; + bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; + + const auto& inverse = inverse_trace[trace_index]; + const auto& p1 = p1_trace[trace_index]; + const auto& p2 = p2_trace[trace_index]; + add_state.collision_inverse = add_predicate ? inverse : 0; + add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; + trace_index++; + } + accumulator_index++; + msm_row_index++; + } + } } } - } - }); + }, + HACK_TO_FIX_VANILLA); // populate the final row in the MSM execution trace. // we always require 1 extra row at the end of the trace, because the accumulator x/y coordinates for row `i` diff --git a/barretenberg/cpp/src/barretenberg/goblin/goblin_recursion.test.cpp b/barretenberg/cpp/src/barretenberg/goblin/goblin_recursion.test.cpp index 43280a800ed..814dfecd9c3 100644 --- a/barretenberg/cpp/src/barretenberg/goblin/goblin_recursion.test.cpp +++ b/barretenberg/cpp/src/barretenberg/goblin/goblin_recursion.test.cpp @@ -33,12 +33,10 @@ class GoblinRecursionTests : public ::testing::Test { }; /** - * @brief A full Goblin test that mimicks the basic aztec client architecture - * @details + * @brief Test illustrating a Goblin-based IVC scheme + * @details Goblin is usd to accumulate recursive verifications of the GoblinUltraHonk proving system. */ -// TODO fix with https://github.com/AztecProtocol/barretenberg/issues/930 -// intermittent failures, presumably due to uninitialized memory -TEST_F(GoblinRecursionTests, DISABLED_Vanilla) +TEST_F(GoblinRecursionTests, Vanilla) { Goblin goblin; From b4e40dab487e274565c134ad86aaa4c60a72d89f Mon Sep 17 00:00:00 2001 From: codygunton Date: Mon, 6 May 2024 14:19:08 +0000 Subject: [PATCH 2/6] Bring benchmark comparison script in line with others. --- .../compare_branch_vs_baseline.sh | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) rename barretenberg/cpp/{src/barretenberg/benchmark => scripts}/compare_branch_vs_baseline.sh (74%) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh similarity index 74% rename from barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh rename to barretenberg/cpp/scripts/compare_branch_vs_baseline.sh index 34ee2ce171d..acd11d0139a 100755 --- a/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh +++ b/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh @@ -5,10 +5,12 @@ # it is up to date with local master, and run the script. # Specify the benchmark suite and the "baseline" branch against which to compare -BENCH_TARGET=${1:?"Please provide the name of a benchmark target."} +BENCHMARK=${1:?"Please provide the name of a benchmark target."} +COMMAND=${2:-./$BENCHMARK} BASELINE_BRANCH="master" -echo -e "\nComparing $BENCH_TARGET between $BASELINE_BRANCH and current branch:" + +echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:" # Set some directories BASE_DIR="$HOME/aztec-packages/barretenberg/cpp" BUILD_DIR="$BASE_DIR/build-bench" # matches build dir specified in bench preset @@ -24,24 +26,24 @@ cd $BASE_DIR mkdir $BENCH_RESULTS_DIR # Build and run bench in current branch -echo -e "\nConfiguring and building $BENCH_TARGET in current feature branch..\n" -rm -rf $BUILD_DIR -cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCH_TARGET +echo -e "\nConfiguring and building $BENCHMARK in current feature branch..\n" +# rm -rf $BUILD_DIR +cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCHMARK cd build-bench BRANCH_RESULTS="$BENCH_RESULTS_DIR/results_branch.json" -echo -e "\nRunning $BENCH_TARGET in feature branch.." -bin/$BENCH_TARGET --benchmark_format=json > $BRANCH_RESULTS +echo -e "\nRunning $COMMAND in feature branch.." +bin/$COMMAND --benchmark_format=json > $BRANCH_RESULTS # Checkout baseline branch, run benchmarks, save results in json format -echo -e "\nConfiguring and building $BENCH_TARGET in $BASELINE_BRANCH branch..\n" +echo -e "\nConfiguring and building $BENCHMARK in $BASELINE_BRANCH branch..\n" git checkout master > /dev/null cd $BASE_DIR rm -rf $BUILD_DIR -cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCH_TARGET +cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCHMARK cd build-bench BASELINE_RESULTS="$BENCH_RESULTS_DIR/results_baseline.json" -echo -e "\nRunning $BENCH_TARGET in master.." -bin/$BENCH_TARGET --benchmark_format=json > $BASELINE_RESULTS +echo -e "\nRunning $COMMAND in master.." +bin/$COMMAND --benchmark_format=json > $BASELINE_RESULTS # Call compare.py on the results (json) to get high level statistics. # See docs at https://github.com/google/benchmark/blob/main/docs/tools.md for more details. From 0f9f29112d386945c1c2f370fc6198cac11e2dcc Mon Sep 17 00:00:00 2001 From: codygunton Date: Mon, 6 May 2024 16:36:31 +0000 Subject: [PATCH 3/6] Add update compare script to use remote instance and align other --- .../cpp/scripts/compare_branch_vs_baseline.sh | 85 ++++++++++--------- .../compare_branch_vs_baseline_remote.sh | 61 +++++++++++++ 2 files changed, 106 insertions(+), 40 deletions(-) create mode 100755 barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh index acd11d0139a..f798893781e 100755 --- a/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh +++ b/barretenberg/cpp/scripts/compare_branch_vs_baseline.sh @@ -1,56 +1,61 @@ #!/usr/bin/env bash +# Install requirements (numpy + scipy) for comparison script if necessary. +# Note: By default, installation will occur in $HOME/.local/bin. +# pip3 install --user -r $BUILD_DIR/_deps/benchmark-src/requirements.txt + + # This script is used to compare a suite of benchmarks between baseline (default: master) and # the branch from which the script is run. Simply check out the branch of interest, ensure # it is up to date with local master, and run the script. # Specify the benchmark suite and the "baseline" branch against which to compare -BENCHMARK=${1:?"Please provide the name of a benchmark target."} -COMMAND=${2:-./$BENCHMARK} -BASELINE_BRANCH="master" +BENCHMARK=${1:-goblin_bench} +FILTER=${2:-""} +PRESET=${3:-clang16} +BUILD_DIR=${4:-build} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} +BASELINE_BRANCH="master" +BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools" echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:" -# Set some directories -BASE_DIR="$HOME/aztec-packages/barretenberg/cpp" -BUILD_DIR="$BASE_DIR/build-bench" # matches build dir specified in bench preset -BENCH_RESULTS_DIR="$BASE_DIR/tmp_bench_results" -BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools" -# Install requirements (numpy + scipy) for comparison script if necessary. -# Note: By default, installation will occur in $HOME/.local/bin. -pip3 install --user -r $BUILD_DIR/_deps/benchmark-src/requirements.txt - -# Create temporary directory for benchmark results (json) -cd $BASE_DIR -mkdir $BENCH_RESULTS_DIR - -# Build and run bench in current branch -echo -e "\nConfiguring and building $BENCHMARK in current feature branch..\n" -# rm -rf $BUILD_DIR -cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCHMARK -cd build-bench -BRANCH_RESULTS="$BENCH_RESULTS_DIR/results_branch.json" -echo -e "\nRunning $COMMAND in feature branch.." -bin/$COMMAND --benchmark_format=json > $BRANCH_RESULTS - -# Checkout baseline branch, run benchmarks, save results in json format -echo -e "\nConfiguring and building $BENCHMARK in $BASELINE_BRANCH branch..\n" -git checkout master > /dev/null -cd $BASE_DIR -rm -rf $BUILD_DIR -cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCHMARK -cd build-bench -BASELINE_RESULTS="$BENCH_RESULTS_DIR/results_baseline.json" -echo -e "\nRunning $COMMAND in master.." -bin/$COMMAND --benchmark_format=json > $BASELINE_RESULTS +# Move above script dir. +cd $(dirname $0)/.. + +# Configure and build benchmark in feature branch +echo -e "\nConfiguring and building $BENCHMARK in current feature branch...\n" +cmake --preset $PRESET +cmake --build --preset $PRESET --target $BENCHMARK + +# Run bench in current branch +echo -e "\nRunning benchmark in feature branch.." +./scripts/benchmark.sh $BENCHMARK\ + "./$BENCHMARK --benchmark_filter=$FILTER\ + --benchmark_out=results_after.json\ + --benchmark_out_format=json"\ + $PRESET + $BUILD_DIR + +# Configure and build benchmark in $BASELINE branch +echo -e "\nConfiguring and building $BENCHMARK in $BASELINE_BRANCH...\n" +git checkout $BASELINE_BRANCH +cmake --preset $PRESET +cmake --build --preset $PRESET --target $BENCHMARK + +# Run bench in current branch +echo -e "\nRunning benchmark in feature branch.." +./scripts/benchmark.sh $BENCHMARK\ + "./$BENCHMARK --benchmark_filter=$FILTER\ + --benchmark_out=results_before.json\ + --benchmark_out_format=json"\ + $PRESET + $BUILD_DIR # Call compare.py on the results (json) to get high level statistics. # See docs at https://github.com/google/benchmark/blob/main/docs/tools.md for more details. -$BENCH_TOOLS_DIR/compare.py benchmarks $BASELINE_RESULTS $BRANCH_RESULTS +$BENCH_TOOLS_DIR/compare.py benchmarks $BUILD_DIR/results_before.json $BUILD_DIR/results_after.json # Return to branch from which the script was called -git checkout - - -# Delete the temporary results directory and its contents -rm -r $BENCH_RESULTS_DIR +git checkout - \ No newline at end of file diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh new file mode 100755 index 00000000000..4211dfb4a76 --- /dev/null +++ b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# Install requirements (numpy + scipy) for comparison script if necessary. +# Note: By default, installation will occur in $HOME/.local/bin. +# pip3 install --user -r $BUILD_DIR/_deps/benchmark-src/requirements.txt + + +# This script is used to compare a suite of benchmarks between baseline (default: master) and +# the branch from which the script is run. Simply check out the branch of interest, ensure +# it is up to date with local master, and run the script. + +# Specify the benchmark suite and the "baseline" branch against which to compare +BENCHMARK=${1:-goblin_bench} +FILTER=${2:-""} +PRESET=${3:-clang16} +BUILD_DIR=${4:-build} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} + +BASELINE_BRANCH="master" +BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools" + +echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:" + +# Move above script dir. +cd $(dirname $0)/.. + +# Configure and build benchmark in feature branch +echo -e "\nConfiguring and building $BENCHMARK in current feature branch...\n" +cmake --preset $PRESET +cmake --build --preset $PRESET --target $BENCHMARK + +# Run bench in current branch +echo -e "\nRunning benchmark in feature branch.." +./scripts/benchmark_remote.sh $BENCHMARK\ + "./$BENCHMARK --benchmark_filter=$FILTER\ + --benchmark_out=results_after.json\ + --benchmark_out_format=json"\ + $PRESET + $BUILD_DIR + +# Configure and build benchmark in $BASELINE branch +echo -e "\nConfiguring and building $BENCHMARK in $BASELINE_BRANCH...\n" +git checkout $BASELINE_BRANCH +cmake --preset $PRESET +cmake --build --preset $PRESET --target $BENCHMARK + +zs# Run bench in current branch +echo -e "\nRunning benchmark in feature branch.." +./scripts/benchmark_remote.sh $BENCHMARK\ + "./$BENCHMARK --benchmark_filter=$FILTER\ + --benchmark_out=results_before.json\ + --benchmark_out_format=json"\ + $PRESET + $BUILD_DIR + +# Call compare.py on the results (json) to get high level statistics. +# See docs at https://github.com/google/benchmark/blob/main/docs/tools.md for more details. +$BENCH_TOOLS_DIR/compare.py benchmarks $BUILD_DIR/results_before.json $BUILD_DIR/results_after.json + +# Return to branch from which the script was called +git checkout - \ No newline at end of file From 75ec2fad6bb7abbb8fc66262fd59a9a092aa2dca Mon Sep 17 00:00:00 2001 From: codygunton Date: Mon, 6 May 2024 17:30:06 +0000 Subject: [PATCH 4/6] Reinstate scp lines --- barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh index 4211dfb4a76..d28cc0a4972 100755 --- a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh +++ b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh @@ -37,6 +37,7 @@ echo -e "\nRunning benchmark in feature branch.." --benchmark_out_format=json"\ $PRESET $BUILD_DIR +scp $BB_SSH_KEY $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build/results_after.json $BUILD_DIR/ # Configure and build benchmark in $BASELINE branch echo -e "\nConfiguring and building $BENCHMARK in $BASELINE_BRANCH...\n" @@ -44,7 +45,7 @@ git checkout $BASELINE_BRANCH cmake --preset $PRESET cmake --build --preset $PRESET --target $BENCHMARK -zs# Run bench in current branch +# Run bench in current branch echo -e "\nRunning benchmark in feature branch.." ./scripts/benchmark_remote.sh $BENCHMARK\ "./$BENCHMARK --benchmark_filter=$FILTER\ @@ -52,6 +53,7 @@ echo -e "\nRunning benchmark in feature branch.." --benchmark_out_format=json"\ $PRESET $BUILD_DIR +scp $BB_SSH_KEY $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build/results_before.json $BUILD_DIR/ # Call compare.py on the results (json) to get high level statistics. # See docs at https://github.com/google/benchmark/blob/main/docs/tools.md for more details. From dd4418879afe24e63bf2d02ec19f896ce0f746b3 Mon Sep 17 00:00:00 2001 From: codygunton Date: Mon, 6 May 2024 17:31:08 +0000 Subject: [PATCH 5/6] Just don't multithread the MSM builder --- .../src/barretenberg/eccvm/msm_builder.hpp | 439 +++++++++--------- 1 file changed, 208 insertions(+), 231 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index 1eff50b35e9..5572bab54ee 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -207,154 +207,143 @@ class ECCVMMSMMBuilder { // we start the accumulator at the point at infinity accumulator_trace[0] = (CycleGroup::affine_point_at_infinity); - // TODO(https://github.com/AztecProtocol/barretenberg/issues/973): Reinstate multitreading. - // For now, only multithread some parts of this builder for circuits that are larger than ones we see or test. - static constexpr uint32_t HACK_TO_FIX_VANILLA = uint32_t(1) << 31; + // TODO(https://github.com/AztecProtocol/barretenberg/issues/973): Reinstate multitreading? // populate point trace data, and the components of the MSM execution trace that do not relate to affine point // operations - run_loop_in_parallel( - msms.size(), - [&](size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - Element accumulator = CycleGroup::affine_point_at_infinity; - const auto& msm = msms[i]; - size_t msm_row_index = msm_row_indices[i]; - const size_t msm_size = msm.size(); - const size_t rows_per_round = - (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); - size_t trace_index = (msm_row_indices[i] - 1) * 4; - - for (size_t j = 0; j < num_rounds; ++j) { - const uint32_t pc = static_cast(pc_indices[i]); - - for (size_t k = 0; k < rows_per_round; ++k) { - const size_t points_per_row = (k + 1) * ADDITIONS_PER_ROW > msm_size - ? msm_size % ADDITIONS_PER_ROW - : ADDITIONS_PER_ROW; - auto& row = msm_state[msm_row_index]; - const size_t idx = k * ADDITIONS_PER_ROW; - row.msm_transition = (j == 0) && (k == 0); - for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - - auto& add_state = row.add_state[m]; - add_state.add = points_per_row > m; - int slice = add_state.add ? msm[idx + m].wnaf_slices[j] : 0; - // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row. - // if `row.add_state[m].add = 1`, this indicates that we want to add the `m`'th point in - // the MSM columns into the MSM accumulator `add_state.slice` = A 4-bit WNAF slice of - // the scalar multiplier associated with the point we are adding (the specific slice - // chosen depends on the value of msm_round) (WNAF = windowed-non-adjacent-form. Value - // range is `-15, -13, - // ..., 15`) If `add_state.add = 1`, we want `add_state.slice` to be the *compressed* - // form of the WNAF slice value. (compressed = no gaps in the value range. i.e. -15, - // -13, ..., 15 maps to 0, ... , 15) - add_state.slice = add_state.add ? (slice + 15) / 2 : 0; - add_state.point = - add_state.add ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] - : AffineElement{ 0, 0 }; + for (size_t i = 0; i < msms.size(); i++) { + Element accumulator = CycleGroup::affine_point_at_infinity; + const auto& msm = msms[i]; + size_t msm_row_index = msm_row_indices[i]; + const size_t msm_size = msm.size(); + const size_t rows_per_round = (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); + size_t trace_index = (msm_row_indices[i] - 1) * 4; - // predicate logic: - // add_predicate should normally equal add_state.add - // However! if j == 0 AND k == 0 AND m == 0 this implies we are examing the 1st point - // addition of a new MSM In this case, we do NOT add the 1st point into the accumulator, - // instead we SET the accumulator to equal the 1st point. add_predicate is used to - // determine whether we add the output of a point addition into the accumulator, - // therefore if j == 0 AND k == 0 AND m == 0, add_predicate = 0 even if add_state.add = - // true - bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); - - Element p1 = (m == 0) ? Element(add_state.point) : accumulator; - Element p2 = (m == 0) ? accumulator : Element(add_state.point); - - accumulator = add_predicate ? (accumulator + add_state.point) : Element(p1); - p1_trace[trace_index] = p1; - p2_trace[trace_index] = p2; - p3_trace[trace_index] = accumulator; - operation_trace[trace_index] = false; - trace_index++; - } - accumulator_trace[msm_row_index] = accumulator; - row.q_add = true; - row.q_double = false; - row.q_skew = false; - row.msm_round = static_cast(j); - row.msm_size = static_cast(msm_size); - row.msm_count = static_cast(idx); - row.pc = pc; - msm_row_index++; - } - // doubling - if (j < num_rounds - 1) { - auto& row = msm_state[msm_row_index]; - row.msm_transition = false; - row.msm_round = static_cast(j + 1); - row.msm_size = static_cast(msm_size); - row.msm_count = static_cast(0); - row.q_add = false; - row.q_double = true; - row.q_skew = false; - for (size_t m = 0; m < 4; ++m) { - - auto& add_state = row.add_state[m]; - add_state.add = false; - add_state.slice = 0; - add_state.point = { 0, 0 }; - add_state.collision_inverse = 0; - - p1_trace[trace_index] = accumulator; - p2_trace[trace_index] = accumulator; - accumulator = accumulator.dbl(); - p3_trace[trace_index] = accumulator; - operation_trace[trace_index] = true; - trace_index++; - } - accumulator_trace[msm_row_index] = accumulator; - msm_row_index++; - } else { - for (size_t k = 0; k < rows_per_round; ++k) { - auto& row = msm_state[msm_row_index]; - - const size_t points_per_row = (k + 1) * ADDITIONS_PER_ROW > msm_size - ? msm_size % ADDITIONS_PER_ROW - : ADDITIONS_PER_ROW; - const size_t idx = k * ADDITIONS_PER_ROW; - row.msm_transition = false; - - Element acc_expected = accumulator; - - for (size_t m = 0; m < 4; ++m) { - auto& add_state = row.add_state[m]; - add_state.add = points_per_row > m; - add_state.slice = add_state.add ? msm[idx + m].wnaf_skew ? 7 : 0 : 0; - - add_state.point = - add_state.add - ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] - : AffineElement{ 0, 0 }; - bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; - auto p1 = accumulator; - accumulator = add_predicate ? accumulator + add_state.point : accumulator; - p1_trace[trace_index] = p1; - p2_trace[trace_index] = add_state.point; - p3_trace[trace_index] = accumulator; - operation_trace[trace_index] = false; - trace_index++; - } - row.q_add = false; - row.q_double = false; - row.q_skew = true; - row.msm_round = static_cast(j + 1); - row.msm_size = static_cast(msm_size); - row.msm_count = static_cast(idx); - row.pc = pc; - accumulator_trace[msm_row_index] = accumulator; - msm_row_index++; - } + for (size_t j = 0; j < num_rounds; ++j) { + const uint32_t pc = static_cast(pc_indices[i]); + + for (size_t k = 0; k < rows_per_round; ++k) { + const size_t points_per_row = + (k + 1) * ADDITIONS_PER_ROW > msm_size ? msm_size % ADDITIONS_PER_ROW : ADDITIONS_PER_ROW; + auto& row = msm_state[msm_row_index]; + const size_t idx = k * ADDITIONS_PER_ROW; + row.msm_transition = (j == 0) && (k == 0); + for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { + + auto& add_state = row.add_state[m]; + add_state.add = points_per_row > m; + int slice = add_state.add ? msm[idx + m].wnaf_slices[j] : 0; + // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row. + // if `row.add_state[m].add = 1`, this indicates that we want to add the `m`'th point in + // the MSM columns into the MSM accumulator `add_state.slice` = A 4-bit WNAF slice of + // the scalar multiplier associated with the point we are adding (the specific slice + // chosen depends on the value of msm_round) (WNAF = windowed-non-adjacent-form. Value + // range is `-15, -13, + // ..., 15`) If `add_state.add = 1`, we want `add_state.slice` to be the *compressed* + // form of the WNAF slice value. (compressed = no gaps in the value range. i.e. -15, + // -13, ..., 15 maps to 0, ... , 15) + add_state.slice = add_state.add ? (slice + 15) / 2 : 0; + add_state.point = add_state.add + ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] + : AffineElement{ 0, 0 }; + + // predicate logic: + // add_predicate should normally equal add_state.add + // However! if j == 0 AND k == 0 AND m == 0 this implies we are examing the 1st point + // addition of a new MSM In this case, we do NOT add the 1st point into the accumulator, + // instead we SET the accumulator to equal the 1st point. add_predicate is used to + // determine whether we add the output of a point addition into the accumulator, + // therefore if j == 0 AND k == 0 AND m == 0, add_predicate = 0 even if add_state.add = + // true + bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); + + Element p1 = (m == 0) ? Element(add_state.point) : accumulator; + Element p2 = (m == 0) ? accumulator : Element(add_state.point); + + accumulator = add_predicate ? (accumulator + add_state.point) : Element(p1); + p1_trace[trace_index] = p1; + p2_trace[trace_index] = p2; + p3_trace[trace_index] = accumulator; + operation_trace[trace_index] = false; + trace_index++; + } + accumulator_trace[msm_row_index] = accumulator; + row.q_add = true; + row.q_double = false; + row.q_skew = false; + row.msm_round = static_cast(j); + row.msm_size = static_cast(msm_size); + row.msm_count = static_cast(idx); + row.pc = pc; + msm_row_index++; + } + // doubling + if (j < num_rounds - 1) { + auto& row = msm_state[msm_row_index]; + row.msm_transition = false; + row.msm_round = static_cast(j + 1); + row.msm_size = static_cast(msm_size); + row.msm_count = static_cast(0); + row.q_add = false; + row.q_double = true; + row.q_skew = false; + for (size_t m = 0; m < 4; ++m) { + + auto& add_state = row.add_state[m]; + add_state.add = false; + add_state.slice = 0; + add_state.point = { 0, 0 }; + add_state.collision_inverse = 0; + + p1_trace[trace_index] = accumulator; + p2_trace[trace_index] = accumulator; + accumulator = accumulator.dbl(); + p3_trace[trace_index] = accumulator; + operation_trace[trace_index] = true; + trace_index++; + } + accumulator_trace[msm_row_index] = accumulator; + msm_row_index++; + } else { + for (size_t k = 0; k < rows_per_round; ++k) { + auto& row = msm_state[msm_row_index]; + + const size_t points_per_row = + (k + 1) * ADDITIONS_PER_ROW > msm_size ? msm_size % ADDITIONS_PER_ROW : ADDITIONS_PER_ROW; + const size_t idx = k * ADDITIONS_PER_ROW; + row.msm_transition = false; + + Element acc_expected = accumulator; + + for (size_t m = 0; m < 4; ++m) { + auto& add_state = row.add_state[m]; + add_state.add = points_per_row > m; + add_state.slice = add_state.add ? msm[idx + m].wnaf_skew ? 7 : 0 : 0; + + add_state.point = add_state.add + ? msm[idx + m].precomputed_table[static_cast(add_state.slice)] + : AffineElement{ 0, 0 }; + bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; + auto p1 = accumulator; + accumulator = add_predicate ? accumulator + add_state.point : accumulator; + p1_trace[trace_index] = p1; + p2_trace[trace_index] = add_state.point; + p3_trace[trace_index] = accumulator; + operation_trace[trace_index] = false; + trace_index++; } + row.q_add = false; + row.q_double = false; + row.q_skew = true; + row.msm_round = static_cast(j + 1); + row.msm_size = static_cast(msm_size); + row.msm_count = static_cast(idx); + row.pc = pc; + accumulator_trace[msm_row_index] = accumulator; + msm_row_index++; } } - }, - HACK_TO_FIX_VANILLA); + } + } // Normalize the points in the point trace run_loop_in_parallel(point_trace.size(), [&](size_t start, size_t end) { @@ -377,99 +366,87 @@ class ECCVMMSMMBuilder { // complete the computation of the ECCVM execution trace, by adding the affine intermediate point data // i.e. row.accumulator_x, row.accumulator_y, row.add_state[0...3].collision_inverse, // row.add_state[0...3].lambda - run_loop_in_parallel( - msms.size(), - [&](size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - const auto& msm = msms[i]; - size_t trace_index = ((msm_row_indices[i] - 1) * ADDITIONS_PER_ROW); - size_t msm_row_index = msm_row_indices[i]; - // 1st MSM row will have accumulator equal to the previous MSM output - // (or point at infinity for 1st MSM) - size_t accumulator_index = msm_row_indices[i] - 1; - const size_t msm_size = msm.size(); - const size_t rows_per_round = - (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); - - for (size_t j = 0; j < num_rounds; ++j) { - for (size_t k = 0; k < rows_per_round; ++k) { - auto& row = msm_state[msm_row_index]; - const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - const FF& acc_x = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; - const FF& acc_y = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; - row.accumulator_x = acc_x; - row.accumulator_y = acc_y; - - for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - auto& add_state = row.add_state[m]; - bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); - - const auto& inverse = inverse_trace[trace_index]; - const auto& p1 = p1_trace[trace_index]; - const auto& p2 = p2_trace[trace_index]; - add_state.collision_inverse = add_predicate ? inverse : 0; - add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; - trace_index++; - } - accumulator_index++; - msm_row_index++; - } + for (size_t i = 0; i < msms.size(); i++) { + const auto& msm = msms[i]; + size_t trace_index = ((msm_row_indices[i] - 1) * ADDITIONS_PER_ROW); + size_t msm_row_index = msm_row_indices[i]; + // 1st MSM row will have accumulator equal to the previous MSM output + // (or point at infinity for 1st MSM) + size_t accumulator_index = msm_row_indices[i] - 1; + const size_t msm_size = msm.size(); + const size_t rows_per_round = (msm_size / ADDITIONS_PER_ROW) + (msm_size % ADDITIONS_PER_ROW != 0 ? 1 : 0); - if (j < num_rounds - 1) { - MSMState& row = msm_state[msm_row_index]; - const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - const FF& acc_x = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; - const FF& acc_y = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; - row.accumulator_x = acc_x; - row.accumulator_y = acc_y; - - for (size_t m = 0; m < 4; ++m) { - auto& add_state = row.add_state[m]; - add_state.collision_inverse = 0; - const FF& dx = p1_trace[trace_index].x; - const FF& inverse = inverse_trace[trace_index]; - add_state.lambda = ((dx + dx + dx) * dx) * inverse; - trace_index++; - } - accumulator_index++; - msm_row_index++; - } else { - for (size_t k = 0; k < rows_per_round; ++k) { - MSMState& row = msm_state[msm_row_index]; - const Element& normalized_accumulator = accumulator_trace[accumulator_index]; - - const size_t idx = k * ADDITIONS_PER_ROW; - - const FF& acc_x = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; - const FF& acc_y = - normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; - row.accumulator_x = acc_x; - row.accumulator_y = acc_y; - - for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { - auto& add_state = row.add_state[m]; - bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; - - const auto& inverse = inverse_trace[trace_index]; - const auto& p1 = p1_trace[trace_index]; - const auto& p2 = p2_trace[trace_index]; - add_state.collision_inverse = add_predicate ? inverse : 0; - add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; - trace_index++; - } - accumulator_index++; - msm_row_index++; - } + for (size_t j = 0; j < num_rounds; ++j) { + for (size_t k = 0; k < rows_per_round; ++k) { + auto& row = msm_state[msm_row_index]; + const Element& normalized_accumulator = accumulator_trace[accumulator_index]; + const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; + const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; + row.accumulator_x = acc_x; + row.accumulator_y = acc_y; + + for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { + auto& add_state = row.add_state[m]; + bool add_predicate = (m == 0 ? (j != 0 || k != 0) : add_state.add); + + const auto& inverse = inverse_trace[trace_index]; + const auto& p1 = p1_trace[trace_index]; + const auto& p2 = p2_trace[trace_index]; + add_state.collision_inverse = add_predicate ? inverse : 0; + add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; + trace_index++; + } + accumulator_index++; + msm_row_index++; + } + + if (j < num_rounds - 1) { + MSMState& row = msm_state[msm_row_index]; + const Element& normalized_accumulator = accumulator_trace[accumulator_index]; + const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; + const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; + row.accumulator_x = acc_x; + row.accumulator_y = acc_y; + + for (size_t m = 0; m < 4; ++m) { + auto& add_state = row.add_state[m]; + add_state.collision_inverse = 0; + const FF& dx = p1_trace[trace_index].x; + const FF& inverse = inverse_trace[trace_index]; + add_state.lambda = ((dx + dx + dx) * dx) * inverse; + trace_index++; + } + accumulator_index++; + msm_row_index++; + } else { + for (size_t k = 0; k < rows_per_round; ++k) { + MSMState& row = msm_state[msm_row_index]; + const Element& normalized_accumulator = accumulator_trace[accumulator_index]; + + const size_t idx = k * ADDITIONS_PER_ROW; + + const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x; + const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y; + row.accumulator_x = acc_x; + row.accumulator_y = acc_y; + + for (size_t m = 0; m < ADDITIONS_PER_ROW; ++m) { + auto& add_state = row.add_state[m]; + bool add_predicate = add_state.add ? msm[idx + m].wnaf_skew : false; + + const auto& inverse = inverse_trace[trace_index]; + const auto& p1 = p1_trace[trace_index]; + const auto& p2 = p2_trace[trace_index]; + add_state.collision_inverse = add_predicate ? inverse : 0; + add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0; + trace_index++; } + accumulator_index++; + msm_row_index++; } } - }, - HACK_TO_FIX_VANILLA); + } + } // populate the final row in the MSM execution trace. // we always require 1 extra row at the end of the trace, because the accumulator x/y coordinates for row `i` From 78055b0cbd7ec31ce4b2793ea9f8bb8bd11eee11 Mon Sep 17 00:00:00 2001 From: codygunton Date: Mon, 6 May 2024 17:35:39 +0000 Subject: [PATCH 6/6] Add needed escape --- .../cpp/scripts/compare_branch_vs_baseline_remote.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh index d28cc0a4972..27d1af8966a 100755 --- a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh +++ b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh @@ -35,8 +35,9 @@ echo -e "\nRunning benchmark in feature branch.." "./$BENCHMARK --benchmark_filter=$FILTER\ --benchmark_out=results_after.json\ --benchmark_out_format=json"\ - $PRESET + $PRESET\ $BUILD_DIR + scp $BB_SSH_KEY $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build/results_after.json $BUILD_DIR/ # Configure and build benchmark in $BASELINE branch @@ -51,8 +52,9 @@ echo -e "\nRunning benchmark in feature branch.." "./$BENCHMARK --benchmark_filter=$FILTER\ --benchmark_out=results_before.json\ --benchmark_out_format=json"\ - $PRESET + $PRESET\ $BUILD_DIR + scp $BB_SSH_KEY $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build/results_before.json $BUILD_DIR/ # Call compare.py on the results (json) to get high level statistics.