From 1c008d9a2fad747142e8ca356d6c00cee1663f2c Mon Sep 17 00:00:00 2001 From: Cody Gunton Date: Sat, 12 Oct 2024 00:11:55 +0100 Subject: [PATCH] feat: Tracy time with instrumentation (#9170) Add scripts for profiling locally with tracy and samply, add instrumentation so that the tracy profile is pretty complete, and combine BB_OP_COUNT macros with tracy macros. --- barretenberg/README.md | 2 +- barretenberg/cpp/CMakeLists.txt | 13 +-- barretenberg/cpp/CMakePresets.json | 34 +++++--- barretenberg/cpp/scripts/benchmark_wasm.sh | 4 +- ...le_tracy_build_mainframe_capture_local.sh} | 0 ...ile_tracy_capture_mainframe_view_local.sh} | 15 ++-- .../cpp/scripts/profile_tracy_local.sh | 36 ++++++++ .../cpp/scripts/profile_wasm_samply.sh | 19 +++++ .../benchmark/ultra_bench/mock_circuits.hpp | 5 +- .../client_ivc/test_bench_shared.hpp | 2 +- .../commitment_schemes/commitment_key.hpp | 4 +- .../cpp/src/barretenberg/common/mem.cpp | 2 +- .../cpp/src/barretenberg/common/op_count.hpp | 23 ++++-- .../barretenberg/common/slab_allocator.cpp | 9 +- .../barretenberg/ecc/groups/element_impl.hpp | 2 +- .../scalar_multiplication/runtime_states.cpp | 11 +++ .../scalar_multiplication.cpp | 18 +++- .../src/barretenberg/eccvm/eccvm_prover.cpp | 4 +- .../execution_trace/execution_trace.cpp | 34 +++----- .../execution_trace/execution_trace.hpp | 14 ++-- .../cpp/src/barretenberg/goblin/goblin.hpp | 44 ++++------ .../src/barretenberg/goblin/mock_circuits.hpp | 14 ++++ .../composer/permutation_lib.hpp | 15 ++-- .../barretenberg/polynomials/polynomial.hpp | 12 ++- .../protogalaxy/protogalaxy_prover_impl.hpp | 19 ++--- .../protogalaxy_prover_internal.hpp | 6 +- .../relations/auxiliary_relation.hpp | 2 +- .../relations/databus_lookup_relation.hpp | 2 +- .../delta_range_constraint_relation.hpp | 2 +- .../relations/ecc_op_queue_relation.hpp | 2 +- .../relations/elliptic_relation.hpp | 2 +- .../relations/logderiv_lookup_relation.hpp | 2 +- .../relations/permutation_relation.hpp | 2 +- 
.../relations/poseidon2_external_relation.hpp | 2 +- .../relations/poseidon2_internal_relation.hpp | 2 +- .../relations/ultra_arithmetic_relation.hpp | 2 +- .../srs/factories/file_crs_factory.cpp | 3 + .../srs/factories/file_crs_factory.hpp | 5 +- .../srs/factories/mem_bn254_crs_factory.cpp | 3 + .../stdlib/primitives/bool/bool.test.cpp | 8 ++ .../mega_circuit_builder.hpp | 2 + .../stdlib_circuit_builders/mega_flavor.hpp | 4 +- .../stdlib_circuit_builders/ultra_flavor.hpp | 12 ++- .../src/barretenberg/sumcheck/sumcheck.hpp | 9 +- .../barretenberg/sumcheck/sumcheck_round.hpp | 11 +-- .../translator_circuit_builder.hpp | 2 +- .../translator_vm/translator_flavor.hpp | 2 +- .../translator_vm/translator_prover.cpp | 4 +- .../ultra_honk/decider_prover.cpp | 7 +- .../ultra_honk/decider_proving_key.hpp | 82 ++++++++----------- .../barretenberg/ultra_honk/oink_prover.cpp | 53 ++++++------ 51 files changed, 337 insertions(+), 251 deletions(-) rename barretenberg/cpp/scripts/{benchmark_tracy_build_mainframe_run_local.sh => profile_tracy_build_mainframe_capture_local.sh} (100%) rename barretenberg/cpp/scripts/{benchmark_tracy_build_mainframe_view_local.sh => profile_tracy_capture_mainframe_view_local.sh} (76%) mode change 100644 => 100755 create mode 100755 barretenberg/cpp/scripts/profile_tracy_local.sh create mode 100755 barretenberg/cpp/scripts/profile_wasm_samply.sh diff --git a/barretenberg/README.md b/barretenberg/README.md index cea102725da..f1cbf9c3bfc 100644 --- a/barretenberg/README.md +++ b/barretenberg/README.md @@ -477,4 +477,4 @@ See Tracy manual linked here for in-depth Tra The basic use of Tracy is to run a benchmark with the `cmake --preset tracy` build type, create a capture file, then transfer it to a local machine for interactive UI introspection. -All the steps to do this effectively are included in cpp/scripts/benchmark_tracy.sh +All the steps to do this effectively are included in various scripts in cpp/scripts/. 
diff --git a/barretenberg/cpp/CMakeLists.txt b/barretenberg/cpp/CMakeLists.txt index 8403aded867..38bdf552b21 100644 --- a/barretenberg/cpp/CMakeLists.txt +++ b/barretenberg/cpp/CMakeLists.txt @@ -54,21 +54,22 @@ if(CHECK_CIRCUIT_STACKTRACES) add_compile_options(-DCHECK_CIRCUIT_STACKTRACES) endif() -if(ENABLE_TRACY) +if(ENABLE_TRACY OR ENABLE_TRACY_TIME_INSTRUMENTED) add_compile_options(-DTRACY_ENABLE) SET(TRACY_LIBS Tracy::TracyClient) else() SET(TRACY_LIBS) endif() -if(TRACY_PROFILE_MEMORY) - add_compile_options(-DTRACY_MEMORY) -endif() -if(TRACY_PROFILE_TIME) - add_compile_options(-DTRACY_TIME) +if(ENABLE_TRACY_TIME_INSTRUMENTED) + add_compile_options(-DTRACY_INSTRUMENTED) endif() +if(TRACY_PROFILE_MEMORY) + add_compile_options(-DTRACY_MEMORY) + add_compile_options(-DTRACY_INSTRUMENTED) +endif() if(ENABLE_ASAN) add_compile_options(-fsanitize=address) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index da2930e37a6..202b06038ae 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -112,8 +112,8 @@ }, { "name": "tracy-memory", - "displayName": "Release build with tracy, optimized for memory tracking", - "description": "Release build with tracy, optimized for memory tracking", + "displayName": "Release build with tracy with memory tracking", + "description": "Release build with tracy with memory tracking", "inherits": "clang16", "binaryDir": "build-tracy-memory", "cacheVariables": { @@ -122,11 +122,21 @@ } }, { - "name": "tracy-time", - "displayName": "Build for tracy time profiling", - "description": "Build for tracy time profiling", - "binaryDir": "build-tracy-time", + "name": "tracy-time-instrumented", + "displayName": "Build for tracy time profiling via instrumentation", + "description": "Build for tracy time profiling via instrumentation", + "binaryDir": "build-tracy-time-instrumented", "inherits": "clang16", + "cacheVariables": { + "ENABLE_TRACY_TIME_INSTRUMENTED": "ON" + } + }, + { + 
"name": "tracy-time-sampled", + "displayName": "Build for tracy time profiling via sampling", + "description": "Build for tracy time profiling via sampling", + "binaryDir": "build-tracy-time-sampled", + "inherits": "default", "environment": { "CMAKE_BUILD_TYPE": "RelWithDebInfo", "CFLAGS": "-g -fno-omit-frame-pointer", @@ -134,8 +144,7 @@ "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" }, "cacheVariables": { - "ENABLE_TRACY": "ON", - "TRACY_PROFILE_TIME": "ON" + "ENABLE_TRACY": "ON" } }, { @@ -497,9 +506,14 @@ "configurePreset": "tracy-memory" }, { - "name": "tracy-time", + "name": "tracy-time-instrumented", + "inherits": "default", + "configurePreset": "tracy-time-instrumented" + }, + { + "name": "tracy-time-sampled", "inherits": "default", - "configurePreset": "tracy-time" + "configurePreset": "tracy-time-sampled" }, { "name": "clang16-pic", diff --git a/barretenberg/cpp/scripts/benchmark_wasm.sh b/barretenberg/cpp/scripts/benchmark_wasm.sh index 228c28e796f..e5bd148e4e4 100755 --- a/barretenberg/cpp/scripts/benchmark_wasm.sh +++ b/barretenberg/cpp/scripts/benchmark_wasm.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash set -eu -BENCHMARK=${1:-goblin_bench} -COMMAND=${2:-./bin/$BENCHMARK} +BENCHMARK=${1:-client_ivc_bench} +COMMAND=${2:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6} HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} # Move above script dir. 
diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh b/barretenberg/cpp/scripts/profile_tracy_build_mainframe_capture_local.sh similarity index 100% rename from barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh rename to barretenberg/cpp/scripts/profile_tracy_build_mainframe_capture_local.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_view_local.sh b/barretenberg/cpp/scripts/profile_tracy_capture_mainframe_view_local.sh old mode 100644 new mode 100755 similarity index 76% rename from barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_view_local.sh rename to barretenberg/cpp/scripts/profile_tracy_capture_mainframe_view_local.sh index 5d75011b4e9..69291d96cb8 --- a/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_view_local.sh +++ b/barretenberg/cpp/scripts/profile_tracy_capture_mainframe_view_local.sh @@ -13,30 +13,31 @@ USER=${1:-$USER} BOX=$USER-box BENCHMARK=${2:-client_ivc_bench} COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6"\$"} - +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} # Can also set PRESET=tracy-gates env variable PRESET=${PRESET:-tracy-memory} ssh $BOX " set -eux ; - cd ~/aztec-packages/barretenberg/cpp/ ; - cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ; cd ~/tracy/capture ; git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ; - sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ; - mkdir -p build && cd build && cmake .. && make -j ; + sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev libtbb-dev libfreetype-dev ; + mkdir -p build && cd build && cmake -DCMAKE_MESSAGE_LOG_LEVEL=Warning .. 
&& make -j ; + cd ~/aztec-packages/barretenberg/cpp/ ; + cmake -DCMAKE_MESSAGE_LOG_LEVEL=Warning --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & ; sleep 0.1 ; cd ~/aztec-packages/barretenberg/cpp/build-$PRESET ; ninja $BENCHMARK ; - $COMMAND ; + HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND ; " & + wait # TODO(AD) hack - not sure why needed ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy cd ~/tracy git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 -cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release +cmake -DCMAKE_MESSAGE_LOG_LEVEL=Warning -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release cmake --build profiler/build --parallel scp $BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK . ~/tracy/profiler/build/tracy-profiler trace-$BENCHMARK diff --git a/barretenberg/cpp/scripts/profile_tracy_local.sh b/barretenberg/cpp/scripts/profile_tracy_local.sh new file mode 100755 index 00000000000..77491e47790 --- /dev/null +++ b/barretenberg/cpp/scripts/profile_tracy_local.sh @@ -0,0 +1,36 @@ +# Collect a profile completely locally, i.e., without using any remote machine for building or capturing. + +set -eux +USER=${1:-$USER} +BOX=$USER-box +BENCHMARK=${2:-client_ivc_bench} +COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} +PRESET=${PRESET:-tracy-time-sampled} + +! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy +cd ~/tracy +git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 +cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release +cmake --build profiler/build --parallel + +cd ~/aztec-packages/barretenberg/cpp/ +cmake --preset $PRESET -DCMAKE_MESSAGE_LOG_LEVEL=Warning && cmake --build --preset $PRESET --target $BENCHMARK + +! 
[ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy +cd ~/tracy/capture +git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 +mkdir -p build && cd build && cmake .. -DCMAKE_MESSAGE_LOG_LEVEL=Warning && make -j + +./tracy-capture -a 127.0.0.1 -f -o ../trace-$BENCHMARK & +sleep 0.1 +cd ~/aztec-packages/barretenberg/cpp/build-$PRESET/ + +# Run the COMMAND with sudo if PRESET is 'tracy-time-sampled' +if [ "$PRESET" = "tracy-time-sampled" ]; then + sudo HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND +else + HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND +fi + +~/tracy/profiler/build/tracy-profiler ~/tracy/capture/trace-$BENCHMARK diff --git a/barretenberg/cpp/scripts/profile_wasm_samply.sh b/barretenberg/cpp/scripts/profile_wasm_samply.sh new file mode 100755 index 00000000000..5e4f6928ec7 --- /dev/null +++ b/barretenberg/cpp/scripts/profile_wasm_samply.sh @@ -0,0 +1,19 @@ +# This is to be run locally not in a container, so the user must handle samply installation. +#!/usr/bin/env bash +set -eu + +BENCHMARK=${1:-client_ivc_bench} +COMMAND=${2:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} + +# Move above script dir. +cd $(dirname $0)/.. + +# Configure and build. +cmake --preset wasm-threads -DCMAKE_MESSAGE_LOG_LEVEL=Warning +cmake --build --preset wasm-threads --target $BENCHMARK + +cd build-wasm-threads +# Consistency with _wasm.sh targets / shorter $COMMAND. +cp ./bin/$BENCHMARK . +samply record wasmtime run --profile=perfmap --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY -Wthreads=y -Sthreads=y --dir=.. 
$COMMAND \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp index 0d2787095f3..8a56e671424 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp @@ -54,9 +54,8 @@ Prover get_prover(void (*test_circuit_function)(typename Prover::Flavor::Circuit Composer composer; return composer.create_prover(builder); } else { -#ifdef TRACY_MEMORY - ZoneScopedN("creating prover"); -#endif + PROFILE_THIS_NAME("creating prover"); + return Prover(builder); } }; diff --git a/barretenberg/cpp/src/barretenberg/client_ivc/test_bench_shared.hpp b/barretenberg/cpp/src/barretenberg/client_ivc/test_bench_shared.hpp index d1529a2a9b2..b66ff324b2f 100644 --- a/barretenberg/cpp/src/barretenberg/client_ivc/test_bench_shared.hpp +++ b/barretenberg/cpp/src/barretenberg/client_ivc/test_bench_shared.hpp @@ -44,7 +44,7 @@ void perform_ivc_accumulation_rounds(size_t NUM_CIRCUITS, for (size_t circuit_idx = 0; circuit_idx < NUM_CIRCUITS; ++circuit_idx) { MegaCircuitBuilder circuit; { - BB_OP_COUNT_TIME_NAME("construct_circuits"); + PROFILE_THIS_NAME("construct_circuits"); circuit = circuit_producer.create_next_circuit(ivc); } diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp index afbc584020b..c33ff953d00 100644 --- a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp +++ b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp @@ -85,7 +85,7 @@ template class CommitmentKey { */ Commitment commit(PolynomialSpan polynomial) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); // We must have a power-of-2 SRS points *after* subtracting by start_index. 
const size_t consumed_srs = numeric::round_up_power_2(polynomial.size()) + polynomial.start_index; auto srs = srs::get_crs_factory()->get_prover_crs(consumed_srs); @@ -120,7 +120,7 @@ template class CommitmentKey { */ Commitment commit_sparse(PolynomialSpan polynomial) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); const size_t poly_size = polynomial.size(); ASSERT(polynomial.end_index() <= srs->get_monomial_size()); diff --git a/barretenberg/cpp/src/barretenberg/common/mem.cpp b/barretenberg/cpp/src/barretenberg/common/mem.cpp index d6f4891ada7..03547252a1e 100644 --- a/barretenberg/cpp/src/barretenberg/common/mem.cpp +++ b/barretenberg/cpp/src/barretenberg/common/mem.cpp @@ -1,6 +1,6 @@ #include "barretenberg/common/mem.hpp" -#ifdef TRACY_ENABLE +#ifdef TRACY_MEMORY void* operator new(std::size_t count) { // NOLINTBEGIN(cppcoreguidelines-no-malloc) diff --git a/barretenberg/cpp/src/barretenberg/common/op_count.hpp b/barretenberg/cpp/src/barretenberg/common/op_count.hpp index af24ecb1e10..313d0c6e56b 100644 --- a/barretenberg/cpp/src/barretenberg/common/op_count.hpp +++ b/barretenberg/cpp/src/barretenberg/common/op_count.hpp @@ -3,6 +3,18 @@ #include #include + +#ifdef BB_USE_OP_COUNT_TIME_ONLY +#define PROFILE_THIS() BB_OP_COUNT_TIME_NAME(__func__) +#define PROFILE_THIS_NAME(name) BB_OP_COUNT_TIME_NAME(name) +#elif defined TRACY_INSTRUMENTED +#define PROFILE_THIS() ZoneScopedN(__func__) +#define PROFILE_THIS_NAME(name) ZoneScopedN(name) +#else +#define PROFILE_THIS() (void)0 +#define PROFILE_THIS_NAME(name) (void)0 +#endif + #ifndef BB_USE_OP_COUNT // require a semicolon to appease formatters // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -12,18 +24,11 @@ // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define BB_OP_COUNT_CYCLES_NAME(name) (void)0 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define BB_OP_COUNT_CYCLES() (void)0 -#ifndef TRACY_TIME -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define BB_OP_COUNT_TIME_NAME(name) (void)0 // 
NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define BB_OP_COUNT_TIME() (void)0 -#else -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define BB_OP_COUNT_TIME_NAME(name) ZoneScopedN(name) +#define BB_OP_COUNT_CYCLES() (void)0 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define BB_OP_COUNT_TIME() BB_OP_COUNT_TIME_NAME(__func__) -#endif +#define BB_OP_COUNT_TIME() (void)0 #else /** * Provides an abstraction that counts operations based on function names. diff --git a/barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp b/barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp index c0e4a03e915..caeb505e50f 100644 --- a/barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp +++ b/barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp @@ -1,7 +1,8 @@ #include "slab_allocator.hpp" -#include -#include -#include +#include "barretenberg/common/assert.hpp" +#include "barretenberg/common/log.hpp" +#include "barretenberg/common/mem.hpp" +#include "barretenberg/common/op_count.hpp" #include #include #include @@ -211,6 +212,8 @@ void init_slab_allocator(size_t circuit_subgroup_size) std::shared_ptr get_mem_slab(size_t size) { + PROFILE_THIS(); + return allocator.get(size); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp index 25d460ac5a7..148e3e4df80 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp @@ -792,7 +792,7 @@ template std::vector> element::batch_mul_with_endomorphism( const std::span>& points, const Fr& scalar) noexcept { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); typedef affine_element affine_element; const size_t num_points = points.size(); diff --git a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/runtime_states.cpp b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/runtime_states.cpp index a86acbbf44e..eac3bb356d4 100644 --- 
a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/runtime_states.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/runtime_states.cpp @@ -40,6 +40,8 @@ pippenger_runtime_state::pippenger_runtime_state(const size_t num_initial , bucket_empty_status(reinterpret_cast(aligned_alloc(64, num_threads * num_buckets * sizeof(bool)))) , round_counts(reinterpret_cast(aligned_alloc(32, MAX_NUM_ROUNDS * sizeof(uint64_t)))) { + PROFILE_THIS(); + using Fq = typename Curve::BaseField; using AffineElement = typename Curve::AffineElement; @@ -51,6 +53,7 @@ pippenger_runtime_state::pippenger_runtime_state(const size_t num_initial const size_t points_per_thread = static_cast(num_points) / num_threads; parallel_for(num_threads, [&](size_t i) { + PROFILE_THIS_NAME("memset in Pippenger runtime state creation"); const size_t thread_offset = i * points_per_thread; memset(reinterpret_cast(point_pairs_1 + thread_offset + (i * 16)), 0, @@ -96,6 +99,8 @@ pippenger_runtime_state::pippenger_runtime_state(pippenger_runtime_state& , round_counts(other.round_counts) { + PROFILE_THIS(); + other.point_schedule = nullptr; other.skew_table = nullptr; other.point_pairs_1 = nullptr; @@ -111,6 +116,8 @@ template pippenger_runtime_state& pippenger_runtime_state::operator=( pippenger_runtime_state&& other) noexcept { + PROFILE_THIS(); + if (skew_table != nullptr) { aligned_free(skew_table); } @@ -164,6 +171,8 @@ template affine_product_runtime_state pippenger_runtime_state::get_affine_product_runtime_state( const size_t num_threads, const size_t thread_index) { + PROFILE_THIS(); + const auto points_per_thread = static_cast(num_points / num_threads); const auto num_buckets = static_cast(1U << scalar_multiplication::get_optimal_bucket_width(static_cast(num_points) / 2)); @@ -181,6 +190,8 @@ affine_product_runtime_state pippenger_runtime_state::get_affine_p template pippenger_runtime_state::~pippenger_runtime_state() noexcept { + PROFILE_THIS(); + if (skew_table != nullptr) { 
aligned_free(skew_table); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp index b4a3866d2d0..af772f2a73d 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp @@ -204,6 +204,8 @@ void compute_wnaf_states(uint64_t* point_schedule, const std::span scalars, const size_t num_initial_points) { + PROFILE_THIS(); + using Fr = typename Curve::ScalarField; const size_t num_points = num_initial_points * 2; constexpr size_t MAX_NUM_ROUNDS = 256; @@ -283,6 +285,8 @@ void compute_wnaf_states(uint64_t* point_schedule, **/ void organize_buckets(uint64_t* point_schedule, const size_t num_points) { + PROFILE_THIS(); + const size_t num_rounds = get_num_rounds(num_points); parallel_for(num_rounds, [&](size_t i) { @@ -446,6 +450,8 @@ void evaluate_addition_chains(affine_product_runtime_state& state, const size_t max_bucket_bits, bool handle_edge_cases) { + PROFILE_THIS(); + size_t end = state.num_points; size_t start = 0; for (size_t i = 0; i < max_bucket_bits; ++i) { @@ -483,6 +489,7 @@ typename Curve::AffineElement* reduce_buckets(affine_product_runtime_state uint32_t construct_addition_chains(affine_product_runtime_state& state, bool empty_bucket_counts) { + PROFILE_THIS(); + using Group = typename Curve::Group; // if this is the first call to `construct_addition_chains`, we need to count up our buckets if (empty_bucket_counts) { @@ -763,6 +772,8 @@ typename Curve::Element evaluate_pippenger_rounds(pippenger_runtime_state const size_t num_points, bool handle_edge_cases) { + PROFILE_THIS(); + using Element = typename Curve::Element; using AffineElement = typename Curve::AffineElement; const size_t num_rounds = get_num_rounds(num_points); @@ -881,6 +892,7 @@ typename Curve::Element pippenger_internal(std::span& 
state, bool handle_edge_cases) { + PROFILE_THIS(); // multiplication_runtime_state state; compute_wnaf_states(state.point_schedule, state.skew_table, state.round_counts, scalars, num_initial_points); organize_buckets(state.point_schedule, num_initial_points * 2); @@ -895,7 +907,7 @@ typename Curve::Element pippenger(std::span s pippenger_runtime_state& state, bool handle_edge_cases) { - BB_OP_COUNT_TIME_NAME("pippenger"); + PROFILE_THIS(); using Group = typename Curve::Group; using Element = typename Curve::Element; @@ -912,6 +924,8 @@ typename Curve::Element pippenger(std::span s } if (num_initial_points <= threshold) { + PROFILE_THIS_NAME("handle num_initial_points <= threshold"); + std::vector exponentiation_results(num_initial_points); // might as well multithread this... // Possible optimization: use group::batch_mul_with_endomorphism here. @@ -947,7 +961,7 @@ typename Curve::Element pippenger_unsafe_optimized_for_non_dyadic_polys( std::span points, pippenger_runtime_state& state) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); // our windowed non-adjacent form algorthm requires that each thread can work on at least 8 points. const size_t threshold = get_num_cpus_pow2() * 8; diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp index e3556c11767..7e637f48d94 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp @@ -14,7 +14,7 @@ namespace bb { ECCVMProver::ECCVMProver(CircuitBuilder& builder, const std::shared_ptr& transcript) : transcript(transcript) { - BB_OP_COUNT_TIME_NAME("ECCVMProver(CircuitBuilder&)"); + PROFILE_THIS_NAME("ECCVMProver(CircuitBuilder&)"); // TODO(https://github.com/AztecProtocol/barretenberg/issues/939): Remove redundancy between // ProvingKey/ProverPolynomials and update the model to reflect what's done in all other proving systems. 
@@ -193,7 +193,7 @@ HonkProof ECCVMProver::export_proof() HonkProof ECCVMProver::construct_proof() { - BB_OP_COUNT_TIME_NAME("ECCVMProver::construct_proof"); + PROFILE_THIS_NAME("ECCVMProver::construct_proof"); execute_preamble_round(); diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp index 0459f89faa5..684f6269830 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp @@ -8,9 +8,8 @@ namespace bb { template void ExecutionTrace_::populate_public_inputs_block(Builder& builder) { -#ifdef TRACY_MEMORY - ZoneScopedN("populate_public_inputs_block"); -#endif + PROFILE_THIS_NAME("populate_public_inputs_block"); + // Update the public inputs block for (const auto& idx : builder.public_inputs) { for (size_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { @@ -30,9 +29,8 @@ template void ExecutionTrace_::populate(Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { -#ifdef TRACY_MEMORY - ZoneScopedN("trace populate"); -#endif + PROFILE_THIS_NAME("trace populate"); + // Share wire polynomials, selector polynomials between proving key and builder and copy cycles from raw circuit // data auto trace_data = construct_trace_data(builder, proving_key, is_structured); @@ -42,26 +40,23 @@ void ExecutionTrace_::populate(Builder& builder, typename Flavor::Provin } if constexpr (IsUltraPlonkOrHonk) { -#ifdef TRACY_MEMORY - ZoneScopedN("add_memory_records_to_proving_key"); -#endif + PROFILE_THIS_NAME("add_memory_records_to_proving_key"); + add_memory_records_to_proving_key(trace_data, builder, proving_key); } if constexpr (IsGoblinFlavor) { -#ifdef TRACY_MEMORY - ZoneScopedN("add_ecc_op_wires_to_proving_key"); -#endif + PROFILE_THIS_NAME("add_ecc_op_wires_to_proving_key"); + add_ecc_op_wires_to_proving_key(builder, proving_key); } // Compute the 
permutation argument polynomials (sigma/id) and add them to proving key { -#ifdef TRACY_MEMORY - ZoneScopedN("compute_permutation_argument_polynomials"); -#endif + PROFILE_THIS_NAME("compute_permutation_argument_polynomials"); + compute_permutation_argument_polynomials(builder, &proving_key, trace_data.copy_cycles); } } @@ -88,9 +83,7 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { -#ifdef TRACY_MEMORY - ZoneScopedN("construct_trace_data"); -#endif + PROFILE_THIS_NAME("construct_trace_data"); if constexpr (IsPlonkFlavor) { // Complete the public inputs execution trace block from builder.public_inputs @@ -114,9 +107,8 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t // NB: The order of row/column loops is arbitrary but needs to be row/column to match old copy_cycle code { -#ifdef TRACY_MEMORY - ZoneScopedN("populating wires and copy_cycles"); -#endif + PROFILE_THIS_NAME("populating wires and copy_cycles"); + for (uint32_t block_row_idx = 0; block_row_idx < block_size; ++block_row_idx) { for (uint32_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { uint32_t var_idx = block.wires[wire_idx][block_row_idx]; // an index into the variables array diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp index 045c41b56a7..2f107a3444d 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp @@ -29,9 +29,8 @@ template class ExecutionTrace_ { TraceData(Builder& builder, ProvingKey& proving_key) { -#ifdef TRACY_MEMORY - ZoneScopedN("TraceData constructor"); -#endif + PROFILE_THIS_NAME("TraceData constructor"); + if constexpr (IsHonkFlavor) { // Initialize and share the wire and selector polynomials for (auto [wire, other_wire] : zip_view(wires, 
proving_key.polynomials.get_wires())) { @@ -49,9 +48,8 @@ template class ExecutionTrace_ { } { -#ifdef TRACY_MEMORY - ZoneScopedN("selector initialization"); -#endif + PROFILE_THIS_NAME("selector initialization"); + for (size_t idx = 0; idx < Builder::Arithmetization::NUM_SELECTORS; ++idx) { selectors[idx] = Polynomial(proving_key.circuit_size); std::string selector_tag = builder.selector_names[idx] + "_lagrange"; @@ -60,10 +58,8 @@ template class ExecutionTrace_ { } } { + PROFILE_THIS_NAME("copy cycle initialization"); -#ifdef TRACY_MEMORY - ZoneScopedN("copy cycle initialization"); -#endif copy_cycles.resize(builder.variables.size()); } } diff --git a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp index 4ef27dbcd9c..fcfd0d78913 100644 --- a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp +++ b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp @@ -134,7 +134,7 @@ class GoblinProver { */ PairingPoints verify_merge(MegaCircuitBuilder& circuit_builder, MergeProof& proof) const { - BB_OP_COUNT_TIME_NAME("Goblin::merge"); + PROFILE_THIS_NAME("Goblin::merge"); RecursiveMergeVerifier merge_verifier{ &circuit_builder }; return merge_verifier.verify_proof(proof); }; @@ -146,7 +146,7 @@ class GoblinProver { */ MergeProof prove_merge(MegaCircuitBuilder& circuit_builder) { - BB_OP_COUNT_TIME_NAME("Goblin::merge"); + PROFILE_THIS_NAME("Goblin::merge"); // TODO(https://github.com/AztecProtocol/barretenberg/issues/993): Some circuits (particularly on the first call // to accumulate) may not have any goblin ecc ops prior to the call to merge(), so the commitment to the new // contribution (C_t_shift) in the merge prover will be the point at infinity. 
(Note: Some dummy ops are added @@ -172,25 +172,22 @@ class GoblinProver { { { -#ifdef TRACY_MEMORY - ZoneScopedN("Create ECCVMBuilder and ECCVMProver"); -#endif + PROFILE_THIS_NAME("Create ECCVMBuilder and ECCVMProver"); + auto eccvm_builder = std::make_unique(op_queue); eccvm_prover = std::make_unique(*eccvm_builder); } { -#ifdef TRACY_MEMORY - ZoneScopedN("Construct ECCVM Proof"); -#endif + PROFILE_THIS_NAME("Construct ECCVM Proof"); + goblin_proof.eccvm_proof = eccvm_prover->construct_proof(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("Assign Translation Evaluations"); -#endif + PROFILE_THIS_NAME("Assign Translation Evaluations"); + goblin_proof.translation_evaluations = eccvm_prover->translation_evaluations; } } @@ -208,9 +205,8 @@ class GoblinProver { eccvm_prover = nullptr; { -#ifdef TRACY_MEMORY - ZoneScopedN("Create TranslatorBuilder and TranslatorProver"); -#endif + PROFILE_THIS_NAME("Create TranslatorBuilder and TranslatorProver"); + auto translator_builder = std::make_unique(translation_batching_challenge_v, evaluation_challenge_x, op_queue); translator_prover = std::make_unique(*translator_builder, transcript); @@ -218,9 +214,8 @@ class GoblinProver { { -#ifdef TRACY_MEMORY - ZoneScopedN("Construct Translator Proof"); -#endif + PROFILE_THIS_NAME("Construct Translator Proof"); + goblin_proof.translator_proof = translator_prover->construct_proof(); } } @@ -235,22 +230,19 @@ class GoblinProver { GoblinProof prove(MergeProof merge_proof_in = {}) { -#ifdef TRACY_MEMORY - ZoneScopedN("Goblin::prove"); -#endif + PROFILE_THIS_NAME("Goblin::prove"); + goblin_proof.merge_proof = merge_proof_in.empty() ? 
std::move(merge_proof) : std::move(merge_proof_in); { -#ifdef TRACY_MEMORY - ZoneScopedN("prove_eccvm"); -#endif + PROFILE_THIS_NAME("prove_eccvm"); + prove_eccvm(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("prove_translator"); -#endif + PROFILE_THIS_NAME("prove_translator"); + prove_translator(); } return goblin_proof; diff --git a/barretenberg/cpp/src/barretenberg/goblin/mock_circuits.hpp b/barretenberg/cpp/src/barretenberg/goblin/mock_circuits.hpp index 7daf890d8ae..73c2ad839fb 100644 --- a/barretenberg/cpp/src/barretenberg/goblin/mock_circuits.hpp +++ b/barretenberg/cpp/src/barretenberg/goblin/mock_circuits.hpp @@ -54,6 +54,8 @@ class GoblinMockCircuits { */ static void construct_mock_app_circuit(MegaBuilder& builder, bool large = false) { + PROFILE_THIS(); + if (large) { // Results in circuit size 2^19 stdlib::generate_sha256_test_circuit(builder, 12); stdlib::generate_ecdsa_verification_test_circuit(builder, 10); @@ -83,6 +85,8 @@ class GoblinMockCircuits { */ static void construct_mock_function_circuit(MegaBuilder& builder, bool large = false) { + PROFILE_THIS(); + // Determine number of times to execute the below operations that constitute the mock circuit logic. Note that // the circuit size does not scale linearly with number of iterations due to e.g. 
amortization of lookup costs const size_t NUM_ITERATIONS_LARGE = 12; // results in circuit size 2^19 (502238 gates) @@ -119,6 +123,8 @@ class GoblinMockCircuits { */ static void perform_op_queue_interactions_for_mock_first_circuit(std::shared_ptr& op_queue) { + PROFILE_THIS(); + bb::MegaCircuitBuilder builder{ op_queue }; // Add some goblinized ecc ops @@ -145,6 +151,8 @@ class GoblinMockCircuits { */ static void add_some_ecc_op_gates(MegaBuilder& builder) { + PROFILE_THIS(); + // Add some arbitrary ecc op gates for (size_t i = 0; i < 3; ++i) { auto point = Point::random_element(&engine); @@ -163,6 +171,8 @@ class GoblinMockCircuits { */ static void construct_simple_circuit(MegaBuilder& builder) { + PROFILE_THIS(); + add_some_ecc_op_gates(builder); MockCircuits::construct_arithmetic_circuit(builder); } @@ -178,6 +188,8 @@ class GoblinMockCircuits { */ static void construct_mock_folding_kernel(MegaBuilder& builder) { + PROFILE_THIS(); + // Add operations representing general kernel logic e.g. state updates. 
Note: these are structured to make // the kernel "full" within the dyadic size 2^17 const size_t NUM_MERKLE_CHECKS = 20; @@ -196,6 +208,8 @@ class GoblinMockCircuits { const KernelInput& function_accum, const KernelInput& prev_kernel_accum) { + PROFILE_THIS(); + // Execute recursive aggregation of function proof auto verification_key = std::make_shared(&builder, function_accum.verification_key); auto proof = bb::convert_proof_to_witness(&builder, function_accum.proof); diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp index 3a87d0efdab..687682f3eb9 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp @@ -63,9 +63,8 @@ template struct PermutationMapping { PermutationMapping(size_t circuit_size) { -#ifdef TRACY_MEMORY - ZoneScopedN("PermutationMapping constructor"); -#endif + PROFILE_THIS_NAME("PermutationMapping constructor"); + for (uint8_t col_idx = 0; col_idx < NUM_WIRES; ++col_idx) { sigmas[col_idx].reserve(circuit_size); if constexpr (generalized) { @@ -390,17 +389,15 @@ void compute_permutation_argument_polynomials(const typename Flavor::CircuitBuil // Compute Honk-style sigma and ID polynomials from the corresponding mappings { -#ifdef TRACY_MEMORY - ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); -#endif + PROFILE_THIS_NAME("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_sigmas(), mapping.sigmas, key); } { -#ifdef TRACY_MEMORY - ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); -#endif + PROFILE_THIS_NAME("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + 
compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_ids(), mapping.ids, key); } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index e372f0f6dea..bd5f88d8fa1 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -1,5 +1,6 @@ #pragma once #include "barretenberg/common/mem.hpp" +#include "barretenberg/common/op_count.hpp" #include "barretenberg/common/zip_view.hpp" #include "barretenberg/crypto/sha256/sha256.hpp" #include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp" @@ -57,7 +58,9 @@ template class Polynomial { // Intended just for plonk, where size == virtual_size always Polynomial(size_t size) : Polynomial(size, size) - {} + { + PROFILE_THIS(); + } // Constructor that does not initialize values, use with caution to save time. Polynomial(size_t size, size_t virtual_size, size_t start_index, DontZeroMemory flag); Polynomial(size_t size, size_t virtual_size, DontZeroMemory flag) @@ -257,13 +260,18 @@ template class Polynomial { static Polynomial random(size_t size, size_t start_index = 0) { + PROFILE_THIS_NAME("generate random polynomial"); + return random(size - start_index, size, start_index); } static Polynomial random(size_t size, size_t virtual_size, size_t start_index) { Polynomial p(size, virtual_size, start_index, DontZeroMemory::FLAG); - std::generate_n(p.coefficients_.data(), size, []() { return Fr::random_element(); }); + parallel_for_heuristic( + size, + [&](size_t i) { p.coefficients_.data()[i] = Fr::random_element(); }, + thread_heuristics::ALWAYS_MULTITHREAD); return p; } diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp index 85266185d91..bedd0fd974e 100644 --- 
a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp @@ -12,9 +12,8 @@ void ProtogalaxyProver_::run_oink_prover_on_one_incomplete_k const std::string& domain_separator) { -#ifdef TRACY_MEMORY - ZoneScopedN("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); -#endif + PROFILE_THIS_NAME("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); + OinkProver oink_prover(keys, transcript, domain_separator + '_'); oink_prover.prove(); } @@ -22,7 +21,7 @@ void ProtogalaxyProver_::run_oink_prover_on_one_incomplete_k template void ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key() { - BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key"); + PROFILE_THIS_NAME("ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key"); size_t idx = 0; auto& key = keys_to_fold[0]; auto domain_separator = std::to_string(idx); @@ -49,7 +48,7 @@ std::tuple, Polynomial::perturbator_round( const std::shared_ptr& accumulator) { - BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::perturbator_round"); + PROFILE_THIS_NAME("ProtogalaxyProver_::perturbator_round"); using Fun = ProtogalaxyProverInternal; @@ -79,7 +78,7 @@ ProtogalaxyProver_::combiner_quotient_round(const std::vecto const std::vector& deltas, const DeciderProvingKeys& keys) { - BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::combiner_quotient_round"); + PROFILE_THIS_NAME("ProtogalaxyProver_::combiner_quotient_round"); using Fun = ProtogalaxyProverInternal; @@ -119,7 +118,7 @@ FoldingResult ProtogalaxyProver_; const FF combiner_challenge = transcript->template get_challenge("combiner_quotient_challenge"); @@ -163,10 +162,8 @@ template FoldingResult ProtogalaxyProver_::prove() { -#ifdef TRACY_MEMORY - ZoneScopedN("ProtogalaxyProver::prove"); -#endif - BB_OP_COUNT_TIME_NAME("ProtogalaxyProver::prove"); + PROFILE_THIS_NAME("ProtogalaxyProver::prove"); + // Ensure keys are all of the same size for 
(size_t idx = 0; idx < DeciderProvingKeys::NUM - 1; ++idx) { if (keys_to_fold[idx]->proving_key.circuit_size != keys_to_fold[idx + 1]->proving_key.circuit_size) { diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp index 0275884e84d..7f345ea7daa 100644 --- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp @@ -110,7 +110,7 @@ template class ProtogalaxyProverInternal { { - BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::compute_row_evaluations"); + PROFILE_THIS_NAME("ProtogalaxyProver_::compute_row_evaluations"); const size_t polynomial_size = polynomials.get_polynomial_size(); std::vector aggregated_relation_evaluations(polynomial_size); @@ -205,7 +205,7 @@ template class ProtogalaxyProverInternal { static Polynomial compute_perturbator(const std::shared_ptr& accumulator, const std::vector& deltas) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); auto full_honk_evaluations = compute_row_evaluations( accumulator->proving_key.polynomials, accumulator->alphas, accumulator->relation_parameters); const auto betas = accumulator->gate_challenges; @@ -316,7 +316,7 @@ template class ProtogalaxyProverInternal { const UnivariateRelationSeparator& alphas, TupleOfTuples& univariate_accumulators) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); // Whether to use univariates whose operators ignore some values which an honest prover would compute to be zero constexpr bool skip_zero_computations = std::same_as; diff --git a/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp index 144fb1a3933..bc7ca4ee266 100644 --- a/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp @@ -110,7 +110,7 @@ template class 
AuxiliaryRelationImpl { const Parameters& params, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("Auxiliary::accumulate"); + PROFILE_THIS_NAME("Auxiliary::accumulate"); // declare the accumulator of the maximum length, in non-ZK Flavors, they are of the same length, // whereas in ZK Flavors, the accumulator corresponding to RAM consistency sub-relation 1 is the longest using Accumulator = typename std::tuple_element_t<3, ContainerOverSubrelations>; diff --git a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp index 3945c8a3e20..bb29d9bc8a9 100644 --- a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp @@ -282,7 +282,7 @@ template class DatabusLookupRelationImpl { const Parameters& params, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("DatabusRead::accumulate"); + PROFILE_THIS_NAME("DatabusRead::accumulate"); using Accumulator = typename std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; diff --git a/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp index 47883138208..35385577b5f 100644 --- a/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp @@ -55,7 +55,7 @@ template class DeltaRangeConstraintRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("DeltaRange::accumulate"); + PROFILE_THIS_NAME("DeltaRange::accumulate"); using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; auto w_1 = View(in.w_l); diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp 
b/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp index 1b716d6c147..5ae3ca02028 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp @@ -65,7 +65,7 @@ template class EccOpQueueRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("EccOp::accumulate"); + PROFILE_THIS_NAME("EccOp::accumulate"); using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; diff --git a/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp index 9033179a59e..82f7d01d141 100644 --- a/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp @@ -57,7 +57,7 @@ template class EllipticRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("Elliptic::accumulate"); + PROFILE_THIS_NAME("Elliptic::accumulate"); // TODO(@zac - williamson #2608 when Pedersen refactor is completed, // replace old addition relations with these ones and // remove endomorphism coefficient in ecc add gate(not used)) diff --git a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp index 93b793f168b..afd1326a91d 100644 --- a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp @@ -214,7 +214,7 @@ template class LogDerivLookupRelationImpl { const Parameters& params, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("Lookup::accumulate"); + PROFILE_THIS_NAME("Lookup::accumulate"); // declare the accumulator of the maximum length, in non-ZK Flavors, they are of the same length, // whereas in ZK Flavors, the accumulator corresponding 
log derivative lookup argument sub-relation is the // longest diff --git a/barretenberg/cpp/src/barretenberg/relations/permutation_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/permutation_relation.hpp index 7f5afc0b384..af1c8bd9f64 100644 --- a/barretenberg/cpp/src/barretenberg/relations/permutation_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/permutation_relation.hpp @@ -129,7 +129,7 @@ template class UltraPermutationRelationImpl { const Parameters& params, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("Permutation::accumulate"); + PROFILE_THIS_NAME("Permutation::accumulate"); // Contribution (1) [&]() { using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; diff --git a/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp index bb75064effa..e13650e711a 100644 --- a/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp @@ -63,7 +63,7 @@ template class Poseidon2ExternalRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("PoseidonExt::accumulate"); + PROFILE_THIS_NAME("PoseidonExt::accumulate"); using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; auto w_l = View(in.w_l); diff --git a/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp index 02dcfaf6192..9e84d736af4 100644 --- a/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp @@ -60,7 +60,7 @@ template class Poseidon2InternalRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("PoseidonInt::accumulate"); + 
PROFILE_THIS_NAME("PoseidonInt::accumulate"); using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; auto w_l = View(in.w_l); diff --git a/barretenberg/cpp/src/barretenberg/relations/ultra_arithmetic_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ultra_arithmetic_relation.hpp index 69dfd2b9d11..bfc6e85bf85 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ultra_arithmetic_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ultra_arithmetic_relation.hpp @@ -82,7 +82,7 @@ template class UltraArithmeticRelationImpl { const Parameters&, const FF& scaling_factor) { - BB_OP_COUNT_TIME_NAME("Arithmetic::accumulate"); + PROFILE_THIS_NAME("Arithmetic::accumulate"); { using Accumulator = std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; diff --git a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.cpp b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.cpp index eb37760417b..c0d664ca9de 100644 --- a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.cpp +++ b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.cpp @@ -1,5 +1,6 @@ #include "file_crs_factory.hpp" #include "../io.hpp" +#include "barretenberg/common/op_count.hpp" #include "barretenberg/ecc/curves/bn254/bn254.hpp" #include "barretenberg/ecc/curves/bn254/g1.hpp" #include "barretenberg/ecc/curves/bn254/pairing.hpp" @@ -56,6 +57,8 @@ FileCrsFactory::FileCrsFactory(std::string path, size_t initial_degree) template std::shared_ptr> FileCrsFactory::get_prover_crs(size_t degree) { + PROFILE_THIS(); + if (prover_degree_ < degree || !prover_crs_) { prover_crs_ = std::make_shared>(degree, path_); prover_degree_ = degree; diff --git a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp index b12469ec158..bf3dd8d871b 100644 --- 
a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp +++ b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp @@ -45,9 +45,8 @@ template class FileProverCrs : public ProverCrs { : num_points(num_points) { -#ifdef TRACY_MEMORY - ZoneScopedN("FileProverCrs constructor"); -#endif + PROFILE_THIS_NAME("FileProverCrs constructor"); + monomials_ = scalar_multiplication::point_table_alloc(num_points); srs::IO::read_transcript_g1(monomials_.get(), num_points, path); diff --git a/barretenberg/cpp/src/barretenberg/srs/factories/mem_bn254_crs_factory.cpp b/barretenberg/cpp/src/barretenberg/srs/factories/mem_bn254_crs_factory.cpp index ed9ab38739f..75c7f950147 100644 --- a/barretenberg/cpp/src/barretenberg/srs/factories/mem_bn254_crs_factory.cpp +++ b/barretenberg/cpp/src/barretenberg/srs/factories/mem_bn254_crs_factory.cpp @@ -1,4 +1,5 @@ #include "mem_bn254_crs_factory.hpp" +#include "barretenberg/common/op_count.hpp" #include "barretenberg/ecc/curves/bn254/bn254.hpp" #include "barretenberg/ecc/curves/bn254/g1.hpp" #include "barretenberg/ecc/curves/bn254/pairing.hpp" @@ -59,6 +60,8 @@ MemBn254CrsFactory::MemBn254CrsFactory(std::vector const& po std::shared_ptr> MemBn254CrsFactory::get_prover_crs(size_t degree) { + PROFILE_THIS(); + if (prover_crs_->get_monomial_size() < degree) { throw_or_abort(format("prover trying to get too many points in MemBn254CrsFactory! 
", prover_crs_->get_monomial_size(), diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/bool/bool.test.cpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/bool/bool.test.cpp index fe268261725..87987906387 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/bool/bool.test.cpp +++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/bool/bool.test.cpp @@ -189,7 +189,9 @@ TYPED_TEST(BoolTest, And) for (size_t i = 0; i < 32; ++i) { bool_ct a = witness_ct(&builder, (bool)(i % 1)); bool_ct b = witness_ct(&builder, (bool)(i % 2 == 1)); + // clang-format off a& b; + // clang-format on } bool result = CircuitChecker::check(builder); @@ -204,17 +206,23 @@ TYPED_TEST(BoolTest, AndConstants) for (size_t i = 0; i < 32; ++i) { bool_ct a = witness_ct(&builder, (bool)(i % 2)); bool_ct b = witness_ct(&builder, (bool)(i % 3 == 1)); + // clang-format off a& b; + // clang-format on } for (size_t i = 0; i < 32; ++i) { if (i % 2 == 0) { bool_ct a = witness_ct(&builder, (bool)(i % 2)); bool_ct b(&builder, (bool)(i % 3 == 1)); + // clang-format off a& b; + // clang-format on } else { bool_ct a(&builder, (bool)(i % 2)); bool_ct b = witness_ct(&builder, (bool)(i % 3 == 1)); + // clang-format off a& b; + // clang-format on } } diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp index 0ac5518efe2..7f8f8c8003b 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp @@ -48,6 +48,8 @@ template class MegaCircuitBuilder_ : public UltraCircuitBuilder_>(size_hint) , op_queue(op_queue_in) { + PROFILE_THIS(); + // Set indices to constants corresponding to Goblin ECC op codes set_goblin_ecc_op_code_constant_variables(); }; diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp 
b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp index d2d0982e3f9..e8560197a6d 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp @@ -372,7 +372,7 @@ class MegaFlavor { { // TODO(https://github.com/AztecProtocol/barretenberg/issues/1072): Unexpected jump in time to allocate all // of these polys (in client_ivc_bench only). - BB_OP_COUNT_TIME_NAME("ProverPolynomials(size_t)"); + PROFILE_THIS_NAME("ProverPolynomials(size_t)"); for (auto& poly : get_to_be_shifted()) { poly = Polynomial{ /*memory size*/ circuit_size - 1, @@ -396,7 +396,7 @@ class MegaFlavor { [[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); } [[nodiscard]] AllValues get_row(size_t row_idx) const { - BB_OP_COUNT_TIME_NAME("MegaFlavor::get_row"); + PROFILE_THIS_NAME("MegaFlavor::get_row"); AllValues result; for (auto [result_field, polynomial] : zip_view(result.get_all(), this->get_all())) { result_field = polynomial[row_idx]; diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp index c03251258af..abc906e79d9 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp @@ -294,9 +294,8 @@ class UltraFlavor { ProverPolynomials(size_t circuit_size) { -#ifdef TRACY_MEMORY - ZoneScopedN("creating empty prover polys"); -#endif + PROFILE_THIS_NAME("creating empty prover polys"); + for (auto& poly : get_to_be_shifted()) { poly = Polynomial{ /*memory size*/ circuit_size - 1, /*largest possible index*/ circuit_size, @@ -318,7 +317,7 @@ class UltraFlavor { [[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); } [[nodiscard]] AllValues get_row(const size_t row_idx) const { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); AllValues 
result; for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) { result_field = polynomial[row_idx]; @@ -565,9 +564,8 @@ class UltraFlavor { PartiallyEvaluatedMultivariates(const size_t circuit_size) { -#ifdef TRACY_MEMORY - ZoneScopedN("PartiallyEvaluatedMultivariates constructor"); -#endif + PROFILE_THIS_NAME("PartiallyEvaluatedMultivariates constructor"); + // Storage is only needed after the first partial evaluation, hence polynomials of // size (n / 2) for (auto& poly : this->get_all()) { diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp index c9904412467..083236863f0 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp @@ -210,9 +210,7 @@ template class SumcheckProver { round_idx, full_polynomials, relation_parameters, gate_separators, alpha, zk_sumcheck_data); { -#ifdef TRACY_MEMORY - ZoneScopedN("rest of sumcheck round 1"); -#endif + PROFILE_THIS_NAME("rest of sumcheck round 1"); // Place the evaluations of the round univariate into transcript. 
transcript->send_to_verifier("Sumcheck:univariate_0", round_univariate); @@ -232,9 +230,8 @@ template class SumcheckProver { vinfo("completed sumcheck round 0"); for (size_t round_idx = 1; round_idx < multivariate_d; round_idx++) { -#ifdef TRACY_MEMORY - ZoneScopedN("sumcheck loop"); -#endif + PROFILE_THIS_NAME("sumcheck loop"); + // Write the round univariate to the transcript round_univariate = round.compute_univariate(round_idx, partially_evaluated_polynomials, diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp index 30ca1b0d536..f36a0b4627e 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp @@ -67,9 +67,8 @@ template class SumcheckProverRound { : round_size(initial_round_size) { -#ifdef TRACY_MEMORY - ZoneScopedN("SumcheckProverRound constructor"); -#endif + PROFILE_THIS_NAME("SumcheckProverRound constructor"); + // Initialize univariate accumulators to 0 Utils::zero_univariates(univariate_accumulators); } @@ -164,11 +163,7 @@ template class SumcheckProverRound { const RelationSeparator alpha, std::optional> zk_sumcheck_data = std::nullopt) // only submitted when Flavor HasZK { - -#ifdef TRACY_MEMORY - ZoneScopedN("compute_univariate"); -#endif - BB_OP_COUNT_TIME(); + PROFILE_THIS_NAME("compute_univariate"); // Determine number of threads for multithreading. 
// Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/translator_circuit_builder.hpp b/barretenberg/cpp/src/barretenberg/translator_vm/translator_circuit_builder.hpp index 6a5ecb02341..2c8e0d2c9c4 100644 --- a/barretenberg/cpp/src/barretenberg/translator_vm/translator_circuit_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/translator_vm/translator_circuit_builder.hpp @@ -358,7 +358,7 @@ class TranslatorCircuitBuilder : public CircuitBuilderBase { TranslatorCircuitBuilder(Fq batching_challenge_v_, Fq evaluation_input_x_, std::shared_ptr op_queue) : TranslatorCircuitBuilder(batching_challenge_v_, evaluation_input_x_) { - BB_OP_COUNT_TIME_NAME("TranslatorCircuitBuilder::constructor"); + PROFILE_THIS_NAME("TranslatorCircuitBuilder::constructor"); feed_ecc_op_queue_into_circuit(op_queue); } diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/translator_flavor.hpp b/barretenberg/cpp/src/barretenberg/translator_vm/translator_flavor.hpp index a6578e68aa9..1be1825aed2 100644 --- a/barretenberg/cpp/src/barretenberg/translator_vm/translator_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/translator_vm/translator_flavor.hpp @@ -643,7 +643,7 @@ class TranslatorFlavor { */ [[nodiscard]] AllValues get_row(size_t row_idx) const { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); AllValues result; for (auto [result_field, polynomial] : zip_view(result.get_all(), this->get_all())) { result_field = polynomial[row_idx]; diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp b/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp index 56f09320512..136168282e3 100644 --- a/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp @@ -13,7 +13,7 @@ TranslatorProver::TranslatorProver(CircuitBuilder& circuit_builder, const std::s , 
mini_circuit_dyadic_size(Flavor::compute_mini_circuit_dyadic_size(circuit_builder)) , transcript(transcript) { - BB_OP_COUNT_TIME(); + PROFILE_THIS(); // Compute total number of gates, dyadic circuit size, etc. key = std::make_shared(circuit_builder); @@ -194,7 +194,7 @@ HonkProof TranslatorProver::export_proof() HonkProof TranslatorProver::construct_proof() { - BB_OP_COUNT_TIME_NAME("TranslatorProver::construct_proof"); + PROFILE_THIS_NAME("TranslatorProver::construct_proof"); // Add circuit size public input size and public inputs to transcript. execute_preamble_round(); diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp index 4e98d307bfe..3f8fe01b3cc 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp @@ -31,9 +31,8 @@ template void DeciderProver_::execute_relation_ch auto sumcheck = Sumcheck(polynomial_size, transcript); { -#ifdef TRACY_MEMORY - ZoneScopedN("sumcheck.prove"); -#endif + PROFILE_THIS_NAME("sumcheck.prove"); + sumcheck_output = sumcheck.prove(proving_key->proving_key.polynomials, proving_key->relation_parameters, proving_key->alphas, @@ -76,7 +75,7 @@ template HonkProof DeciderProver_::export_proof() template HonkProof DeciderProver_::construct_proof() { - BB_OP_COUNT_TIME_NAME("Decider::construct_proof"); + PROFILE_THIS_NAME("Decider::construct_proof"); // Run sumcheck subprotocol. 
vinfo("executing relation checking rounds..."); diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp index e2054e2a540..0d6eab716e2 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp @@ -50,10 +50,11 @@ template class DeciderProvingKey_ { std::shared_ptr commitment_key = nullptr) : is_structured(trace_structure != TraceStructure::NONE) { - BB_OP_COUNT_TIME_NAME("DeciderProvingKey(Circuit&)"); + PROFILE_THIS_NAME("DeciderProvingKey(Circuit&)"); + vinfo("DeciderProvingKey(Circuit&)"); vinfo("creating decider proving key"); - circuit.finalize_circuit(/*ensure_nonzero=*/true); + circuit.finalize_circuit(/* ensure_nonzero = */ true); // If using a structured trace, set fixed block sizes, check their validity, and set the dyadic circuit size if (is_structured) { @@ -76,10 +77,9 @@ template class DeciderProvingKey_ { } { -#ifdef TRACY_MEMORY - ZoneScopedN("constructing proving key"); -#endif + PROFILE_THIS_NAME("constructing proving key"); vinfo("constructing proving key"); + proving_key = ProvingKey(dyadic_circuit_size, circuit.public_inputs.size(), commitment_key); if (IsGoblinFlavor && !is_structured) { // Allocate full size polynomials @@ -87,19 +87,17 @@ template class DeciderProvingKey_ { } else { // Allocate only a correct amount of memory for each polynomial // Allocate the wires and selectors polynomials { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating wires"); -#endif + PROFILE_THIS_NAME("allocating wires"); vinfo("allocating wires"); + for (auto& wire : proving_key.polynomials.get_wires()) { wire = Polynomial::shiftable(proving_key.circuit_size); } } { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating gate selectors"); -#endif + PROFILE_THIS_NAME("allocating gate selectors"); vinfo("allocating gate selectors"); + // Define gate selectors over the block they are 
isolated to for (auto [selector, block] : zip_view(proving_key.polynomials.get_gate_selectors(), circuit.blocks.get_gate_blocks())) { @@ -119,20 +117,18 @@ template class DeciderProvingKey_ { } } { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating non-gate selectors"); -#endif + PROFILE_THIS_NAME("allocating non-gate selectors"); vinfo("allocating non-gate selectors"); + // Set the other non-gate selector polynomials to full size for (auto& selector : proving_key.polynomials.get_non_gate_selectors()) { selector = Polynomial(proving_key.circuit_size); } } if constexpr (IsGoblinFlavor) { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating ecc op wires and selector"); -#endif + PROFILE_THIS_NAME("allocating ecc op wires and selector"); vinfo("allocating ecc op wires and selector"); + // Allocate the ecc op wires and selector const size_t ecc_op_block_size = circuit.blocks.ecc_op.get_fixed_size(is_structured); const size_t op_wire_offset = Flavor::has_zero_row ? 1 : 0; @@ -169,10 +165,9 @@ template class DeciderProvingKey_ { std::min(static_cast(MAX_LOOKUP_TABLES_SIZE), dyadic_circuit_size - 1); size_t table_offset = dyadic_circuit_size - max_tables_size; { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating table polynomials"); -#endif + PROFILE_THIS_NAME("allocating table polynomials"); vinfo("allocating table polynomials"); + ASSERT(dyadic_circuit_size > max_tables_size); // Allocate the table polynomials @@ -183,10 +178,9 @@ template class DeciderProvingKey_ { } } { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating sigmas and ids"); -#endif + PROFILE_THIS_NAME("allocating sigmas and ids"); vinfo("allocating sigmas and ids"); + for (auto& sigma : proving_key.polynomials.get_sigmas()) { sigma = typename Flavor::Polynomial(proving_key.circuit_size); } @@ -233,19 +227,17 @@ template class DeciderProvingKey_ { } } { -#ifdef TRACY_MEMORY - ZoneScopedN("constructing z_perm"); -#endif + PROFILE_THIS_NAME("constructing z_perm"); vinfo("constructing z_perm"); + // Allocate the z_perm 
polynomial proving_key.polynomials.z_perm = Polynomial::shiftable(proving_key.circuit_size); } { -#ifdef TRACY_MEMORY - ZoneScopedN("allocating lagrange polynomials"); -#endif + PROFILE_THIS_NAME("allocating lagrange polynomials"); vinfo("allocating lagrange polynomials"); + // First and last lagrange polynomials (in the full circuit size) proving_key.polynomials.lagrange_first = Polynomial(1, dyadic_circuit_size, 0); proving_key.polynomials.lagrange_last = Polynomial(1, dyadic_circuit_size, dyadic_circuit_size - 1); @@ -259,38 +251,34 @@ template class DeciderProvingKey_ { // Construct and add to proving key the wire, selector and copy constraint polynomials Trace::populate(circuit, proving_key, is_structured); -#ifdef TRACY_MEMORY - ZoneScopedN("constructing prover instance after trace populate"); -#endif - vinfo("constructing prover instance after trace populate"); + { + PROFILE_THIS_NAME("constructing prover instance after trace populate"); + vinfo("constructing prover instance after trace populate"); - // If Goblin, construct the databus polynomials - if constexpr (IsGoblinFlavor) { -#ifdef TRACY_MEMORY - ZoneScopedN("constructing databus polynomials"); -#endif - vinfo("constructing databus polynomials"); - construct_databus_polynomials(circuit); - } + // If Goblin, construct the databus polynomials + if constexpr (IsGoblinFlavor) { + PROFILE_THIS_NAME("constructing databus polynomials"); + vinfo("constructing databus polynomials"); + construct_databus_polynomials(circuit); + } + } // Set the lagrange polynomials proving_key.polynomials.lagrange_first.at(0) = 1; proving_key.polynomials.lagrange_last.at(dyadic_circuit_size - 1) = 1; { -#ifdef TRACY_MEMORY - ZoneScopedN("constructing lookup table polynomials"); -#endif + PROFILE_THIS_NAME("constructing lookup table polynomials"); vinfo("constructing lookup table polynomials"); + construct_lookup_table_polynomials( proving_key.polynomials.get_tables(), circuit, dyadic_circuit_size); } { -#ifdef TRACY_MEMORY - 
ZoneScopedN("constructing lookup read counts"); -#endif + PROFILE_THIS_NAME("constructing lookup read counts"); vinfo("constructing lookup read counts"); + construct_lookup_read_counts(proving_key.polynomials.lookup_read_counts, proving_key.polynomials.lookup_read_tags, circuit, diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index 092d2dfc73d..b9279d2a5ae 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -19,43 +19,38 @@ template void OinkProver::prove() } { -#ifdef TRACY_MEMORY - ZoneScopedN("execute_preamble_round"); -#endif + PROFILE_THIS_NAME("execute_preamble_round"); + // Add circuit size public input size and public inputs to transcript-> execute_preamble_round(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("execute_wire_commitments_round"); -#endif + PROFILE_THIS_NAME("execute_wire_commitments_round"); + // Compute first three wire commitments execute_wire_commitments_round(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("execute_sorted_list_accumulator_round"); -#endif + PROFILE_THIS_NAME("execute_sorted_list_accumulator_round"); + // Compute sorted list accumulator and commitment execute_sorted_list_accumulator_round(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("execute_log_derivative_inverse_round"); -#endif + PROFILE_THIS_NAME("execute_log_derivative_inverse_round"); + // Fiat-Shamir: beta & gamma execute_log_derivative_inverse_round(); } { -#ifdef TRACY_MEMORY - ZoneScopedN("execute_grand_product_computation_round"); -#endif + PROFILE_THIS_NAME("execute_grand_product_computation_round"); + // Compute grand product(s) and commitments. 
execute_grand_product_computation_round(); } @@ -73,7 +68,7 @@ template void OinkProver::prove() */ template void OinkProver::execute_preamble_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::execute_preamble_round"); + PROFILE_THIS_NAME("OinkProver::execute_preamble_round"); const auto circuit_size = static_cast(proving_key->proving_key.circuit_size); const auto num_public_inputs = static_cast(proving_key->proving_key.num_public_inputs); transcript->send_to_verifier(domain_separator + "circuit_size", circuit_size); @@ -96,11 +91,11 @@ template void OinkProver::execute_preamble_round( */ template void OinkProver::execute_wire_commitments_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::execute_wire_commitments_round"); + PROFILE_THIS_NAME("OinkProver::execute_wire_commitments_round"); // Commit to the first three wire polynomials // We only commit to the fourth wire polynomial after adding memory recordss { - BB_OP_COUNT_TIME_NAME("COMMIT::wires"); + PROFILE_THIS_NAME("COMMIT::wires"); if (proving_key->get_is_structured()) { witness_commitments.w_l = proving_key->proving_key.commitment_key->commit_structured( proving_key->proving_key.polynomials.w_l, proving_key->proving_key.active_block_ranges); @@ -131,7 +126,7 @@ template void OinkProver::execute_wire_commitment proving_key->proving_key.polynomials.get_ecc_op_wires(), commitment_labels.get_ecc_op_wires())) { { - BB_OP_COUNT_TIME_NAME("COMMIT::ecc_op_wires"); + PROFILE_THIS_NAME("COMMIT::ecc_op_wires"); commitment = proving_key->proving_key.commitment_key->commit(polynomial); } transcript->send_to_verifier(domain_separator + label, commitment); @@ -143,7 +138,7 @@ template void OinkProver::execute_wire_commitment proving_key->proving_key.polynomials.get_databus_entities(), commitment_labels.get_databus_entities())) { { - BB_OP_COUNT_TIME_NAME("COMMIT::databus"); + PROFILE_THIS_NAME("COMMIT::databus"); commitment = proving_key->proving_key.commitment_key->commit(polynomial); } 
transcript->send_to_verifier(domain_separator + label, commitment); @@ -157,7 +152,7 @@ template void OinkProver::execute_wire_commitment */ template void OinkProver::execute_sorted_list_accumulator_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::execute_sorted_list_accumulator_round"); + PROFILE_THIS_NAME("OinkProver::execute_sorted_list_accumulator_round"); // Get eta challenges auto [eta, eta_two, eta_three] = transcript->template get_challenges( domain_separator + "eta", domain_separator + "eta_two", domain_separator + "eta_three"); @@ -169,14 +164,14 @@ template void OinkProver::execute_sorted_list_acc // Commit to lookup argument polynomials and the finalized (i.e. with memory records) fourth wire polynomial { - BB_OP_COUNT_TIME_NAME("COMMIT::lookup_counts_tags"); + PROFILE_THIS_NAME("COMMIT::lookup_counts_tags"); witness_commitments.lookup_read_counts = proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.lookup_read_counts); witness_commitments.lookup_read_tags = proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.lookup_read_tags); } { - BB_OP_COUNT_TIME_NAME("COMMIT::wires"); + PROFILE_THIS_NAME("COMMIT::wires"); if (proving_key->get_is_structured()) { witness_commitments.w_4 = proving_key->proving_key.commitment_key->commit_structured( proving_key->proving_key.polynomials.w_4, proving_key->proving_key.active_block_ranges); @@ -199,7 +194,7 @@ template void OinkProver::execute_sorted_list_acc */ template void OinkProver::execute_log_derivative_inverse_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::execute_log_derivative_inverse_round"); + PROFILE_THIS_NAME("OinkProver::execute_log_derivative_inverse_round"); auto [beta, gamma] = transcript->template get_challenges(domain_separator + "beta", domain_separator + "gamma"); proving_key->relation_parameters.beta = beta; proving_key->relation_parameters.gamma = gamma; @@ -208,7 +203,7 @@ template void OinkProver::execute_log_derivative_ 
proving_key->proving_key.compute_logderivative_inverses(proving_key->relation_parameters); { - BB_OP_COUNT_TIME_NAME("COMMIT::lookup_inverses"); + PROFILE_THIS_NAME("COMMIT::lookup_inverses"); witness_commitments.lookup_inverses = proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.lookup_inverses); } @@ -222,7 +217,7 @@ template void OinkProver::execute_log_derivative_ proving_key->proving_key.polynomials.get_databus_inverses(), commitment_labels.get_databus_inverses())) { { - BB_OP_COUNT_TIME_NAME("COMMIT::databus_inverses"); + PROFILE_THIS_NAME("COMMIT::databus_inverses"); commitment = proving_key->proving_key.commitment_key->commit_sparse(polynomial); } transcript->send_to_verifier(domain_separator + label, commitment); @@ -236,12 +231,12 @@ template void OinkProver::execute_log_derivative_ */ template void OinkProver::execute_grand_product_computation_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::execute_grand_product_computation_round"); + PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round"); // Compute the permutation and lookup grand product polynomials proving_key->proving_key.compute_grand_product_polynomials(proving_key->relation_parameters); { - BB_OP_COUNT_TIME_NAME("COMMIT::z_perm"); + PROFILE_THIS_NAME("COMMIT::z_perm"); if (proving_key->get_is_structured()) { witness_commitments.z_perm = proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement( @@ -256,7 +251,7 @@ template void OinkProver::execute_grand_product_c template typename Flavor::RelationSeparator OinkProver::generate_alphas_round() { - BB_OP_COUNT_TIME_NAME("OinkProver::generate_alphas_round"); + PROFILE_THIS_NAME("OinkProver::generate_alphas_round"); RelationSeparator alphas; std::array args; for (size_t idx = 0; idx < alphas.size(); ++idx) {