diff --git a/barretenberg/cpp/scripts/benchmark_wasm_remote.sh b/barretenberg/cpp/scripts/benchmark_wasm_remote.sh index 62213556579..bdd91606db5 100755 --- a/barretenberg/cpp/scripts/benchmark_wasm_remote.sh +++ b/barretenberg/cpp/scripts/benchmark_wasm_remote.sh @@ -16,7 +16,7 @@ cd $(dirname $0)/.. # Configure and build. cmake --preset wasm-threads -cmake --build --preset wasm-threads --target $BENCHMARK +cmake --build --preset wasm-threads --parallel --target $BENCHMARK source scripts/_benchmark_remote_lock.sh diff --git a/barretenberg/cpp/scripts/benchmark_wasm_remote_wasmer.sh b/barretenberg/cpp/scripts/benchmark_wasm_remote_wasmer.sh new file mode 100755 index 00000000000..8df56fe2593 --- /dev/null +++ b/barretenberg/cpp/scripts/benchmark_wasm_remote_wasmer.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# This script automates the process of benchmarking WASM on a remote EC2 instance. +# Prerequisites: +# 1. Define the following environment variables: +# - BB_SSH_KEY: SSH key for EC2 instance, e.g., '-i key.pem' +# - BB_SSH_INSTANCE: EC2 instance URL +# - BB_SSH_CPP_PATH: Path to barretenberg/cpp in a cloned repository on the EC2 instance +set -eu + +BENCHMARK=${1:-goblin_bench} +COMMAND=${2:-./$BENCHMARK} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} + +# Move above script dir. +cd $(dirname $0)/.. + +# Configure and build. 
+cmake --preset wasm-threads
+cmake --build --preset wasm-threads --parallel --target $BENCHMARK
+
+source scripts/_benchmark_remote_lock.sh
+
+cd build-wasm-threads
+# ensure folder structure
+ssh $BB_SSH_KEY $BB_SSH_INSTANCE "mkdir -p $BB_SSH_CPP_PATH/build-wasm-threads"
+# copy build wasm threads
+scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-threads
+# run wasm benchmarking
+ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
+  "cd $BB_SSH_CPP_PATH/build-wasm-threads ; /home/ubuntu/.wasmer/bin/wasmer run --dir=$BB_SSH_CPP_PATH --enable-threads --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND"
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/basics_bench/basics.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/basics_bench/basics.bench.cpp
index 03614693396..6bd76603106 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/basics_bench/basics.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/basics_bench/basics.bench.cpp
@@ -364,6 +364,76 @@ void sequential_copy(State& state)
         }
     }
 }
+
+/**
+ * @brief Create the two operands used by the uint256_t multiplication benchmarks
+ *
+ * @details Each operand is random and then forced odd. A product of odd values stays odd modulo 2^256, so
+ * repeatedly multiplying the operands into each other cannot collapse them to zero.
+ *
+ * @return std::vector<uint256_t> holding exactly two odd random values
+ */
+std::vector<uint256_t> random_odd_uint256_pair()
+{
+    numeric::RNG& engine = numeric::get_debug_randomness();
+    std::vector<uint256_t> operands;
+    operands.reserve(2);
+    for (size_t j = 0; j < 2; j++) {
+        operands.emplace_back(engine.get_random_uint256());
+        // Adds 1 only when the lowest bit is clear, which makes the value odd
+        operands.back() += (1 - operands.back().get_bit(0));
+    }
+    return operands;
+}
+
+/**
+ * @brief Evaluate how much uint256_t multiplication costs (in cache)
+ *
+ * @param state Benchmark state; range(0) is the log2 of the number of multiplications per benchmark iteration
+ */
+void uint_multiplication(State& state)
+{
+    std::vector<uint256_t> copy_vector = random_odd_uint256_pair();
+
+    for (auto _ : state) {
+        state.PauseTiming();
+        const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
+        state.ResumeTiming();
+        for (size_t i = 0; i < num_cycles; i++) {
+            // Alternate the destination so that each product is an input of the next multiplication
+            copy_vector[i & 1] *= copy_vector[1 - (i & 1)];
+        }
+    }
+}
+
+/**
+ * @brief Evaluate how much uint256_t extended multiplication costs (in cache)
+ *
+ * @param state Benchmark state; range(0) is the log2 of the number of multiplications per benchmark iteration
+ */
+void uint_extended_multiplication(State& state)
+{
+    std::vector<uint256_t> copy_vector = random_odd_uint256_pair();
+
+    for (auto _ : state) {
+        state.PauseTiming();
+        const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
+        state.ResumeTiming();
+        for (size_t i = 0; i < num_cycles; i++) {
+            auto [r0, r1] = copy_vector[i & 1].mul_extended(copy_vector[1 - (i & 1)]);
+            // Fold both halves of the result back into the operands (and keep them odd) outside the timed region.
+            // NOTE(review): pausing the timer on every multiplication is itself costly; confirm the numbers are
+            // not dominated by PauseTiming/ResumeTiming overhead.
+            state.PauseTiming();
+            copy_vector[i & 1] += r0;
+            copy_vector[1 - (i & 1)] += r1;
+            copy_vector[0] += (1 - copy_vector[0].get_bit(0));
+            copy_vector[1] += (1 - copy_vector[1].get_bit(0));
+            state.ResumeTiming();
+        }
+    }
+}
+
 } // namespace
 
 BENCHMARK(parallel_for_field_element_addition)->Unit(kMicrosecond)->DenseRange(0, MAX_REPETITION_LOG);
@@ -380,4 +450,6 @@ BENCHMARK(projective_point_doubling)->Unit(kMicrosecond)->DenseRange(12, 22);
 BENCHMARK(scalar_multiplication)->Unit(kMicrosecond)->DenseRange(12, 18);
 BENCHMARK(cycle_waste)->Unit(kMicrosecond)->DenseRange(20, 30);
 BENCHMARK(sequential_copy)->Unit(kMicrosecond)->DenseRange(20, 25);
+BENCHMARK(uint_multiplication)->Unit(kMicrosecond)->DenseRange(12, 27);
+BENCHMARK(uint_extended_multiplication)->Unit(kMicrosecond)->DenseRange(12, 27);
 BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.hpp
index ffda71bbd47..dce9f2634e9 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.hpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.hpp
@@ -24,11 +24,36 @@ class Bn254FqParams {
     static constexpr uint64_t cube_root_2 = 0xaa303344d4741444UL;
     static constexpr uint64_t cube_root_3 = 0x2c3b3f0d26594943UL;
 
+    static constexpr uint64_t modulus_wasm_0 = 0x187cfd47;
+    static constexpr uint64_t modulus_wasm_1 = 0x10460b6;
+ static constexpr uint64_t modulus_wasm_2 = 0x1c72a34f; + static constexpr uint64_t modulus_wasm_3 = 0x2d522d0; + static constexpr uint64_t modulus_wasm_4 = 0x1585d978; + static constexpr uint64_t modulus_wasm_5 = 0x2db40c0; + static constexpr uint64_t modulus_wasm_6 = 0xa6e141; + static constexpr uint64_t modulus_wasm_7 = 0xe5c2634; + static constexpr uint64_t modulus_wasm_8 = 0x30644e; + + static constexpr uint64_t r_squared_wasm_0 = 0xe1a2a074659bac10UL; + static constexpr uint64_t r_squared_wasm_1 = 0x639855865406005aUL; + static constexpr uint64_t r_squared_wasm_2 = 0xff54c5802d3e2632UL; + static constexpr uint64_t r_squared_wasm_3 = 0x2a11a68c34ea65a6UL; + + static constexpr uint64_t cube_root_wasm_0 = 0x62b1a3a46a337995UL; + static constexpr uint64_t cube_root_wasm_1 = 0xadc97d2722e2726eUL; + static constexpr uint64_t cube_root_wasm_2 = 0x64ee82ede2db85faUL; + static constexpr uint64_t cube_root_wasm_3 = 0x0c0afea1488a03bbUL; + static constexpr uint64_t primitive_root_0 = 0UL; static constexpr uint64_t primitive_root_1 = 0UL; static constexpr uint64_t primitive_root_2 = 0UL; static constexpr uint64_t primitive_root_3 = 0UL; + static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL; + static constexpr uint64_t endo_g1_lo = 0x7a7bd9d4391eb18d; static constexpr uint64_t endo_g1_mid = 0x4ccef014a773d2cfUL; static constexpr uint64_t endo_g1_hi = 0x0000000000000002UL; @@ -57,6 +82,24 @@ class Bn254FqParams { 0x2a1f6744ce179d8eULL, 0x3829df06681f7cbdULL, 0x463456c802275bedULL, 0x543ece899c2f3b1cULL, 0x180a96573d3d9f8ULL, 0xf8b21270ddbb927ULL, 0x1d9598e8a7e39857ULL, 0x2ba010aa41eb7786ULL, }; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0xeb8a8ec140766463ULL, 0xfded87957d76333dULL, + 0x4c710c8092f2ff5eULL, 
0x9af4916ba86fcb7fULL, + 0xe9781656bdec97a0ULL, 0xfbdb0f2afaec667aULL, + 0x4a5e94161069329bULL, 0x98e2190125e5febcULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0xf2b1f20626a3da49ULL, 0x56c12d76cb13587fULL, + 0x5251d378d7f4a143ULL, 0x4de2797ae4d5ea06ULL, + 0x49731f7cf1b732c9ULL, 0xad825aed9626b0ffULL, + 0xa91300efa307f9c3ULL, 0xa4a3a6f1afe94286ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0xf905ef8d84d5fea4ULL, 0x93b7a45b84f1507eULL, + 0xe6b99ee0068dfab5ULL, 0x39bb9964882aa4ecULL, + 0x8cbd93e909c74f23ULL, 0x276f48b709e2a0fcULL, + 0x7a71433b8b7f4b33ULL, 0xcd733dc00d1bf56aULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x2958a27c02b7cd5fULL, 0x06bc8a3277c371abULL, + 0x1484c05bce00b620ULL, 0x224cf685243dfa96ULL, + 0x30152cae7a7b3f0bULL, 0x0d791464ef86e357ULL, + 0x1b414a8e45c427ccULL, 0x290980b79c016c41ULL }; + // used in msgpack schema serialization static constexpr char schema_name[] = "fq"; static constexpr bool has_high_2adicity = false; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp index e65527e6424..343156b5c18 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp @@ -88,13 +88,16 @@ TEST(fq, RandomElement) TEST(fq, MulCheckAgainstConstants) { // test against some randomly generated test data - constexpr fq a{ 0x2523b6fa3956f038, 0x158aa08ecdd9ec1d, 0xf48216a4c74738d4, 0x2514cc93d6f0a1bf }; - constexpr fq a_copy{ 0x2523b6fa3956f038, 0x158aa08ecdd9ec1d, 0xf48216a4c74738d4, 0x2514cc93d6f0a1bf }; - constexpr fq b{ 0xb68aee5e4c8fc17c, 0xc5193de7f401d5e8, 0xb8777d4dde671db3, 0xe513e75c087b0bb }; - constexpr fq b_copy = { 0xb68aee5e4c8fc17c, 0xc5193de7f401d5e8, 0xb8777d4dde671db3, 0xe513e75c087b0bb }; - constexpr fq const_expected{ 0x7ed4174114b521c4, 0x58f5bd1d4279fdc2, 0x6a73ac09ee843d41, 0x687a76ae9b3425c }; + constexpr fq a = 
uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL }; + constexpr fq a_copy = + uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL }; + constexpr fq b = uint256_t{ 0x744fc10aec23e56aUL, 0x5dea4788a3b936a6UL, 0xa0a89f4a8af01df1UL, 0x72ae28836807df3UL }; + constexpr fq b_copy = + uint256_t{ 0x744fc10aec23e56aUL, 0x5dea4788a3b936a6UL, 0xa0a89f4a8af01df1UL, 0x72ae28836807df3UL }; + + constexpr fq const_expected = + uint256_t{ 0x6c0a789c0028fd09UL, 0xca9520d84c684efaUL, 0xcbf3f7b023a852b4UL, 0x1b2e4dac41400621UL }; constexpr fq const_result = a * b; - static_assert(const_result == const_expected); static_assert(a == a_copy); static_assert(b == b_copy); @@ -111,7 +114,10 @@ TEST(fq, MulShortIntegers) { constexpr fq a{ 0xa, 0, 0, 0 }; constexpr fq b{ 0xb, 0, 0, 0 }; - constexpr fq const_expected = { 0x65991a6dc2f3a183, 0xe3ba1f83394a2d08, 0x8401df65a169db3f, 0x1727099643607bba }; + constexpr uint256_t a_original(a); + constexpr uint256_t b_original(b); + constexpr uint256_t prod_expected = (uint512_t(a_original) * uint512_t(b_original) % uint512_t(fq::modulus)).lo; + constexpr fq const_expected = prod_expected; constexpr fq const_result = a * b; static_assert(const_result == const_expected); @@ -141,8 +147,10 @@ TEST(fq, MulSqrConsistency) TEST(fq, SqrCheckAgainstConstants) { - constexpr fq a{ 0x329596aa978981e8, 0x8542e6e254c2a5d0, 0xc5b687d82eadb178, 0x2d242aaf48f56b8a }; - constexpr fq expected{ 0xbf4fb34e120b8b12, 0xf64d70efbf848328, 0xefbb6a533f2e7d89, 0x1de50f941425e4aa }; + constexpr fq a = uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL }; + + constexpr fq expected = + uint256_t{ 0x41081a42fdaa7e23UL, 0x44d1140f756ed419UL, 0x53716b0a6f253e63UL, 0xb1a0b04044d75fUL }; constexpr fq result = a.sqr(); static_assert(result == expected); diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.hpp 
b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.hpp index 1fddd927768..af41374d5c8 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.hpp @@ -6,6 +6,8 @@ namespace bb { struct Bn254Fq12Params { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq2 frobenius_coefficients_1{ { 0xaf9ba69633144907UL, 0xca6b1d7387afb78aUL, 0x11bded5ef08a2087UL, 0x02f34d751a1f3a7cUL }, { 0xa222ae234c492d72UL, 0xd00f02a4565de15bUL, 0xdc2ff3a253dfc926UL, 0x10a75716b3899551UL } @@ -20,6 +22,22 @@ struct Bn254Fq12Params { { 0x365316184e46d97dUL, 0x0af7129ed4c96d9fUL, 0x659da72fca1009b5UL, 0x08116d8983a20d23UL }, { 0xb1df4af7c39c1939UL, 0x3d9f02878a73bf7fUL, 0x9b2220928caf0ae0UL, 0x26684515eff054a6UL } }; +#else + static constexpr fq2 frobenius_coefficients_1{ + { 0xb75446af8a0c2399UL, 0xb5e243df8d8526c8UL, 0x7f6d66278fc2b89bUL, 0x2e05603062b5af58UL }, + { 0xaeefbf6e3bc6cc33UL, 0x7f50c04b4ed87762UL, 0x9a8b7572eb6a58d4UL, 0x9b83e6c410c870UL } + }; + + static constexpr fq2 frobenius_coefficients_2{ + { 0xd96ee8726e4983b2UL, 0xe9b7ed6a458f581eUL, 0x5361c2c89ea5d262UL, 0x24594fd198a79c6eUL }, + { 0UL, 0UL, 0UL, 0UL } + }; + + static constexpr fq2 frobenius_coefficients_3{ + { 0x9dc006978e6a3d3dUL, 0x695b3f038ef4bf24UL, 0x1a238968ba7a7ccdUL, 0x103828f20e49839cUL }, + { 0x5cbbb0bd4f4e6b31UL, 0xe83ce8be1b5b282bUL, 0x646d437ef03fbae3UL, 0x133cf9860031f0c0UL } + }; +#endif }; using fq12 = field12; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.test.cpp index c6dc81000b6..d9e55827100 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.test.cpp @@ -185,6 +185,8 @@ TEST(fq12, SubCheckAgainstConstants) TEST(fq12, MulCheckAgainstConstants) { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 
0xd43e9f8be859502b, 0x26a42a1a95cee1ef, 0x3d63c085c1892b32, 0x2e5beaf431211a76 }, { 0x5f32ad7cee215ff5, 0xce967fda9424120e, 0x10ea4e52628bac33, 0x51b85ee9671b7f3 } }, { { 0x95f8e84e0ff94a83, 0x6c6fb2cf3c73b30a, 0x28e8e13841f714a8, 0x2a3412f681e31b4d }, @@ -221,12 +223,56 @@ TEST(fq12, MulCheckAgainstConstants) { 0xeaf256aa7a6b49b5, 0xeaa1b56258e3194e, 0xde3b531fd4fe961b, 0x26a0b5c35ce4be53 } }, { { 0x1f7661fa7dd7d68c, 0x71c1360fdb272200, 0x3fdb8fcc1dbfd160, 0x1ba330295e24399b }, { 0x5c93a291c6579918, 0x6536baab9e09bc80, 0x93ad9959edff4c64, 0x138af9a14abfeb1e } } } }; +#else + fq12 a = { { { { 0x7c0386cfac84570eUL, 0x135ac6487c86816dUL, 0x130fe55503fd0b4dUL, 0x1fbc2d0fc05289e4UL }, + { 0x31f40b593ab506cbUL, 0xc4bbb9e4b2ce224UL, 0xf458f928ccf17d61UL, 0x1243d27a2aa21de4UL } }, + { { 0x67ae435929fa99e3UL, 0x93501c918a76046dUL, 0xaca4ccc8963e432eUL, 0x2bee18b27c27853eUL }, + { 0xd0c6730507d0d015UL, 0xd41cfd656c0a9059UL, 0xb292659d53fa0444UL, 0x2e8f0ac98edef6fdUL } }, + { { 0x700740aa0efd0e50UL, 0x2c5e9c0660931b42UL, 0x188425137ce80beUL, 0x15a745139a2d95a4UL }, + { 0xc270eebcc77b120eUL, 0x8dd2034c9f5e661dUL, 0xd0cacb8be3443ebbUL, 0x2206cf8406979618UL } } }, + { { { 0x14beeea0c29cf256UL, 0xec331baf4a9d8e57UL, 0x84c18cf8f3dfd61cUL, 0x172f849c8867a6a3UL }, + { 0x49c8f77c0173904UL, 0xa7ec5eadf91525UL, 0xb6af342102d7f350UL, 0x1931766a4a4de218UL } }, + { { 0x1d05943f42ce34b6UL, 0x2ec4bdddbaed0295UL, 0xf29903765d9d2a7UL, 0x180626982a98bb32UL }, + { 0x16cef1562b3f9cbfUL, 0x564982ca86391192UL, 0x338241cef0f07d6eUL, 0x2eceb2ea88b46fcdUL } }, + { { 0x16d7e01a042c1c8dUL, 0x6ccf62b19f1db7abUL, 0xdf7b7fb19a040d7bUL, 0x17278879d86f5ffaUL }, + { 0xb80f047affe4ba5aUL, 0xd4768f74c5e34883UL, 0x413437ff1a222a7UL, 0x1c9f79ff1e326bd6UL } } } }; + fq12 b = { { { { 0x8b6d20fcb2e4cfe1UL, 0xd90b5af04637d61UL, 0xe5213491fb1c8ddUL, 0x22c31d57c6199047UL }, + { 0x5d5e4792797a849fUL, 0xef0fb5048682755eUL, 0x4262903127b8490UL, 0x1c5a05774b7b87c2UL } }, + { { 0x6afefb11e053997dUL, 0xa9425cc6d3438879UL, 
0xc589bf0a479257f6UL, 0x2f265a3f46125967UL }, + { 0x16d32bf792576ea5UL, 0x838faa5f1ec28d7dUL, 0xf78fe731049b021dUL, 0x2b0eaaf50224c689UL } }, + { { 0x37aff72139bcccfcUL, 0xb3d22b3397a55baeUL, 0xf3efabf7233a8667UL, 0x3dbff83c87691bcUL }, + { 0x25f36df6da3ba93dUL, 0x2939ccbc8f01881dUL, 0x10a81e15af7aed31UL, 0x2e518a473abafad5UL } } }, + { { { 0xad5021ea46c06b79UL, 0xb7b76193fc41efe1UL, 0xa69eed0eb6ec2c57UL, 0x2c89ae19e58186bUL }, + { 0xae75112332f4de13UL, 0x374e8d70552ca0d8UL, 0x68e87f702af0ecf1UL, 0x95ede632701dd39UL } }, + { { 0x6de7a94aa7bc5726UL, 0x7874ee4c3c04b1cUL, 0x9a6e5d3e5875115dUL, 0x651f42a42021fb7UL }, + { 0x555a79f9e6ea299bUL, 0xd504f95c1ecbea79UL, 0xe97d114d516cef0bUL, 0x2d27cfdd54e9f124UL } }, + { { 0x8b3ae5f063f26da4UL, 0xf797224bfa14f904UL, 0xcdcd9c93aa02adfbUL, 0x25d073040d79eb5dUL }, + { 0xf84a169b376e11UL, 0xac1f29c1236def7cUL, 0xc84235bd3c78d593UL, 0x11668081e4c22e74UL } } } }; + fq12 expected = { + { { { 0xe1692f3291c79addUL, 0x75a0f3f9cb5b780fUL, 0x94fc10049567941cUL, 0x2cbd84240c99322dUL }, + { 0xec0b5c231d51cf6eUL, 0xaf66fb345ef4b557UL, 0x684bd6749e20d417UL, 0x6acb8ccf83a8a5aUL } }, + { { 0x4b5bfec7495191d9UL, 0xaaf3b2fb8c9417b3UL, 0x9e8cc0788452ef36UL, 0x150f0c9b2bd490d9UL }, + { 0xd38c4d68e8d61244UL, 0x7854bc167c3f883UL, 0xe422e992b4fd0935UL, 0x2e2ee820869b7371UL } }, + { { 0x9458ec7554a72a3eUL, 0x611f6d973e483feaUL, 0x3f8ea4f8370c8826UL, 0x189afef00f4165e6UL }, + { 0x8a57858c6746a623UL, 0x5c2f5d8907db836eUL, 0x18aa628b09f8cc39UL, 0x301cdc8e2edb165bUL } } }, + { { { 0xe339df6c6902f315UL, 0xbcf4e508382eec7UL, 0x1a86782e58331768UL, 0x15a3a1d93ce727deUL }, + { 0x9d3911edc9a69069UL, 0xacf7dd9e1ee36b27UL, 0xd3c0532725cf9a45UL, 0x1c7c570ac4e21c68UL } }, + { { 0x434490153d5b55f1UL, 0x3e3b2fb04143f767UL, 0x8960b1eb0cea5302UL, 0x2ebbac70edefc529UL }, + { 0x7f1d271429347ab1UL, 0x88934417e9466212UL, 0xc7939527fa312259UL, 0x1a4b0f339ebf2668UL } }, + { { 0xb48265b482310282UL, 0x910d43c20ce40215UL, 0x5cd12ae9ce1f579UL, 0xd588117ef09f079UL }, + { 
0xc0edc126a51743acUL, 0x8cc656a2dbe2116cUL, 0xd1efe6afadd96829UL, 0x2cab86c6c9a9e1ddUL } } } + }; +#endif + fq12 result = a * b; EXPECT_EQ(result, expected); } TEST(fq12, SparseMulCheckAgainstConstants) { + fq12::ell_coeffs ell; + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 0x8860ba0c4eea41a8, 0x71b65207984d47d2, 0x67e55696f8982ba9, 0x18236f03bcec9b00 }, { 0xa69e0f0ce60f64fd, 0x1cf52f3b2335b9b3, 0x45e8ec475fcb1d71, 0x1627ac08d10cebd9 } }, { { 0xc7343ce2fb7b4829, 0xff667dd3e618123b, 0xd03970bcf60881b4, 0x188e0b7acdd0b801 }, @@ -239,7 +285,6 @@ TEST(fq12, SparseMulCheckAgainstConstants) { 0x5752f0197b67dfa3, 0xb4ff7a53c23b98fd, 0x95dec4882eb275cd, 0x6815e3c55e10152 } }, { { 0x64f434f52a58b19b, 0xcdab64e3ae898031, 0x5d10a474f28b9462, 0x85452691edf6f18 }, { 0x2bb46c10f494b711, 0x66a853baee9e6a00, 0x3b3e0fd932afa021, 0x1ae752d1bbdef131 } } } }; - fq12::ell_coeffs ell; ell.o = { { 0xe49c67a74aaf8c22, 0xc5cc428c85da5d5a, 0xc946262e0c99d3d9, 0x2307b236a862e3e9 }, { 0x1659aef76f0397ef, 0x32d0c2d00f81d8a5, 0x7e87867d5f0c5ccd, 0x247307a3fd6fece7 } }; ell.vv = { { 0x6e6f2db65bdf07bd, 0xc26fa997848fb1e4, 0x13ec10cb6a0cd0ae, 0xf86d8967480301c }, @@ -258,6 +303,41 @@ TEST(fq12, SparseMulCheckAgainstConstants) { 0xee892e54b68159d6, 0xe0421cb20d103d69, 0xfe0591fdca60e2e3, 0x1650989fd73116b9 } }, { { 0x475dec6d5f2e2a75, 0xf25390f14ed7106, 0x61a4b571cb15d2fe, 0x1ad83abac0d5bdd7 }, { 0x8f730272c4cfee79, 0x60833c047d98a040, 0xbd1da3dc3fe5ad4a, 0x11bcc8faf5176d94 } } } }; +#else + fq12 a = { { { { 0x862f0c332df55dd7UL, 0x5635026deafe1c0aUL, 0x2ff6bd2d7c7147b2UL, 0x2e8d47bc6baafd9bUL }, + { 0x89fa385ceb16c5beUL, 0x55921370b07e22bbUL, 0xa8b9b8f0e450d905UL, 0x1f7936d1d0e6b8e0UL } }, + { { 0x247edaf1e79930b0UL, 0x74b911663be59ea5UL, 0x8229bc36a8fab0c6UL, 0xb7c882ba6fcfd9eUL }, + { 0x9b0f501da1aba3f8UL, 0xb9eada8afbf600cbUL, 0xaae6ba9ad6dcdf6cUL, 0x2e7c7f0141a21168UL } }, + { { 0x69f468a007750941UL, 0x8717631d88d69af9UL, 0x92ebf135aa3ae0fbUL, 0x2fde53f8ed2d4e89UL 
}, + { 0xaa3fdf0219928031UL, 0xb9f6c209fa53b22fUL, 0x42f0094eee6a0282UL, 0x1265a0c64610dd82UL } } }, + { { { 0xafba8564aac749c5UL, 0xa3fc62e90dcf2998UL, 0xe76f032508e46ceUL, 0x512a45304405419UL }, + { 0xaed59d4a84fadccfUL, 0xa8545ab127e7b2e7UL, 0xa22a72b116b8cd30UL, 0x2ed2f79741896fa7UL } }, + { { 0xe4e3c9f195fc922UL, 0x1e369406a0dd4156UL, 0x5cd5b95268031c47UL, 0x10e1a470d68ccc24UL }, + { 0xf9dbd2d40b07ff44UL, 0x41e9a032a5abf575UL, 0xda977a2bd0495840UL, 0xe9a8dbf3759a9abUL } }, + { { 0x71e3e23310a540fdUL, 0xbfe5879ec6f7116aUL, 0x883320dc9ebd285UL, 0x1894c4e575f5c23bUL }, + { 0x7864349a324a1069UL, 0xc57263b5e43ecca4UL, 0x2a6d5a07bc5d25f5UL, 0x2641249687928362UL } } } }; + ell.o = { { 0x2ca05edbe2b6c2dfUL, 0x1ce7be80591278acUL, 0x998e825bf09b8ad0UL, 0x7f33a82d0e71979UL }, + { 0x2828bcc994bb3f38UL, 0x25f65c6025901765UL, 0x896a468fbd6b50e0UL, 0x4f999ba91569906UL } }; + ell.vv = { { 0x74a03de706ff12daUL, 0x62e709427d845309UL, 0x4a5f604c328ca230UL, 0xcf00251c415c1e1UL }, + { 0x9ed8dd9107199bcfUL, 0xc8687b1778022dc1UL, 0xc85d46f0f0503eb5UL, 0x74aa6da92bf7c52UL } }; + ell.vw = { { 0x40dd44ad118c702eUL, 0x8e54bb7dba2b064dUL, 0x2f70e0e61191e016UL, 0x15da2c76976d740eUL }, + { 0xa0f9565f3904fdb9UL, 0xf365bb919669dda8UL, 0x9d115033fcfce745UL, 0x1d5b9ac7270a74d2UL } }; + fq12 expected = { + { { { 0x82285526713a81afUL, 0xb9bdafcf85cffdd7UL, 0x81e818f8dda89057UL, 0x538df3884fe91cdUL }, + { 0x2334d3765086fcc4UL, 0x5241e1cb445f8d0cUL, 0x9d67bfc7f3580b66UL, 0x16935cb50949401aUL } }, + { { 0x79e51946415c1d86UL, 0x2dc112e62e072e35UL, 0x31cc85fadec9dac7UL, 0x21cafde5a92abf84UL }, + { 0xc499133572cc7b01UL, 0xf1d5b744396dc992UL, 0x5a07a594d45537e2UL, 0x5308670f9c3d1f9UL } }, + { { 0xbc1249df0a659895UL, 0x35a160c6d314cba0UL, 0xc6feb7b32ad48754UL, 0x17c6a9d4f6fcaafdUL }, + { 0xb9be974900458414UL, 0x1746303c5d7e6930UL, 0x91409abda635eb5cUL, 0x2b3c8b47d060a389UL } } }, + { { { 0xdc1dee7128fefe88UL, 0x378772d011ad83b3UL, 0xc1532ed19f546d80UL, 0x1955fc12038a7e9UL }, + { 0x6da079dfafc00254UL, 
0xc86f97e68cba4484UL, 0x5dcaeaac906d378aUL, 0x121934a4af2b7682UL } }, + { { 0x86a09145e69d83d6UL, 0xfe70b95e33cefbc7UL, 0x76b0cc3628b7342eUL, 0x172e1bbf4c53ebafUL }, + { 0x875e2156f95560deUL, 0xbf2fc24debce9984UL, 0xac4e6fbe3709875dUL, 0x2496c9b2956c14f7UL } }, + { { 0xed94402785787ce9UL, 0xd50f0e3aae1fad67UL, 0xf7420d1ac923818aUL, 0x245e21b7266c1826UL }, + { 0x58fa495d4c9eed13UL, 0x8dd7ec5036305400UL, 0xb8417cb06c26dd46UL, 0x2349c06ef5cbd0ccUL } } } + }; +#endif + a.self_sparse_mul(ell); fq12 result = a; EXPECT_EQ(result, expected); @@ -265,6 +345,7 @@ TEST(fq12, SparseMulCheckAgainstConstants) TEST(fq12, SqrCheckAgainstConstants) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 0xef9d68a7df0715fd, 0xfda8aff4030523cf, 0xd09b1482069c0972, 0x252195422f351b07 }, { 0x3192057a31dec453, 0xe1c2dd8879191e47, 0xe90a8a00c9b29c5b, 0x1db75f06dff5dd5e } }, { { 0xdb01b2dbb451df8f, 0x42d8923147ae4171, 0xd1264f3077ab1733, 0x2fbabfe2fbc0c62f }, @@ -289,6 +370,35 @@ TEST(fq12, SqrCheckAgainstConstants) { 0x19ea0ed62e5093c2, 0xcf288a69b5a24352, 0xa9bdc89dd4491b7d, 0x447edc7b33f3d1c } }, { { 0xceb417494bece8e, 0x7f3d84971a20d351, 0x31679ed74c101d91, 0x1bb2c06842073c0c }, { 0x6db2993066e5fd73, 0x2c08c9fd6c3b5483, 0x3b32d43ab22d6cea, 0x3df72d32906f5f0 } } } }; +#else + fq12 a = { { { { 0x509ff2d7952b00f8UL, 0x80f400de95f97cc0UL, 0xcbdc0724af60e599UL, 0x1acb4d80c9fc5d10UL }, + { 0xbbd649942a91be1bUL, 0xf9c0c84462b1c06aUL, 0x735c138d99b9fc89UL, 0x1f7a0e55480cc8c4UL } }, + { { 0x184564b253194647UL, 0x2665e8d5000a721UL, 0xd31174f546b93313UL, 0x1b327c76331660ecUL }, + { 0xcf1585c76f7e33faUL, 0xd42af737f2d68572UL, 0x3b4f1daaf9248cf2UL, 0x28102c8df7cb8188UL } }, + { { 0xfd34a1893271a08dUL, 0xa8bb3e8ddf935064UL, 0xaf2e701ff4238744UL, 0x112cb808f50649edUL }, + { 0xfa6a796e73099831UL, 0xc33d172135fc08f1UL, 0xffc1f0839ae21c08UL, 0xd5487b930349686UL } } }, + { { { 0xa138da16197ba208UL, 0x131b351230ea78f4UL, 0x67d421144983327fUL, 0x301ad90db1293961UL }, + { 
0x2aaf49d5664bf971UL, 0x41de301d76480c2UL, 0xf1b7cd92f25da91eUL, 0x266ad04894fb98a1UL } }, + { { 0x5430ab66ae7c441eUL, 0x56b0046a411a6a05UL, 0x769a94899a38a9a8UL, 0x47009b2bb1105a4UL }, + { 0x90e78ec3428acf7fUL, 0x494d36f303578d13UL, 0xf860c04788d78bd4UL, 0xbff46fe73771bc5UL } }, + { { 0x4deef8f7b5691d29UL, 0x4ca2a905e4dc7c9UL, 0xd346bb2f908bf92dUL, 0x4e7f53251024a06UL }, + { 0x506c4af6c096a839UL, 0xb66ec8f49dcd25d7UL, 0x1d956454caa9c224UL, 0x80fd62496656a00UL } } } }; + fq12 expected = { + { { { 0x444065edd96c27eUL, 0x441edd1fb7593b4dUL, 0xebca21f0aba5b86aUL, 0x1a0f7150178bce4UL }, + { 0xd6944c6d8a9a1326UL, 0xebe3e1c083a9070aUL, 0x90085ed26d41b187UL, 0x270dbc63380d166fUL } }, + { { 0x6ff64bb4265979c2UL, 0x934f9a7229efd61bUL, 0xf2633f5fc77c71cdUL, 0x794a11250897c9UL }, + { 0x4c16eb3426ead093UL, 0xc6b10f92e5172d17UL, 0x722cc34bab735deeUL, 0x2ef62e8e932612a9UL } }, + { { 0xe5eb6b4fe61af24bUL, 0xf4ad92e89647ddbeUL, 0xf07438f58235164fUL, 0x2ddf71d5540c3861UL }, + { 0x1f892a5ed0dbc0bfUL, 0xdea7e0ca077a8f66UL, 0x561aba1a7909c0acUL, 0x2296a5f0bb3fca3UL } } }, + { { { 0xb33c0e27dc05cf5eUL, 0x9b5ac27c7f9f3fafUL, 0xb34ce34b0ddc0e33UL, 0x8d34950d591462UL }, + { 0x6633d2139211d6feUL, 0x1c194cb263ca6182UL, 0x280ced1e54e99b63UL, 0x78892452fa76a9eUL } }, + { { 0x8ffaebac35d5999eUL, 0x8e3226d773c7cac4UL, 0x180b0a89641fbc37UL, 0xd165c35b4cefb88UL }, + { 0xc500c29819187db2UL, 0xb60e7813e364d528UL, 0xc718884d8620befeUL, 0x28351c10a5846341UL } }, + { { 0x631e54f75f1002c2UL, 0x409714a9ec1a2c33UL, 0x374ef41466eb7b9bUL, 0xf4a88f46b6a3e97UL }, + { 0x3e120ddf2bc5b3d2UL, 0x52166a8ab686fb53UL, 0xf5b9fbe942aaec8aUL, 0x1b25bd7f5e7b7db3UL } } } + }; +#endif + fq12 result = a.sqr(); EXPECT_EQ(result, expected); } @@ -311,6 +421,7 @@ TEST(fq12, UnitaryInverse) TEST(fq12, FrobeniusMapThree) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 0x9a56f1e63b1f0db8, 0xd629a6c847f6cedd, 0x4a179c053a91458b, 0xa84c02b0b6d7470 }, { 0xffa3e17eab3609a1, 0x6a97b9cf5c3fe152, 
0x8996248da177be9f, 0x113bd2d7f24591d } }, { { 0x572c4fd8a85cc3b, 0x48197102a98815e8, 0x3a1d00190e8ee460, 0x8c0a0ce9c093781 }, @@ -335,12 +446,42 @@ TEST(fq12, FrobeniusMapThree) { 0x373dde8dfb6dceb3, 0xa0feac44ec583fb4, 0x257146bc7ad7d5c2, 0x1ee0a5c45a91938b } }, { { 0xf8c975188dd668a5, 0xfa38a6144e0c5451, 0x8ebdddc91016c224, 0x13fe7e09fe48aefb }, { 0x2ce375ffd1c12d33, 0xc2099e064cd9724d, 0x9c54b742a4d8bd59, 0x1c79d60ac5202c8c } } } }; +#else + fq12 a = { { { { 0xe21af43e50f3c756UL, 0x382c59a08c2f1c63UL, 0xf111de6049209f49UL, 0x2e3e2eb02684cd0eUL }, + { 0xf47c2fd566c13420UL, 0x52f739eb87fc2a5fUL, 0x32c491b42ef7d3edUL, 0x2277a5afe48b23b1UL } }, + { { 0x81b5e33f164894fdUL, 0xda70b7e26c9c83eUL, 0xaa0ea6914a55d235UL, 0x261e91951b2ecf56UL }, + { 0x8777f8c814c07822UL, 0xb1d30aee8bbb4fdbUL, 0xd68096f26bc12a63UL, 0x226bdb647a45d0b3UL } }, + { { 0xe196e3bdeadc85f8UL, 0xfc4ead6ed1903f55UL, 0x35fbc522dfecf6e5UL, 0x2ea7141ed2d4f68aUL }, + { 0x5018998ba882e541UL, 0x1f2f49ebb929119UL, 0x10bf13b591b51304UL, 0x2715b1dab0519809UL } } }, + { { { 0x41dfb519bce7a2a2UL, 0x57e69632d7d5db93UL, 0x63059436226719c0UL, 0x1382e9227bb12da2UL }, + { 0x78a2f4b9c37bba73UL, 0x9f5fa1370c59e023UL, 0x36960dd11dca7d4eUL, 0x1bb2293869e6eeaaUL } }, + { { 0xa7bb52bda67d2ce5UL, 0xd12b03267bae96bUL, 0x45ead6d4c0922699UL, 0x357633e5fd4e57bUL }, + { 0xf6caeb876f66196eUL, 0x5c88f8b1ea233a64UL, 0x6d24d190eef310f6UL, 0x2fa0d06ea9b6d35dUL } }, + { { 0x4bed4d1891ba154fUL, 0x2bf8026dae838260UL, 0xdcbd5388441e5626UL, 0xee0668e4e2fb0f6UL }, + { 0x8723a4e98854ba0bUL, 0x4d22e9a149ea8618UL, 0x5dda9a16aa96fb0aUL, 0x2fef151f315f190UL } } } }; + fq12 expected = { + { { { 0xe21af43e50f3c756UL, 0x382c59a08c2f1c63UL, 0xf111de6049209f49UL, 0x2e3e2eb02684cd0eUL }, + { 0x47a45c4171bbc927UL, 0x448a30a5e075a02dUL, 0x858bb40252898470UL, 0xdeca8c2fca67c78UL } }, + { { 0xd358397e360f2515UL, 0xfd6900b5784eb831UL, 0x64b0f2a74cb5b985UL, 0x303bdfa5683f19d3UL }, + { 0xee96f5c48ada25b8UL, 0xb17d89d5ee0965adUL, 0x5d90f2b14f0a7867UL, 
0x11089d3bd9d1812fUL } }, + { { 0x8b4a37515d2483f4UL, 0xe2f2f3d7704a8333UL, 0x82a719484b992a0cUL, 0x8358f71dd30b350UL }, + { 0xff959db32aa39cd3UL, 0xb246a2f8b40c4889UL, 0xb9d5613c61fc64c3UL, 0x127acef64f2e0dfbUL } } }, + { { { 0xc2603367444cbf36UL, 0x4cebd20389e5d4eeUL, 0xcb3f9abc665e6992UL, 0x1290194e45a92b01UL }, + { 0x7c9e4c6727aa44d7UL, 0x7e9de180a367babUL, 0x500cb6ac8f91a2a0UL, 0x1f7ac11ce8c52bb3UL } }, + { { 0x3796fd3f2e8f6cddUL, 0xb93f0f07868dcf79UL, 0x69a0645a73c08c82UL, 0x665f5d67055274UL }, + { 0x3530dff683f60cd4UL, 0x49b935416224237eUL, 0x47e3654d3cdfd104UL, 0x143e9791ba51ee22UL } }, + { { 0xb8785e8bc743805UL, 0x9582592773c34113UL, 0x7ba82edd6f46c7deUL, 0xab7c56a5990bd53UL }, + { 0x6224e65eff5bd762UL, 0x9a1a4290432e0bb7UL, 0x94f2017f7fff74a3UL, 0x282d3d44ce884ea4UL } } } + }; +#endif + fq12 result = a.frobenius_map_three(); EXPECT_EQ(result, expected); } TEST(fq12, FrobeniusMapTwo) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 0x52c2cc6e77bfe9bb, 0xd03d98cc3fd6d95, 0xfaeb6d6577aa9a30, 0x1ea38b81330e34df }, { 0x1f55d493000a14f3, 0x1db7ec50e2f5a356, 0xf3cfcc74b91481ae, 0x256fe76342b33dbb } }, { { 0xf3e95f622620a0f9, 0xe297badf08d73c22, 0x4df25d06ae059cfb, 0x16db699bc5bbddcb }, @@ -365,12 +506,42 @@ TEST(fq12, FrobeniusMapTwo) { 0x2461a96edf6a6749, 0xe0c7f9244e8d0ed1, 0xb55df0a79cb9ac2c, 0xa357103af082354 } }, { { 0xe1148c424a589341, 0x40ab0d25fb7fd0d1, 0x7909a54a9569db90, 0x99bde98bbc4352f }, { 0xfaa4fdcf224e38ee, 0x42b25f170bf5f577, 0xc13bf097c75be619, 0xbcb9923cbd60387 } } } }; +#else + fq12 a = { { { { 0xa5ce9c060e396dd4UL, 0xca5ede3c56c9dfa1UL, 0xf7283a6cd7385eb1UL, 0xc9b4f2cc9e618bcUL }, + { 0x47ad703bb58adfb8UL, 0x82db8c7a94096d86UL, 0x3273057afe6fecfdUL, 0x249591a339c0b395UL } }, + { { 0xf743b6ee14c147f7UL, 0x72621d5bfc3ca617UL, 0xf1978b242a1f7200UL, 0x58c9abd859356f7UL }, + { 0x9fc148e808531ae4UL, 0x7e33428ce1e43d80UL, 0x8246ca0b17d04b6cUL, 0x13266ecc9ef22872UL } }, + { { 0xef813b9466e4f00dUL, 0x41be0a62083cce0UL, 
0xb4bbcf52f290d43cUL, 0x255bcc4ea029409dUL }, + { 0xdef7a848a4ded44eUL, 0xcd9fc4819661004fUL, 0x28353ecc041c3066UL, 0x27a6a7890b897c1cUL } } }, + { { { 0x569b1e1b9916eab7UL, 0x77f844752482d618UL, 0xc8d2dfa5b90c75a1UL, 0x2b91d0892e6f3036UL }, + { 0xd83a28cd569274d7UL, 0xacd31b4648059115UL, 0x2d291841a5f79fffUL, 0x8853bfca3cd9a50UL } }, + { { 0xe904f05380da0bc2UL, 0xc9a74003c930b32fUL, 0x5a9981596b16c136UL, 0x2eea5b92180eb16eUL }, + { 0x18aea6c3fe1e03d1UL, 0xb8ac570097aafb8UL, 0x5e73d309f353e4f3UL, 0xc1004ae4756f68dUL } }, + { { 0x370079d737c6ed86UL, 0x298c4ec1f2b51e25UL, 0xdfc6f1416cbf760bUL, 0x2d5c11050cbe98d1UL }, + { 0x1462ea1f533b22a9UL, 0xb5262fc0a622613eUL, 0x6685b2cda9398a5cUL, 0x2fc6212886ea733aUL } } } }; + fq12 expected = { + { { { 0xa5ce9c060e396dd4UL, 0xca5ede3c56c9dfa1UL, 0xf7283a6cd7385eb1UL, 0xc9b4f2cc9e618bcUL }, + { 0x47ad703bb58adfb8UL, 0x82db8c7a94096d86UL, 0x3273057afe6fecfdUL, 0x249591a339c0b395UL } }, + { { 0x9828994245688eeaUL, 0xe5a280f898969f11UL, 0xb4b0ecd3af49dcc6UL, 0x21670b00576e3cafUL }, + { 0xd343da039e48db0eUL, 0xb3b4e737ecb54579UL, 0x1608becbcac11801UL, 0x8a492bd585ba0e3UL } }, + { { 0x249b9eedf5fd4d00UL, 0x61c05dafd482a437UL, 0x3e9c9f9aeb106d88UL, 0x9073e4985688fa5UL }, + { 0x264823249f36c1a0UL, 0xa7ad4a28f1311aeeUL, 0xa802735777a625bUL, 0x182813dd5fc55593UL } } }, + { { { 0x91e56d561fc65bf4UL, 0x91c74e7a38170c9bUL, 0xf8da19ddb4129b39UL, 0x4a864abc1999de1UL }, + { 0xf9dbb5cb6765f02eUL, 0x484221af215c12f1UL, 0x9dae4490f9df3878UL, 0x22d6e5b80da4cc69UL } }, + { { 0x531b9bc357a2f185UL, 0xcdda2a8d9f41175dUL, 0x5db6c45d166a9726UL, 0x179f2e0c922eebbUL }, + { 0x2371e552da5ef976UL, 0x8bf6a5215ef71ad5UL, 0x59dc72ac8e2d736aUL, 0x245449c499daa99cUL } }, + { { 0xb9ce3fc038247876UL, 0x88592556fd4f5aecUL, 0xcf53070ba4335fd6UL, 0x1121fc66315ce4f4UL }, + { 0xafbbe5445e5c30cfUL, 0x31c1f8e7a3a22522UL, 0x1d4c2afb60f35899UL, 0x26b4ff5552650fd4UL } } } + }; +#endif fq12 result = a.frobenius_map_two(); EXPECT_EQ(result, expected); } TEST(fq12, 
FrobeniusMapOne) { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq12 a = { { { { 0x6c9edca7f0d6f6e, 0x7bb482de96b01e0, 0xb04fc4b2b2ea7e6, 0x4d9efc00ceb8323 }, { 0xb55c2222935ee583, 0x9c114ab89499b4da, 0x771cb5cabe1f458a, 0x1c3f0ac5303a5935 } }, { { 0x524feabf94af29ea, 0x95573536ab8b6ced, 0x524e16790930912c, 0x280d5af94a3424d0 }, @@ -395,6 +566,35 @@ TEST(fq12, FrobeniusMapOne) { 0x6602e7e93a714d67, 0x7398f14acf72c7e0, 0x8028d203d5e4928, 0x7d1fad57418b580 } }, { { 0xcba1922169de670, 0xcd20689212638b5e, 0x8dbbc53af7639bbb, 0x57a19a043d38c39 }, { 0x2b2d3090bfb1118b, 0xa752e789e316e0c7, 0xc1c4d33385bc3e10, 0x2610936b5468ba45 } } } }; +#else + fq12 a = { { { { 0x24dc150b5836f5ebUL, 0x30e4c608f40adc59UL, 0x37aeb841e150f3a8UL, 0xa110ca8f9db83e4UL }, + { 0x713a6ab73312e162UL, 0xdb0fd8d93b365d68UL, 0xedf1d282a8d07abeUL, 0x20d3d49231cde3bfUL } }, + { { 0x2eaf1da09933840aUL, 0x47c1d410d5df0b52UL, 0x919bba97feef2c11UL, 0x177b677e677a55cdUL }, + { 0xf888f6cf22cba791UL, 0xf820cd3640d260ebUL, 0x32742ec8e28152aeUL, 0x36fc6b21931e9e2UL } }, + { { 0x779044381bcbd101UL, 0x3f5ba296ae5db8faUL, 0xc2dbbc1691c8456aUL, 0x12d18799d91da0dUL }, + { 0xd089a63726293a6aUL, 0x77cd64002c1c4bcaUL, 0xd76a11cb5f5c0da6UL, 0x21add603f21af96eUL } } }, + { { { 0x8dcabcf31424c06fUL, 0x16bac862dc9fed95UL, 0xc1ae831f305040e5UL, 0x1e6200dce1120d3dUL }, + { 0xd1f5ad6845446895UL, 0x74526d8ca424b736UL, 0x849b3d172cc8381fUL, 0x12e88895f9e2a0d4UL } }, + { { 0x85cc8318ddbe2910UL, 0x961fb2e5108e0e4fUL, 0x781905321776e776UL, 0x2e8093940b560716UL }, + { 0x8b2ce4303baba4d9UL, 0x866a756e2161f73eUL, 0x1b230d82dbc3d550UL, 0x210f44fb356348c0UL } }, + { { 0xc57933e5530111baUL, 0xe45d80ed27b8a6b4UL, 0x7feeb0f2e09ca2cbUL, 0x1fdb773784242816UL }, + { 0xb5580ae30b1f6bf0UL, 0x51e1fbe74aad988dUL, 0x1a4e45b3185c094bUL, 0x1d0f5f64f6aa211aUL } } } }; + fq12 expected = { + { { { 0x24dc150b5836f5ebUL, 0x30e4c608f40adc59UL, 0x37aeb841e150f3a8UL, 0xa110ca8f9db83e4UL }, + { 0xcae6215fa56a1be5UL, 0xbc7191b82d3b6d24UL, 
0xca5e7333d8b0dd9eUL, 0xf9079e0af63bc69UL } }, + { { 0x2691a685eb8b9e52UL, 0xc66888725d4805e4UL, 0xfc9cca7897e98f66UL, 0xbba94db29fe53ddUL }, + { 0x9f81e7019e774940UL, 0x36c0b8a5a6682687UL, 0x430a3924d0194d94UL, 0x2e938f15bd7f14a6UL } }, + { { 0x74e35b32ad2905fUL, 0x35afc43add46aeedUL, 0xb0309a03e6a3fe42UL, 0x3f0424b1202b900UL }, + { 0x1d98151eed9dceaeUL, 0x13f07d5ab22bb4fUL, 0xe14df7a387f2a2cfUL, 0x1ba0ba8d43259443UL } } }, + { { { 0x4d7742f9a326103fUL, 0x4f500f51726e60e7UL, 0xcce27ad8fe9043c1UL, 0x45db038f7fc875bUL }, + { 0x675053d4c95fe601UL, 0x8dc76ffbc91ef3feUL, 0x4b7246a3829a5be1UL, 0x2a53c42803e89a45UL } }, + { { 0xef087aab854dca2UL, 0x6de4ca5802af8bfaUL, 0xcc29efb20b2d894dUL, 0x2fef6cff0a2d4495UL }, + { 0x93ba40b513b8ba7dUL, 0x7d971482e420074aUL, 0x66c0477724426b3aUL, 0x849d2701d1e8f30UL } }, + { { 0xe2e17ffe4a45d62bUL, 0xdd88d28e131c0c19UL, 0x8e87d63b67ef6e60UL, 0x1e1648afd6dca6b4UL }, + { 0x867863dcd1ed7571UL, 0x1eb989092fbf511aUL, 0x38c3979e11e620f1UL, 0x846c4328f3ea4a5UL } } } + }; +#endif + fq12 result = a.frobenius_map_one(); EXPECT_EQ(result, expected); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.hpp index ec4148ad2d1..fce8cc53521 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.hpp @@ -5,6 +5,7 @@ namespace bb { struct Bn254Fq2Params { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq twist_coeff_b_0{ 0x3bf938e377b802a8UL, 0x020b1b273633535dUL, 0x26b7edf049755260UL, 0x2514c6324384a86dUL }; @@ -29,6 +30,32 @@ struct Bn254Fq2Params { static constexpr fq twist_cube_root_1{ 0xad607f911cfe17a8UL, 0xb6bb78aa154154c4UL, 0xb53dd351736b20dbUL, 0x1d8ed57c5cc33d41UL }; +#else + static constexpr fq twist_coeff_b_0{ + 0xdc19fa4aab489658UL, 0xd416744fbbf6e69UL, 0x8f7734ed0a8a033aUL, 0x19316b8353ee09bbUL + }; + static constexpr fq twist_coeff_b_1{ + 0x1cfd999a3b9fece0UL, 
0xbe166fb279c1a7c7UL, 0xe93a1ba45580154cUL, 0x283739c94d11a9baUL + }; + static constexpr fq twist_mul_by_q_x_0{ + 0xecdea09b24a59190UL, 0x17db8ffeae2fe1c2UL, 0xbb09c97c6dabac4dUL, 0x2492b3d41d289af3UL + }; + static constexpr fq twist_mul_by_q_x_1{ + 0xf1663598f1142ef1UL, 0x77ec057e0bf56062UL, 0xdd0baaecb677a631UL, 0x135e4e31d284d463UL + }; + static constexpr fq twist_mul_by_q_y_0{ + 0xf46e7f60db1f0678UL, 0x31fc2eba5bcc5c3eUL, 0xedb3adc3086a2411UL, 0x1d46bd0f837817bcUL + }; + static constexpr fq twist_mul_by_q_y_1{ + 0x6b3fbdf579a647d5UL, 0xcc568fb62ff64974UL, 0xc1bfbf4ac4348ac6UL, 0x15871d4d3940b4d3UL + }; + static constexpr fq twist_cube_root_0{ + 0x49d0cc74381383d0UL, 0x9611849fe4bbe3d6UL, 0xd1a231d73067c92aUL, 0x445c312767932c2UL + }; + static constexpr fq twist_cube_root_1{ + 0x35a58c718e7c28bbUL, 0x98d42c77e7b8901aUL, 0xf9c53da2d0ca8c84UL, 0x1a68dd04e1b8c51dUL + }; +#endif }; using fq2 = field2; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.test.cpp index 168b1c95ea7..0a0223b938d 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq2.test.cpp @@ -51,46 +51,85 @@ TEST(fq2, RandomElement) TEST(fq2, MulCheckAgainstConstants) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq2 a = { { 0xd673ba38b8c4bc86, 0x860cd1cb9e2f0c85, 0x3185f9f9166177b7, 0xd043f963ced2529 }, { 0xd4d2fad9a3de5d98, 0x260f72ca434ef415, 0xca5c20c435accb2d, 0x122a54f828a07ffe } }; fq2 b = { { 0x37710e0986ad0fab, 0xd9b1f41ba9d3bd92, 0xf71f600e90104795, 0x24e1f6018a4d85c6 }, { 0x5e65448f225b0f60, 0x7783aecd5d7bfa84, 0xc7a76eed72d68723, 0xc8f427c031af99a } }; fq2 expected = { { 0x1652ca66b00ad519, 0x6619a315656ea7c7, 0x1d8491b044e9a08f, 0xcbe6d11bff2e56b }, { 0x9694fb422eff4e79, 0xebdbcf03e8539a17, 0xc4787fb63b8d10e8, 0x1a5cc397aae8811f } }; +#else + fq2 a = { { 0xed72e66054afa688UL, 0x58ee4e882533c50UL, 
0x6e3d116ec0243404UL, 0x1d657f309417a3d8UL }, + { 0xc8d8ca2255efd3acUL, 0xa7dd5a778489041bUL, 0xa7c0d3f8a3894141UL, 0x96f1a285bc7de4UL } }; + fq2 b = { { 0x4b149f0c89ea36b8UL, 0x21c85d36fccb509UL, 0x9c6578b5dde8a9f5UL, 0x12d7656c2d09b4f5UL }, + { 0xeba4312d877a01c8UL, 0x346a85206bf0fc21UL, 0x326baffa4ec62182UL, 0xec5dbe959d2320bUL } }; + fq2 expected = { { 0xe954ec1f3d72b8e8UL, 0x7290e216a46a478UL, 0xee10085491294f00UL, 0x14ab2ea0f4cfac15UL }, + { 0xd4761ac17f9cfd69UL, 0x6be1ccd51ae4cf91UL, 0x51bb55a8d80b3ee6UL, 0x14ef3d5468c48133UL } }; +#endif + fq2 result = a * b; EXPECT_EQ(result, expected); } TEST(fq2, SqrCheckAgainstConstants) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq2 a = { { 0x26402fd760069ee8, 0x17828cf3bf7dd3e3, 0x4e7449f7b1149987, 0x102f6467805d7298 }, { 0xa2a31bf895eaf6f8, 0xf0c88d415c372b16, 0xa65ccca8b7806691, 0x1b51e4526673451f } }; fq2 expected = { { 0xb51c9049894c45f3, 0xf8ef65c0244dfc90, 0x42c37c0f7d09aacb, 0x64ddfb845b2901f }, { 0x9e176fa8cdca97b1, 0xd04ae89dab7da31e, 0x637b83e950322d50, 0x155cccfadafc70b4 } }; +#else + fq2 a = { { 0x6ec082078bf1f83aUL, 0x54374c9db4892e0UL, 0x9b6685d51385bd3bUL, 0x22017c733fbe1168UL }, + { 0x1a19a57784951002UL, 0x71f829f22ee524e6UL, 0xd5f4ae41d4f49ba9UL, 0x32f0638f8eb6105UL } }; + fq2 expected = { { 0xb30fd8d5c794c944UL, 0xbfe70dbee7f867e1UL, 0x772e6b159b2ff808UL, 0x82abd3d318b8341UL }, + { 0x79264bd9e27d1c3eUL, 0xc0493fc1b97b501aUL, 0x5b0cad2ef132d4fbUL, 0x61d55130ed75444UL } }; +#endif + fq2 result = a.sqr(); EXPECT_EQ(result, expected); } TEST(fq2, AddCheckAgainstConstants) { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq2 a = { { 0x517c157ce1664f30, 0x114ba401b0996437, 0x11b9ae2d856012e8, 0xcc19341ea7cf685 }, { 0x17c6020dde15fdc0, 0x310bc25961b2f002, 0xa766e7e94a865c0d, 0x20176bc8e6b82863 } }; fq2 b = { { 0xffad1c8ac38be684, 0x2a953b27cb1f541d, 0xfc12b9dfe76a0f12, 0x434c570deb975a6 }, { 0x87430d4b17897ace, 0x33ab4d0e55e8932a, 0xe4465ff65990dd31, 0x83db0b3c55f9e9f } }; fq2 expected 
= { { 0x51293207a4f235b4, 0x3be0df297bb8b855, 0xdcc680d6cca21fa, 0x10f658b2c9366c2c }, { 0x9f090f58f59f788e, 0x64b70f67b79b832c, 0x8bad47dfa417393e, 0x28551c7cac17c703 } }; +#else + fq2 a = { { 0x4e7e4ee568e1fbc8UL, 0x6d692baacf9e3280UL, 0x74b397fc9ff79a15UL, 0x150ff4a64611cf54UL }, + { 0xa14c3dc007ef12dUL, 0xb3da8d3ea50862adUL, 0xce474530b12f41f8UL, 0xab309b05df2e908UL } }; + fq2 b = { { 0x7d62792ac082d5f2UL, 0x23a48fd69306eea5UL, 0x11b6b08fea3f318aUL, 0x25d0113614cb748cUL }, + { 0xbbbeecf0b6be675dUL, 0x7fe28cf3b2d9708eUL, 0xef3aa23aaa94ec52UL, 0x15c08e3a45fbb32bUL } }; + fq2 expected = { { 0x8fc03bf950e7d473UL, 0xf98c50effa335698UL, 0xce1a02d608b57341UL, 0xa7bb76979aba3b6UL }, + { 0xc5d3b0ccb73d588aUL, 0x33bd1a3257e1d33bUL, 0xbd81e76b5bc42e4bUL, 0x207397eaa3ee9c34UL } }; +#endif fq2 result = a + b; EXPECT_EQ(result, expected); } TEST(fq2, SubCheckAgainstConstants) { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq2 a = { { 0x3212c3a7d7886da5, 0xcea893f4addae4aa, 0x5c8bfca7a7ed01be, 0x1a8e9dfecd598ef1 }, { 0x4a8d9e6443fda462, 0x93248a3fde6374e7, 0xf4a6c52f75c0fc2e, 0x270aaabb4ae43370 } }; fq2 b = { { 0x875cef17b3b46751, 0xbba7211cb92b554b, 0xa4790f1657f85606, 0x74e61182f5b5068 }, { 0x8a84fff282dfd5a3, 0x77986fd41c21a7a3, 0xdc7072908fe375a9, 0x2e98a18c7d570269 } }; fq2 expected = { { 0xaab5d49023d40654, 0x130172d7f4af8f5e, 0xb812ed914ff4abb8, 0x13403ce69dfe3e88 }, { 0xfc292a88999acc06, 0xb30d84fd2ab397d0, 0xd0869855675edee2, 0x28d657a1aebed130 } }; +#else + fq2 a = { { 0x442f277690c0e2e9UL, 0xc57a6aedcbce21e5UL, 0x542af3d6640959a2UL, 0x1b2a8a38b6e63b66UL }, + { 0x72861e4d5b7fd051UL, 0x98eddfc89951d51eUL, 0x9501d71c127de4aeUL, 0x2789ae315eadca0bUL } }; + fq2 b = { { 0xfb1bb29b1498f504UL, 0x16de795183a37f3bUL, 0xade0cbf0f9055f61UL, 0x283ae93a66a38c6dUL }, + { 0x44cf93a2fd55060eUL, 0x31e37d7946df37e4UL, 0xf4a626aecf465a37UL, 0x27530019470f8857UL } }; + fq2 expected = { { 0x853400f254a4eb2cUL, 0x461d5c2db09c6d36UL, 0x5e9a6d9bec85529fUL, 0x2353ef7131744f22UL 
}, + { 0x2db68aaa5e2aca43UL, 0x670a624f52729d3aUL, 0xa05bb06d43378a77UL, 0x36ae18179e41b3UL } }; +#endif + fq2 result = a - b; EXPECT_EQ(result, expected); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.hpp index 360b5d140d6..ca86ae26203 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.hpp @@ -6,6 +6,8 @@ namespace bb { struct Bn254Fq6Params { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq2 frobenius_coeffs_c1_1{ { 0xb5773b104563ab30UL, 0x347f91c8a9aa6454UL, 0x7a007127242e0991UL, 0x1956bcd8118214ecUL }, { 0x6e849f1ea0aa4757UL, 0xaa1c7b6d89f89141UL, 0xb6e713cdfae0ca3aUL, 0x26694fbb4e82ebc3UL } @@ -35,6 +37,36 @@ struct Bn254Fq6Params { { 0x448a93a57b6762dfUL, 0xbfd62df528fdeadfUL, 0xd858f5d00e9bd47aUL, 0x06b03d4d3476ec58UL }, { 0x2b19daf4bcc936d1UL, 0xa1a54e7a56f4299fUL, 0xb533eee05adeaef1UL, 0x170c812b84dda0b2UL } }; +#else + static constexpr fq2 frobenius_coeffs_c1_1{ + { 0xecdea09b24a59190UL, 0x17db8ffeae2fe1c2UL, 0xbb09c97c6dabac4dUL, 0x2492b3d41d289af3UL }, + { 0xf1663598f1142ef1UL, 0x77ec057e0bf56062UL, 0xdd0baaecb677a631UL, 0x135e4e31d284d463UL } + }; + + static constexpr fq2 frobenius_coeffs_c1_2{ + { 0x8aeb638758ccb791UL, 0xee27476838ae0f5bUL, 0x5fc8441d09282bUL, 0x169119a8426a57f9UL }, { 0UL, 0UL, 0UL, 0UL } + }; + + static constexpr fq2 frobenius_coeffs_c1_3{ + { 0x4738e103136caecdUL, 0xf491475bc376b8c3UL, 0x1f4034a3a97cbee8UL, 0xcad5f8fef61ccd7UL }, + { 0x2f41c395e6e485d6UL, 0x997230c70242aa46UL, 0xeae16f2184887ab5UL, 0x266696f73bcfc9b2UL } + }; + + static constexpr fq2 frobenius_coeffs_c2_1{ + { 0x227346b0b081f85eUL, 0x6e51a67130492bb5UL, 0x7e20162e52b19e16UL, 0x1677516f2343bb4bUL }, + { 0x18b280852f616a78UL, 0x25433712bde06eceUL, 0xb00a58256b9a0e66UL, 0x6f9f8e111971bbdUL } + }; + + static constexpr fq2 frobenius_coeffs_c2_2{ + { 0x62b1a3a46a337995UL, 
0xadc97d2722e2726eUL, 0x64ee82ede2db85faUL, 0xc0afea1488a03bbUL }, + { 0UL, 0UL, 0UL, 0UL } + }; + + static constexpr fq2 frobenius_coeffs_c2_3{ + { 0xa0d044540af866c4UL, 0x9cc0145f7df631b3UL, 0x29dda327cd752de1UL, 0x14766fdb0a170a74UL }, + { 0xdd532940e9d402f7UL, 0x541490c5bfda559eUL, 0xd9c9c659c541b0b8UL, 0xbaf8cb569cbb3e4UL } + }; +#endif // non residue = 9 + i \in Fq2 static inline constexpr fq2 mul_by_non_residue(const fq2& a) diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.test.cpp index 370abd9a6e7..cc0c7e45abb 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq6.test.cpp @@ -92,6 +92,7 @@ TEST(fq6, AddCheckAgainstConstants) { 0xf2431be14b4df482, 0xc9bb05cb691445b8, 0xf02ed57856eb46bb, 0x16dbf34bb8373fd5 } }, { { 0x8d683ec33bd2d09f, 0xbb76c48d1ad7befe, 0xfc20598f07f9868f, 0x2251f84b9cb740d7 }, { 0x91137730616d416f, 0x7892e5f10d06fc71, 0x7115b23cadf2176, 0x243b593fe662d53 } } }; + fq6 result = a + b; EXPECT_EQ(result, expected); } @@ -122,6 +123,7 @@ TEST(fq6, SubCheckAgainstConstants) TEST(fq6, MulCheckAgainstConstants) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq6 a{ { { 0xa7e3494fc528b8c8, 0xc8c8906c9682e43f, 0xc6e76fc21152721c, 0x12a4c3ee3ff10dbd }, { 0x887ce62a3ae2a578, 0x70caee28e1942bac, 0xc1a58242c34ff94f, 0x0b154d910b492542 } }, { { 0x8c885006cc08667a, 0xee0b6c4a0dbb9592, 0xa755229d6272b51e, 0x2629b93f67eb8dd6 }, @@ -140,12 +142,35 @@ TEST(fq6, MulCheckAgainstConstants) { 0x4b2fbc422420f06a, 0x3a8e5b388fdedd1f, 0x06006b4471134540, 0x0d4fee4f7966d63d } }, { { 0x4ffcbaa876979a1c, 0x32b7c1ef7d251306, 0x1b4e0712f969804e, 0x200592dfe71b710f }, { 0xe3eb378754bfb1ac, 0x6b517c1cae53d784, 0xd1b29c0eb1e4d46f, 0x08b42f13fdd14172 } } }; +#else + fq6 a{ { { 0x2ae298e67f3b39acUL, 0xff010ec1eb070956UL, 0x392ab3b4183e1f35UL, 0xfe4d0656fce35c4UL }, + { 0x6ab8f0a770e9c20fUL, 
0xf4d3db225768ebb4UL, 0x2a7e605adf75bf5eUL, 0xfeb8cfd40c94734UL } }, + { { 0xb1dc529e5cd81351UL, 0xf5ca210e8455ea86UL, 0xeacd84d9a8b502b9UL, 0xb6b7eb4ff9916c1UL }, + { 0xdb94de41ad3b48d0UL, 0x5953eb9473583fe8UL, 0xa603759c9ad36f81UL, 0x229e55e6aa957e6UL } }, + { { 0x3c0c61a8882bdd6cUL, 0xd8fe0e66857b4d54UL, 0xb39ce4d438c3eb07UL, 0x2c6333d09ff65713UL }, + { 0x79d7e64184f4cbb1UL, 0x46523cfdd9722bd8UL, 0xdb3fdb38faf61435UL, 0xe8198361076a5a5UL } } }; + fq6 b{ { { 0x1ac3b1e7ec8a731cUL, 0xbb7de52d99e73d29UL, 0x4caac2356d446d23UL, 0x929876b197c1767UL }, + { 0x46e1737df8be5c58UL, 0x3d2d14ad3aa1890cUL, 0x659c80230fad0fa0UL, 0xd47f2fbefb5fbabUL } }, + { { 0x8b4d2a252c11fd02UL, 0x415b985e57d8c07aUL, 0x864441c79f72d7b5UL, 0x143306f7ce4da3aeUL }, + { 0xd76ea5fe36f41c42UL, 0xc546a55497cb7e0aUL, 0x6027b6dc6f841d13UL, 0x2d7f5a564d5981b5UL } }, + { { 0xf8fced7f8d6ce98UL, 0x46d85360675c5f7bUL, 0x663867cd6a61f912UL, 0x1c3fbd1c4728ce2fUL }, + { 0xd7681e6bff8abe8bUL, 0x951b03f1bffa2c2fUL, 0x66fd7a89c9ec33b2UL, 0xc425d325d08a85fUL } } }; + fq6 expected{ { { 0xccc2041ef7e674a1UL, 0xf2f0e47f82792d77UL, 0xb4b9f006110451c9UL, 0xdae59051f5a8c62UL }, + { 0x9482d60673539368UL, 0x42c40af4541687e4UL, 0x67c6919c35403c12UL, 0xb8254cf01cba09eUL } }, + { { 0x3b942b02bf094a1UL, 0xff838144f8716d23UL, 0x8530532ec620bef1UL, 0x25d5c85a56786593UL }, + { 0x84f3278dc0362308UL, 0x95c01286b84d4f7fUL, 0xfd8b3ada165de51aUL, 0x26db5658234dc652UL } }, + { { 0x10ebd72f10b27cadUL, 0xe95a8002134cc334UL, 0x4b2b2a668d93ca18UL, 0x877ec906a5bfe77UL }, + { 0x50c434785d85431dUL, 0x74a86ebec041fbdaUL, 0x9cc22545b513d419UL, 0x24905a4154300d89UL } } }; +#endif + fq6 result = a * b; EXPECT_EQ(result, expected); } TEST(fq6, SqrCheckAgainstConstants) { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) fq6 a{ { { 0xe337aaa063afce6, 0xff4b5477485eb20, 0xef6dcf13b3855ef8, 0x14554c38da988ece }, { 0x6a70e65e71431416, 0xd21f95045c45f422, 0x2a17b6c6ff517884, 0x1b01ad6487a3ff16 } }, { { 0xea39618e9f05e1f, 0x63e9b0f7803072a6, 
0xebe5538a2c75c89, 0x5312aad2ac95dcf }, @@ -158,6 +183,21 @@ TEST(fq6, SqrCheckAgainstConstants) { 0xd48ac80d8e6e52b5, 0x1791b8c4145bc2d3, 0x35c456444cdcf9be, 0x1eddd29d77366c08 } }, { { 0x56f1f8acbaed1118, 0xdd74b8bb2e47de74, 0x97525aa49c65f0fd, 0x15bbf236e098fa0f }, { 0xad97a94142524aeb, 0x42a508523527268b, 0x4c9c5f213de06ca8, 0x73fa6bc31efa2f2 } } }; +#else + fq6 a{ { { 0xb8c83817c906c025UL, 0x4d043f8c42f61ad5UL, 0x91a65831dd1a6241UL, 0x15918b45e38cb7bfUL }, + { 0x4ff37e49c815b109UL, 0x345a8ce3993010ecUL, 0x5a237c150983263UL, 0x298c76f000344000UL } }, + { { 0x20111ed8b494cc0bUL, 0xb6b1df3bccb8f51aUL, 0xaed9d5f0d4678813UL, 0x14f86a4cb596d964UL }, + { 0x69bc7d9504b28c8fUL, 0xe0d8603ce6221c7bUL, 0x23ca4fa0d532663fUL, 0x1a80d9d5b362f1a2UL } }, + { { 0x25eb400748a0cf37UL, 0x89d64fd9d5bf6d15UL, 0x5d26ffdaa12d840cUL, 0x2569403a2168757UL }, + { 0xcdec65e163c03266UL, 0xd10e3957cf3b72b0UL, 0xec521e4d37493492UL, 0x129d95f2098a2ca4UL } } }; + fq6 expected{ { { 0x3c4bcc8dcefcaceeUL, 0x34ab9174317f1e3aUL, 0x1ef0e16468a08463UL, 0x15d11e13ea53477bUL }, + { 0xa863e40cfbb3daa5UL, 0xce21a9ece91fa28dUL, 0x18f8b8d5131d5b16UL, 0x217cae35f576c1cUL } }, + { { 0xc9c6c70ba08b73c0UL, 0xcad2cccbf550a886UL, 0xfc81330087d97569UL, 0x887ec11880851c1UL }, + { 0xdece0fe8e4068d14UL, 0x1c1ac52662948771UL, 0x524556477d845073UL, 0x13e432b54eecfdc4UL } }, + { { 0x94776c5786cc491eUL, 0x6583437212c2bad1UL, 0xd5e7849877ab4a9dUL, 0x1201fc93c2687faaUL }, + { 0xc272f7cce8556844UL, 0xf69b6001031da740UL, 0xb24acd4db6083391UL, 0x26639dbab92ddda2UL } } }; +#endif + fq6 result = a.sqr(); EXPECT_EQ(result, expected); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp index cf43e95d569..fcf2bc16a55 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp @@ -63,6 +63,49 @@ class Bn254FrParams { 0x463456c802275bedULL, 0x543ece899c2f3b1cULL, 
0x180a96573d3d9f8ULL, 0xf8b21270ddbb927ULL, 0x1d9598e8a7e39857ULL, 0x2ba010aa41eb7786ULL, 0x39aa886bdbf356b5ULL, 0x47b5002d75fb35e5ULL, }; + + static constexpr uint64_t modulus_wasm_0 = 0x10000001; + static constexpr uint64_t modulus_wasm_1 = 0x1f0fac9f; + static constexpr uint64_t modulus_wasm_2 = 0xe5c2450; + static constexpr uint64_t modulus_wasm_3 = 0x7d090f3; + static constexpr uint64_t modulus_wasm_4 = 0x1585d283; + static constexpr uint64_t modulus_wasm_5 = 0x2db40c0; + static constexpr uint64_t modulus_wasm_6 = 0xa6e141; + static constexpr uint64_t modulus_wasm_7 = 0xe5c2634; + static constexpr uint64_t modulus_wasm_8 = 0x30644e; + + static constexpr uint64_t r_squared_wasm_0 = 0x38c2e14b45b69bd4UL; + static constexpr uint64_t r_squared_wasm_1 = 0x0ffedb1885883377UL; + static constexpr uint64_t r_squared_wasm_2 = 0x7840f9f0abc6e54dUL; + static constexpr uint64_t r_squared_wasm_3 = 0x0a054a3e848b0f05UL; + + static constexpr uint64_t cube_root_wasm_0 = 0x7334a1ce7065364dUL; + static constexpr uint64_t cube_root_wasm_1 = 0xae21578e4a14d22aUL; + static constexpr uint64_t cube_root_wasm_2 = 0xcea2148a96b51265UL; + static constexpr uint64_t cube_root_wasm_3 = 0x0038f7edf614a198UL; + + static constexpr uint64_t primitive_root_wasm_0 = 0x2faf11711a27b370UL; + static constexpr uint64_t primitive_root_wasm_1 = 0xc23fe9fced28f1b8UL; + static constexpr uint64_t primitive_root_wasm_2 = 0x43a0fc9bbe2af541UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x05d90b5719653a4fUL; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0xab46711cdffffcb2ULL, 0xdb1b52736ffffc09ULL, + 0x0af033c9fffffb60ULL, 0xf6e31f8c9ffffab6ULL, + 0x26b800e32ffffa0dULL, 0x568ce239bffff964ULL, + 0x427fcdfc5ffff8baULL, 0x7254af52effff811ULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0x2476607dbd2dfff1ULL, 0x9a3208a561c2b00bULL, + 0x0fedb0cd06576026ULL, 0x5d7570ac31329faeULL, + 0xd33118d3d5c74fc9ULL, 0x48ecc0fb7a5bffe3ULL, + 0x967480daa5373f6cULL, 
0x0c30290249cbef86ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0xe6b99ee0068dfc25ULL, 0x39bb9964882aa6a5ULL, + 0x8cbd93e909c75126ULL, 0x276f48b709e2a349ULL, + 0x7a71433b8b7f4dc9ULL, 0xcd733dc00d1bf84aULL, + 0x6824f28e0d374a6dULL, 0xbb26ed128ed3f4eeULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x1484c05bce00b620ULL, 0x224cf685243dfa96ULL, + 0x30152cae7a7b3f0bULL, 0x0d791464ef86e357ULL, + 0x1b414a8e45c427ccULL, 0x290980b79c016c41ULL, + 0x066d686e110d108dULL, 0x14359e97674a5502ULL }; + // used in msgpack schema serialization static constexpr char schema_name[] = "fr"; static constexpr bool has_high_2adicity = true; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp index 2b2fa35fb75..7d2d89edd04 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp @@ -56,9 +56,12 @@ TEST(fr, RandomElement) TEST(fr, Mul) { - fr a{ 0x192f9ddc938ea63, 0x1db93d61007ec4fe, 0xc89284ec31fa49c0, 0x2478d0ff12b04f0f }; - fr b{ 0x7aade4892631231c, 0x8e7515681fe70144, 0x98edb76e689b6fd8, 0x5d0886b15fc835fa }; - fr expected{ 0xab961ef46b4756b6, 0xbc6b636fc29678c8, 0xd247391ed6b5bd16, 0x12e8538b3bde6784 }; + auto a_uint = uint256_t{ 0x192f9ddc938ea63, 0x1db93d61007ec4fe, 0xc89284ec31fa49c0, 0x2478d0ff12b04f0f }; + auto b_uint = uint256_t{ 0x7aade4892631231c, 0x8e7515681fe70144, 0x98edb76e689b6fd8, 0x5d0886b15fc835fa }; + + fr a = a_uint; + fr b = b_uint; + fr expected = (uint512_t(a_uint) * uint512_t(b_uint) % uint512_t(fr::modulus)).lo; fr result; result = a * b; EXPECT_EQ((result == expected), true); @@ -66,8 +69,9 @@ TEST(fr, Mul) TEST(fr, Sqr) { - fr a{ 0x95f946723a1fc34f, 0x641ec0482fc40bb9, 0xb8d645bc49dd513d, 0x1c1bffd317599dbc }; - fr expected{ 0xc787f7d9e2c72714, 0xcf21cf53d8f65f67, 0x8db109903dac0008, 0x26ab4dd65f46be5f }; + auto a_uint = uint256_t{ 0x192f9ddc938ea63, 
0x1db93d61007ec4fe, 0xc89284ec31fa49c0, 0x2478d0ff12b04f0f }; + fr a = a_uint; + fr expected = (uint512_t(a_uint) * uint512_t(a_uint) % uint512_t(fr::modulus)).lo; fr result; result = a.sqr(); EXPECT_EQ((result == expected), true); diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.hpp index ef5240fad94..28cd3036eba 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.hpp @@ -11,9 +11,17 @@ struct Bn254G1Params { static constexpr bool small_elements = true; static constexpr bool has_a = false; static constexpr fq one_x = fq::one(); +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq one_y{ 0xa6ba871b8b1e1b3aUL, 0x14f1d651eb8e167bUL, 0xccdd46def0f28c58UL, 0x1c14ef83340fbe5eUL }; +#else + static constexpr fq one_y{ 0x9d0709d62af99842UL, 0xf7214c0419c29186UL, 0xa603f5090339546dUL, 0x1b906c52ac7a88eaUL }; +#endif static constexpr fq a{ 0UL, 0UL, 0UL, 0UL }; +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq b{ 0x7a17caa950ad28d7UL, 0x1f6ac17ae15521b9UL, 0x334bea4e696bd284UL, 0x2a1f6744ce179d8eUL }; +#else + static constexpr fq b{ 0xeb8a8ec140766463UL, 0xf2b1f20626a3da49UL, 0xf905ef8d84d5fea4UL, 0x2958a27c02b7cd5fUL }; +#endif }; using g1 = group; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g2.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g2.hpp index 37c84d4517d..78ede0cdc98 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g2.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g2.hpp @@ -11,10 +11,21 @@ struct Bn254G2Params { static constexpr bool small_elements = false; static constexpr bool has_a = false; +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr fq2 one_x{ { 0x8e83b5d102bc2026, 0xdceb1935497b0172, 0xfbb8264797811adf, 0x19573841af96503b }, { 0xafb4737da84c6140, 0x6043dd5a5802d8c4, 
0x09e950fc52a02f86, 0x14fef0833aea7b6b } }; static constexpr fq2 one_y{ { 0x619dfa9d886be9f6, 0xfe7fd297f59e9b78, 0xff9e1a62231b7dfe, 0x28fd7eebae9e4206 }, { 0x64095b56c71856ee, 0xdc57f922327d3cbb, 0x55f935be33351076, 0x0da4a0e693fd6482 } }; +#else + static constexpr fq2 one_x{ + { 0xe6df8b2cfb43050UL, 0x254c7d92a843857eUL, 0xf2006d8ad80dd622UL, 0x24a22107dfb004e3UL }, + { 0xe8e7528c0b334b65UL, 0x56e941e8b293cf69UL, 0xe1169545c074740bUL, 0x2ac61491edca4b42UL } + }; + static constexpr fq2 one_y{ + { 0xdc508d48384e8843UL, 0xd55415a8afd31226UL, 0x834bf204bacb6e00UL, 0x51b9758138c5c79UL }, + { 0x64067e0b46a5f641UL, 0x37726529a3a77875UL, 0x4454445bd915f391UL, 0x10d5ac894edeed3UL } + }; +#endif static constexpr fq2 a = fq2::zero(); static constexpr fq2 b = fq2::twist_coeff_b(); }; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.hpp index fd6e1c36268..fc81216686a 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.hpp @@ -16,15 +16,25 @@ struct G1Params { static constexpr bool can_hash_to_curve = true; static constexpr bool small_elements = true; static constexpr bool has_a = false; - // have checked in grumpkin.test_b that b is Montgomery form of -17 +// have checked in grumpkin.test_b that b is Montgomery form of -17 +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr bb::fr b{ 0xdd7056026000005a, 0x223fa97acb319311, 0xcc388229877910c0, 0x34394632b724eaa }; +#else + static constexpr bb::fr b{ 0x2646d52420000b3eUL, 0xf78d5ec872bf8119UL, 0x166fb9c3ec1f6749UL, 0x7a9ef7fabe69506UL }; +#endif static constexpr bb::fr a{ 0UL, 0UL, 0UL, 0UL }; // generator point = (x, y) = (1, sqrt(-16)), sqrt(-16) = 4i static constexpr bb::fr one_x = bb::fr::one(); +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) static constexpr bb::fr one_y{ 0x11b2dff1448c41d8UL, 
0x23d3446f21c77dc3UL, 0xaa7b8cf435dfafbbUL, 0x14b34cf69dc25d68UL }; +#else + static constexpr bb::fr one_y{ + 0xc3e285a561883af3UL, 0x6fc5c2360a850101UL, 0xf35e144228647aa9UL, 0x2151a2fe48c68af6UL + }; +#endif }; using g1 = bb::group; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp index 1d78dd1b3d1..81b575ad67b 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp @@ -43,6 +43,48 @@ struct FqParams { static constexpr uint64_t primitive_root_1 = 0UL; static constexpr uint64_t primitive_root_2 = 0UL; static constexpr uint64_t primitive_root_3 = 0UL; + + static constexpr uint64_t modulus_wasm_0 = 0x1ffffc2f; + static constexpr uint64_t modulus_wasm_1 = 0x1ffffff7; + static constexpr uint64_t modulus_wasm_2 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_3 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_4 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_5 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_6 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_7 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_8 = 0xffffff; + + static constexpr uint64_t r_squared_wasm_0 = 0x001e88003a428400UL; + static constexpr uint64_t r_squared_wasm_1 = 0x0000000000000400UL; + static constexpr uint64_t r_squared_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t r_squared_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t cube_root_wasm_0 = 0x1486c3a0d03162ffUL; + static constexpr uint64_t cube_root_wasm_1 = 0x7fbc2c63897015ebUL; + static constexpr uint64_t cube_root_wasm_2 = 0x1d312f1a05c720a0UL; + static constexpr uint64_t cube_root_wasm_3 = 0x4946d5d79767aa7fUL; + + static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL; + static 
constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0x0000006000016e60ULL, 0x000000800001e880ULL, + 0x000000a0000262a0ULL, 0x000000c00002dcc0ULL, + 0x000000e0000356e0ULL, 0x000001000003d100ULL, + 0x0000012000044b20ULL, 0x000001400004c540ULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; }; using fq = field; @@ -99,6 +141,48 @@ struct FrParams { static constexpr uint64_t primitive_root_1 = 0UL; static constexpr uint64_t primitive_root_2 = 0UL; static constexpr uint64_t primitive_root_3 = 0UL; + + static constexpr uint64_t modulus_wasm_0 = 0x10364141; + static constexpr uint64_t modulus_wasm_1 = 0x1e92f466; + static constexpr uint64_t modulus_wasm_2 = 0x12280eef; + static constexpr uint64_t modulus_wasm_3 = 0x1db9cd5e; + static constexpr uint64_t modulus_wasm_4 = 0x1fffebaa; + static constexpr uint64_t modulus_wasm_5 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_6 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_7 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_8 = 0xffffff; + + static constexpr uint64_t r_squared_wasm_0 = 0x63e601a3c9f6ab4bUL; + static constexpr uint64_t r_squared_wasm_1 = 
0xa2b6456d46702f57UL; + static constexpr uint64_t r_squared_wasm_2 = 0x5fd7916f341f1cefUL; + static constexpr uint64_t r_squared_wasm_3 = 0x9c7356071a6f179aUL; + + static constexpr uint64_t cube_root_wasm_0 = 0x9185b639102f0736UL; + static constexpr uint64_t cube_root_wasm_1 = 0x47a854ad9ffc4748UL; + static constexpr uint64_t cube_root_wasm_2 = 0x752cc0ca4d2fb232UL; + static constexpr uint64_t cube_root_wasm_3 = 0x650802f0ab1ac72eUL; + + static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0x1c84e7fdde173760ULL, 0x22391663d74f0f40ULL, + 0x27ed44c9d086e720ULL, 0x2da1732fc9bebf00ULL, + 0x3355a195c2f696e0ULL, 0x3909cffbbc2e6ec0ULL, + 0x3ebdfe61b56646a0ULL, 0x44722cc7ae9e1e80ULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0x52b5efd2729bdaa8ULL, 0xfcda52fc8987d330ULL, + 0xa6feb626a073cbb8ULL, 0x51231950b75fc440ULL, + 0xfb477c7ace4bbcc8ULL, 0xa56bdfa4e537b550ULL, + 0x4f9042cefc23add8ULL, 0xf9b4a5f9130fa660ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0x00000000000000cbULL, 0x00000000000000f3ULL, + 0x000000000000011cULL, 0x0000000000000145ULL, + 0x000000000000016dULL, 0x0000000000000196ULL, + 0x00000000000001bfULL, 0x00000000000001e7ULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; }; using fr = field; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.test.cpp index 82902e1c3f8..60d0f24af2a 100644 --- 
a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.test.cpp @@ -70,7 +70,11 @@ TEST(secp256k1, TestToMontgomeryForm) uint256_t a_raw = get_fq_element(); secp256k1::fq montgomery_result(a_raw); - uint512_t R = uint512_t(0, 1); +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) + constexpr uint512_t R = uint512_t(0, 1); +#else + constexpr uint512_t R = (uint512_t(1) << (29 * 9)) % uint512_t(test_fq_mod); +#endif uint512_t aR = uint512_t(a_raw) * R; uint256_t expected = (aR % uint512_t(test_fq_mod)).lo; @@ -442,6 +446,9 @@ TEST(secp256k1, GetEndomorphismScalars) expected.self_from_montgomery_form(); EXPECT_EQ(k, expected); + if (k != expected) { + break; + } } } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.hpp index 3835bfce10f..69e8e89b477 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.hpp @@ -8,7 +8,7 @@ namespace bb::secp256r1 { struct FqParams { static constexpr uint64_t modulus_0 = 0xFFFFFFFFFFFFFFFFULL; static constexpr uint64_t modulus_1 = 0x00000000FFFFFFFFULL; - static constexpr uint64_t modulus_2 = 0X0000000000000000ULL; + static constexpr uint64_t modulus_2 = 0x0000000000000000ULL; static constexpr uint64_t modulus_3 = 0xFFFFFFFF00000001ULL; static constexpr uint64_t r_squared_0 = 3ULL; @@ -42,6 +42,48 @@ struct FqParams { static constexpr uint64_t primitive_root_1 = 0UL; static constexpr uint64_t primitive_root_2 = 0UL; static constexpr uint64_t primitive_root_3 = 0UL; + + static constexpr uint64_t modulus_wasm_0 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_1 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_2 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_3 = 0x1ff; + static constexpr uint64_t modulus_wasm_4 = 0x0; + static 
constexpr uint64_t modulus_wasm_5 = 0x0; + static constexpr uint64_t modulus_wasm_6 = 0x40000; + static constexpr uint64_t modulus_wasm_7 = 0x1fe00000; + static constexpr uint64_t modulus_wasm_8 = 0xffffff; + + static constexpr uint64_t r_squared_wasm_0 = 0x0000000000000c00UL; + static constexpr uint64_t r_squared_wasm_1 = 0xffffeffffffffc00UL; + static constexpr uint64_t r_squared_wasm_2 = 0xfffffffffffffbffUL; + static constexpr uint64_t r_squared_wasm_3 = 0x000013fffffff7ffUL; + + static constexpr uint64_t cube_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0x0000000000000060ULL, 0x0000000000000080ULL, + 0x00000000000000a0ULL, 0x00000000000000c0ULL, + 0x00000000000000e0ULL, 0x0000000000000100ULL, + 0x0000000000000120ULL, 0x0000000000000140ULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0xffffffa000000000ULL, 0xffffff8000000000ULL, + 0xffffff6000000000ULL, 0xffffff4000000000ULL, + 0xffffff2000000000ULL, 0xffffff0000000000ULL, + 0xfffffee000000000ULL, 0xfffffec000000000ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0xffffffffffffffffULL, 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0xffffffffffffffffULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x0000005fffffff9fULL, 0x0000007fffffff7fULL, + 0x0000009fffffff5fULL, 0x000000bfffffff3fULL, + 0x000000dfffffff1fULL, 
0x000000fffffffeffULL, + 0x0000011ffffffedfULL, 0x0000013ffffffebfULL }; }; using fq = field; @@ -83,6 +125,48 @@ struct FrParams { static constexpr uint64_t primitive_root_1 = 0UL; static constexpr uint64_t primitive_root_2 = 0UL; static constexpr uint64_t primitive_root_3 = 0UL; + + static constexpr uint64_t modulus_wasm_0 = 0x1c632551; + static constexpr uint64_t modulus_wasm_1 = 0x1dce5617; + static constexpr uint64_t modulus_wasm_2 = 0x5e7a13c; + static constexpr uint64_t modulus_wasm_3 = 0xdf55b4e; + static constexpr uint64_t modulus_wasm_4 = 0x1ffffbce; + static constexpr uint64_t modulus_wasm_5 = 0x1fffffff; + static constexpr uint64_t modulus_wasm_6 = 0x3ffff; + static constexpr uint64_t modulus_wasm_7 = 0x1fe00000; + static constexpr uint64_t modulus_wasm_8 = 0xffffff; + + static constexpr uint64_t r_squared_wasm_0 = 0x45e9cfeeb48d9ef5UL; + static constexpr uint64_t r_squared_wasm_1 = 0x1f11fc5bb2d31a99UL; + static constexpr uint64_t r_squared_wasm_2 = 0x16c8e4adafb16586UL; + static constexpr uint64_t r_squared_wasm_3 = 0x84b6556a65587f06UL; + + static constexpr uint64_t cube_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t cube_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL; + static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL; + + static constexpr uint64_t coset_generators_wasm_0[8] = { 0xbd6e9563293f5920ULL, 0x46353d039cdaaf00ULL, + 0xcefbe4a4107604e0ULL, 0x57c28c4484115ac0ULL, + 0xe08933e4f7acb0a0ULL, 0x694fdb856b480680ULL, + 0xf2168325dee35c60ULL, 0x7add2ac6527eb240ULL }; + static constexpr uint64_t coset_generators_wasm_1[8] = { 0xb5e4a80dcb554baaULL, 
0x19055258e8617b0cULL, + 0x7c25fca4056daa6dULL, 0xdf46a6ef2279d9cfULL, + 0x4267513a3f860930ULL, 0xa587fb855c923892ULL, + 0x08a8a5d0799e67f3ULL, 0x6bc9501b96aa9755ULL }; + static constexpr uint64_t coset_generators_wasm_2[8] = { 0x000000000000003aULL, 0x0000000000000043ULL, + 0x000000000000004bULL, 0x0000000000000053ULL, + 0x000000000000005cULL, 0x0000000000000064ULL, + 0x000000000000006dULL, 0x0000000000000075ULL }; + static constexpr uint64_t coset_generators_wasm_3[8] = { 0x000000dfffffff20ULL, 0x000000ffffffff00ULL, + 0x0000011ffffffee0ULL, 0x0000013ffffffec0ULL, + 0x0000015ffffffea0ULL, 0x0000017ffffffe80ULL, + 0x0000019ffffffe60ULL, 0x000001bffffffe40ULL }; }; using fr = field; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.test.cpp index 3e993586242..0b2befbb745 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256r1/secp256r1.test.cpp @@ -70,7 +70,11 @@ TEST(secp256r1, TestToMontgomeryForm) uint256_t a_raw = get_fq_element(); secp256r1::fq montgomery_result(a_raw); - uint512_t R = uint512_t(0, 1); +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) + constexpr uint512_t R = uint512_t(0, 1); +#else + constexpr uint512_t R = (uint512_t(1) << (29 * 9)) % uint512_t(test_fq_mod); +#endif uint512_t aR = uint512_t(a_raw) * R; uint256_t expected = (aR % uint512_t(test_fq_mod)).lo; @@ -434,6 +438,7 @@ TEST(secp256r1, check_compression_constructor) std::cout << "Affine element: " << el << std::endl; }**/ +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) TEST(secp256r1, MontgomeryMulBigBug) { secp256r1::fr a; @@ -445,3 +450,4 @@ TEST(secp256r1, MontgomeryMulBigBug) secp256r1::fr expected(uint256_t{ 0x57abc6aa0349c084, 0x65b21b232a4cb7a5, 0x5ba781948b0fcd6e, 0xd6e9e0644bda12f7 }); EXPECT_EQ((a_sqr == expected), true); } +#endif \ No newline at end of file diff --git 
a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index 87a10938677..18a7480a3d0 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -21,6 +21,11 @@ #endif namespace bb { +/** + * @brief General class for prime fields see \ref field_docs["field documentation"] for general implementation reference + * + * @tparam Params_ + */ template struct alignas(32) field { public: using View = field; @@ -30,6 +35,11 @@ template struct alignas(32) field { using out_buf = uint8_t*; using vec_out_buf = uint8_t**; +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) +#define WASM_NUM_LIMBS 9 +#define WASM_LIMB_BITS 29 +#endif + // We don't initialize data in the default constructor since we'd lose a lot of time on huge array initializations. // Other alternatives have been noted, such as casting to get around constructors where they matter, // however it is felt that sanitizer tools (e.g. 
MSAN) can detect garbage well, whereas doing @@ -159,14 +169,34 @@ template struct alignas(32) field { static constexpr uint256_t modulus = uint256_t{ Params::modulus_0, Params::modulus_1, Params::modulus_2, Params::modulus_3 }; +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) + static constexpr uint256_t r_squared_uint{ + Params_::r_squared_0, Params_::r_squared_1, Params_::r_squared_2, Params_::r_squared_3 + }; +#else + static constexpr uint256_t r_squared_uint{ + Params_::r_squared_wasm_0, Params_::r_squared_wasm_1, Params_::r_squared_wasm_2, Params_::r_squared_wasm_3 + }; + static constexpr std::array wasm_modulus = { Params::modulus_wasm_0, Params::modulus_wasm_1, + Params::modulus_wasm_2, Params::modulus_wasm_3, + Params::modulus_wasm_4, Params::modulus_wasm_5, + Params::modulus_wasm_6, Params::modulus_wasm_7, + Params::modulus_wasm_8 }; +#endif static constexpr field cube_root_of_unity() { // endomorphism i.e. lambda * [P] = (beta * x, y) if constexpr (Params::cube_root_0 != 0) { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) constexpr field result{ Params::cube_root_0, Params::cube_root_1, Params::cube_root_2, Params::cube_root_3 }; +#else + constexpr field result{ + Params::cube_root_wasm_0, Params::cube_root_wasm_1, Params::cube_root_wasm_2, Params::cube_root_wasm_3 + }; +#endif return result; } else { constexpr field two_inv = field(2).invert(); @@ -182,35 +212,65 @@ template struct alignas(32) field { static constexpr field external_coset_generator() { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) const field result{ Params::coset_generators_0[7], Params::coset_generators_1[7], Params::coset_generators_2[7], Params::coset_generators_3[7], }; +#else + const field result{ + Params::coset_generators_wasm_0[7], + Params::coset_generators_wasm_1[7], + Params::coset_generators_wasm_2[7], + Params::coset_generators_wasm_3[7], + }; +#endif + return result; } static constexpr field tag_coset_generator() { +#if defined(__SIZEOF_INT128__) && 
!defined(__wasm__) const field result{ Params::coset_generators_0[6], Params::coset_generators_1[6], Params::coset_generators_2[6], Params::coset_generators_3[6], }; +#else + const field result{ + Params::coset_generators_wasm_0[6], + Params::coset_generators_wasm_1[6], + Params::coset_generators_wasm_2[6], + Params::coset_generators_wasm_3[6], + }; +#endif + return result; } static constexpr field coset_generator(const size_t idx) { ASSERT(idx < 7); +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) const field result{ Params::coset_generators_0[idx], Params::coset_generators_1[idx], Params::coset_generators_2[idx], Params::coset_generators_3[idx], }; +#else + const field result{ + Params::coset_generators_wasm_0[idx], + Params::coset_generators_wasm_1[idx], + Params::coset_generators_wasm_2[idx], + Params::coset_generators_wasm_3[idx], + }; +#endif + return result; } @@ -247,6 +307,7 @@ template struct alignas(32) field { BB_INLINE constexpr field pow(const uint256_t& exponent) const noexcept; BB_INLINE constexpr field pow(uint64_t exponent) const noexcept; + static_assert(Params::modulus_0 != 1); static constexpr uint256_t modulus_minus_two = uint256_t(Params::modulus_0 - 2ULL, Params::modulus_1, Params::modulus_2, Params::modulus_3); constexpr field invert() const noexcept; @@ -520,6 +581,29 @@ template struct alignas(32) field { {} }; +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) + BB_INLINE static constexpr void wasm_madd(uint64_t& left_limb, + const std::array& right_limbs, + uint64_t& result_0, + uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8); + BB_INLINE static constexpr void wasm_reduce(uint64_t& result_0, + uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8); + BB_INLINE static constexpr std::array 
wasm_convert(const uint64_t* data); +#endif BB_INLINE static constexpr std::pair mul_wide(uint64_t a, uint64_t b) noexcept; BB_INLINE static constexpr uint64_t mac( diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_docs.md b/barretenberg/cpp/src/barretenberg/ecc/fields/field_docs.md new file mode 100644 index 00000000000..76f1bc30643 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_docs.md @@ -0,0 +1,190 @@ +Prime field documentation {#field_docs} +=== +Barretenberg has its own implementation of finite field arithmetic. The implementation targets 254 (bn254, grumpkin) and 256-bit (secp256k1, secp256r1) fields. Internally the field is represented as a little-endian C-array of 4 uint64_t limbs. + +## Field arithmetic +### Introduction to Montgomery form {#field_docs_montgomery_explainer} +We use Montgomery reduction to speed up field multiplication. For an original element \f$ a ∈ F_p\f$ the element is represented internally as $$ a⋅R\ mod\ p$$ where \f$R = 2^d\ mod\ p\f$. The chosen \f$d\f$ depends on the build configuration: +1. \f$d=29⋅9=261\f$ for builds that don't support the uint128_t type, for example, for WASM build +2. \f$d=64⋅4=256\f$ for standard builds (x86_64). + +The goal of using Montgomery form is to avoid heavy division modulo \f$p\f$. To compute a representative of element $$c = a⋅b\ mod\ p$$ we compute $$c⋅R = (a⋅R)⋅(b⋅R) / R\ mod\ p$$, but we use an efficient division trick to avoid straight modular division. Let's look into the standard 4⋅64 case: +1. First, we compute the value $$c_r=c⋅R⋅R = aR⋅bR$$ in integers and get a value with 8 64-bit limbs +2. Then we take the lowest limb of \f$c_r\f$ (\f$c_r[0]\f$) and multiply it by a special value $$r_{inv} = -1 ⋅ p^{-1}\ mod\ 2^{64}$$ As a result we get $$k = r_{inv}⋅ c_r[0]\ mod\ 2^{64}$$ +3. 
Next we update \f$c_r\f$ in integers by adding a value \f$k⋅p\f$: $$c_r += k⋅p$$ You might notice that the value of \f$c_r\ mod\ p\f$ hasn't changed, since we've added a multiple of the modulus. At the same time, if we look at the expression modulo \f$2^{64}\f$: $$c_r + k⋅p = c_r + c_r⋅r_{inv}⋅p = c_r + c_r⋅ (-1)⋅p^{-1}⋅p = c_r - c_r = 0\ mod\ 2^{64}$$ The result is equivalent modulo \f$p\f$, but we zeroed out the lowest limb +4. We perform the same operation for \f$c_r[1]\f$, but instead of adding \f$k⋅p\f$, we add \f$2^{64}⋅k⋅p\f$. In the implementation, instead of adding \f$k⋅ p\f$ to limbs of \f$c_r\f$ starting with zero, we just start with limb 1. This ensures that \f$c_r[1]=0\f$. We then perform the same operation for 2 more limbs. +5. At this stage we are left with a version of \f$c_r\f$ where the first 4 limbs of the total 8 limbs are zero. So if we treat the 4 high limbs as a separate integer \f$c_{r.high}\f$, $$c_r = c_{r.high}⋅2^{256}=c_{r.high}⋅R\ mod\ p \Rightarrow c_{r.high} = c\cdot R\ mod\ p$$ and we can get the evaluation simply by taking the 4 high limbs of \f$c_r\f$. +6. The previous step has reduced the intermediate value of \f$cR\f$ to range \f$[0,2p)\f$, so we must check if it is more than \f$p\f$ and subtract the modulus once if it overflows. + +Why does this work? Originally both \f$aR\f$ and \f$bR\f$ are less than the modulus \f$p\f$ in integers, so $$aR\cdot bR \le (p-1)^2$$ During each of the \f$k\cdot p\f$ addition rounds we can add at most \f$(2^{64}-1)p\f$ to corresponding digits, so at most we add \f$(2^{256}-1)p\f$ and the total is $$aR\cdot bR + k_{0,1,2,3}p \le (p-1)^2+(2^{256}-1)p < 2\cdot 2^{256}p \Rightarrow c_{r.high} = \frac{aR\cdot bR + k_{0,1,2,3}p}{2^{256}} < 2p$$. + +For bn254 scalar and base fields we can do even better by employing a simple trick. The moduli of both fields are 254 bits, while 4 64-bit limbs allow 256 bits of storage. We relax the internal representation to use values in range \f$[0,2p)\f$. 
The addition, negation and subtraction operation logic doesn't change, we simply replace the modulus \f$p\f$ with \f$2p\f$, but the multiplication becomes more efficient. The multiplicands are in range \f$[0,2p)\f$, but we add multiples of modulus \f$p\f$ to reduce limbs, not \f$2p\f$. If we revisit the \f$c_r\f$ formula: +$$aR\cdot bR + k_{0,1,2,3}p \le (2p-1)^2+(2^{256}-1)p = 2^{256}p+4p^2-5p+1 \Rightarrow$$ $$\Rightarrow c_{r.high} = \frac{aR\cdot bR + k_{0,1,2,3}p}{2^{256}} \le \frac{2^{256}p+4p^2-5p+1}{2^{256}}=p +\frac{4p^2 - 5p +1}{2^{256}}, 4p < 2^{256} \Rightarrow$$ $$\Rightarrow p +\frac{4p^2 - 5p +1}{2^{256}} < 2p$$ So we ended in the same range and we don't have to perform additional reductions. + +**N.B.** In the code we refer to this form as coarse + + + + +### Converting to and from Montgomery form +Obviously we want to avoid using standard form division when converting between forms, so we use Montgomery form to convert to Montgomery form. If we look at a value \f$a\ mod\ p\f$ we can notice that this is the Montgomery form of \f$a\cdot R^{-1}\ mod\ p\f$, so if we want to get \f$aR\f$ from it, we need to multiply it by the Montgomery form of \f$R\ mod\ p\f$, which is \f$R\cdot R\ mod\ p\f$. So using Montgomery multiplication we compute + +$$a \cdot R^2 / R = a\cdot R\ mod\ p$$ + +To convert from Montgomery form into standard form we multiply the element in Montgomery form by 1: + +$$ aR \cdot 1 / R = a\ mod\ p$$ + +## Architecture details {#field_docs_architecture_details} +You could say that for each multiplication or squaring primitive there are 3 implementations: +1. Generic 64-bit implementation when uint128_t type is available (there is efficient multiplication of 64-bit values) +2. Assembly 64-bit implementation (Intel ADX and no Intel ADX versions) +3. Implementation targeting WASM + +The generic implementation has 2 purposes: +1. Building barretenberg on platforms we haven't targeted in the past (new ARM-based Macs, for example) +2. 
Compile-time computation of constant expressions, since we can't use the assembly implementation for those. + +The assembly implementation for x86_64 is optimised. There are 2 versions: +1. General x86_64 implementation that uses 64-bit registers. The squaring operation is equivalent to multiplication for simplicity and because the original squaring implementation was quite buggy. +2. Implementation using Intel ADX. It allows simultaneous use of two addition-with-carry operations (adox and adcx) on two separate CPU gates (units of execution that can work simultaneously on the same core), which almost halves the time spent adding up the results of uint64_t multiplication. + +Implementation for WASM: + +We use 9 29-bit limbs for computation (storage stays the same) and we change the Montgomery form. The reason for a different architecture is that WASM doesn't have: +1. 128-bit result 64*64 bit multiplication +2. 64-bit addition with carry + +In the past we implemented a version with 32-bit limbs, but as a result, when we accumulated limb products we always had to split 64-bit results of 32-bit multiplication back into 32-bit chunks. Had we not, the addition of 2 64-bit products would have lost the carry flag and the result would be incorrect. There were 2 issues with this: +1. This spawned a lot of masking operations +2. We didn't use more efficient algorithms for squaring, because multiplication by 2 of intermediate products would once again overflow. + +Switching to 9 29-bit limbs increased the number of multiplications from 136 to 171. However, since the product of 2 limbs is 58 bits, we can safely accumulate 64 of those before we have to reduce. This allowed us to get rid of a lot of intermediate masking operations, shifts and additions, so the resulting computation turned out to be more efficient. 
+ +## Interaction of field object with other objects +Most of the time field is used with uint64_t or uint256_t in our codebase, but there is general logic of how we generate field elements from integers: +1. Converting from signed int takes the sign into account. It takes the absolute value, converts it to montgomery and then negates the result if the original value was negative +2. Unsigned integers ( <= 64 bits) are just converted to montgomery +3. uint256_t and uint512_t: + 1. Truncate to 256 bits + 2. Subtract the modulus until the value is within field + 3. Convert to montgomery + +Conversion from field elements exists only to unsigned integers and bools. The value is converted from montgomery and appropriate number of lowest bits is used to initialize the value. + +**N.B.** Functions for converting from uint256_t and back are not bijective, since values \f$ \ge p\f$ will be reduced. + +## Field parameters + +The field template is instantiated with field parameter classes, for example, class bb::Bn254FqParams. Each such class contains at least the modulus (in 64-bit and 29-bit form), r_inv (used for efficient reductions) and 2 versions of r_squared used for converting to Montgomery form (64-bit and WASM/29-bit version). r_squared and other parameters (such as cube_root, primitive_root and coset_generators) are defined for wasm separately, because the values represent an element already in Montgomery form. 
+ +## Helpful python snippets + +Parse field parameters out of a parameter class (doesn't check and reconstitute endomorphism parameters, but checks correctness of everything else) +```python +import re +def parse_field_params(s): + def parse_number(line): + """Expects a string without whitespaces""" + line=line.replace('U','').replace('L','') # Clear away all postfixes + if line.find('0x')!=-1: # We have to parse hex + value= int(line,16) + else: + value = int(line) + return value + + def recover_single_value(name): + nonlocal s + index=s.find(name) + if index==-1: + raise ValueError("Couldn't find value with name "+name) + eq_position=s[index:].find('=') + line_end=s[index:].find(';') + return parse_number(s[index+eq_position+1:index+line_end]) + + def recover_single_value_if_present(name): + nonlocal s + index=s.find(name) + if index==-1: + return None + eq_position=s[index:].find('=') + line_end=s[index:].find(';') + return parse_number(s[index+eq_position+1:index+line_end]) + + def recover_array(name): + nonlocal s + index = s.find(name) + number_of_elements=int(re.findall(r'(?<='+name+r'\[)\d+',s)[0]) + start_index=s[index:].find('{') + end_index=s[index:].find('}') + all_values=s[index+start_index+1:index+end_index] + result=[parse_number(x) for (i,x) in enumerate(all_values.split(',')) if i>(i*64))&((1<<64)-1))for i in range(4)])+"})") +``` \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp index 42d7e1583ee..1c556fa43de 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp @@ -290,7 +290,8 @@ template constexpr bool field::operator!=(const field& other) const template constexpr field field::to_montgomery_form() const noexcept { BB_OP_COUNT_TRACK_NAME("fr::to_montgomery_form"); - constexpr field r_squared{ T::r_squared_0, T::r_squared_1, T::r_squared_2, 
T::r_squared_3 }; + constexpr field r_squared = + field{ r_squared_uint.data[0], r_squared_uint.data[1], r_squared_uint.data[2], r_squared_uint.data[3] }; field result = *this; // TODO(@zac-williamson): are these reductions needed? @@ -315,7 +316,9 @@ template constexpr field field::from_montgomery_form() const noe template constexpr void field::self_to_montgomery_form() noexcept { BB_OP_COUNT_TRACK_NAME("fr::self_to_montgomery_form"); - constexpr field r_squared{ T::r_squared_0, T::r_squared_1, T::r_squared_2, T::r_squared_3 }; + constexpr field r_squared = + field{ r_squared_uint.data[0], r_squared_uint.data[1], r_squared_uint.data[2], r_squared_uint.data[3] }; + self_reduce_once(); self_reduce_once(); self_reduce_once(); @@ -582,7 +585,11 @@ template constexpr bool field::is_zero() const noexcept template constexpr field field::get_root_of_unity(size_t subgroup_size) noexcept { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) field r{ T::primitive_root_0, T::primitive_root_1, T::primitive_root_2, T::primitive_root_3 }; +#else + field r{ T::primitive_root_wasm_0, T::primitive_root_wasm_1, T::primitive_root_wasm_2, T::primitive_root_wasm_3 }; +#endif for (size_t i = primitive_root_log_size(); i > subgroup_size; --i) { r.self_sqr(); } diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl_generic.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl_generic.hpp index f1bed6aa602..cb3b152d33e 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl_generic.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl_generic.hpp @@ -276,6 +276,13 @@ template constexpr field field::subtract(const field& other) con return { r0, r1, r2, r3 }; } +/** + * @brief + * + * @tparam T + * @param other + * @return constexpr field + */ template constexpr field field::subtract_coarse(const field& other) const noexcept { if constexpr (modulus.data[3] >= 0x4000000000000000ULL) { @@ -295,6 +302,13 @@ template constexpr field 
field::subtract_coarse(const field& oth return { r0, r1, r2, r3 }; } + +/** + * @brief Mongtomery multiplication for moduli > 2²⁵⁴ + * + * @details Explanation of Montgomery form can be found in \ref field_docs_montgomery_explainer and the difference + * between WASM and generic versions is explained in \ref field_docs_architecture_details + */ template constexpr field field::montgomery_mul_big(const field& other) const noexcept { #if defined(__SIZEOF_INT128__) && !defined(__wasm__) @@ -336,84 +350,187 @@ template constexpr field field::montgomery_mul_big(const field& r3 += (modulus.data[3] & borrow) + carry; return { r0, r1, r2, r3 }; #else - uint64_t c = 0; - uint64_t t0 = 0; - uint64_t t1 = 0; - uint64_t t2 = 0; - uint64_t t3 = 0; - uint64_t t4 = 0; - uint64_t t5 = 0; - uint64_t t6 = 0; - uint64_t t7 = 0; - uint64_t t8 = 0; - uint64_t t9 = 0; - uint64_t k = 0; - constexpr uint64_t wasm_modulus[8]{ - modulus.data[0] & 0xffffffffULL, modulus.data[0] >> 32ULL, modulus.data[1] & 0xffffffffULL, - modulus.data[1] >> 32ULL, modulus.data[2] & 0xffffffffULL, modulus.data[2] >> 32ULL, - modulus.data[3] & 0xffffffffULL, modulus.data[3] >> 32ULL, - }; - constexpr uint64_t wasm_rinv = T::r_inv & 0xffffffffULL; + // Convert 4 64-bit limbs to 9 29-bit limbs + auto left = wasm_convert(data); + auto right = wasm_convert(other.data); + constexpr uint64_t mask = 0x1fffffff; + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 = 0; + uint64_t temp_8 = 0; + uint64_t temp_9 = 0; + uint64_t temp_10 = 0; + uint64_t temp_11 = 0; + uint64_t temp_12 = 0; + uint64_t temp_13 = 0; + uint64_t temp_14 = 0; + uint64_t temp_15 = 0; + uint64_t temp_16 = 0; + uint64_t temp_17 = 0; + + // Multiply-add 0th limb of the left argument by all 9 limbs of the right arguemnt + wasm_madd(left[0], right, temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + // 
Instantly reduce + wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + // Continue for other limbs + wasm_madd(left[1], right, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_madd(left[2], right, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_madd(left[3], right, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_madd(left[4], right, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_madd(left[5], right, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_madd(left[6], right, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_madd(left[7], right, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_madd(left[8], right, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + wasm_reduce(temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + + // After all multiplications and additions, convert relaxed form to strict (all limbs are 29 bits) + temp_10 += temp_9 >> WASM_LIMB_BITS; + temp_9 &= mask; + temp_11 += temp_10 >> WASM_LIMB_BITS; + temp_10 &= mask; + temp_12 += temp_11 >> WASM_LIMB_BITS; + temp_11 &= mask; + temp_13 += temp_12 >> 
WASM_LIMB_BITS; + temp_12 &= mask; + temp_14 += temp_13 >> WASM_LIMB_BITS; + temp_13 &= mask; + temp_15 += temp_14 >> WASM_LIMB_BITS; + temp_14 &= mask; + temp_16 += temp_15 >> WASM_LIMB_BITS; + temp_15 &= mask; + temp_17 += temp_16 >> WASM_LIMB_BITS; + temp_16 &= mask; + + uint64_t r_temp_0; + uint64_t r_temp_1; + uint64_t r_temp_2; + uint64_t r_temp_3; + uint64_t r_temp_4; + uint64_t r_temp_5; + uint64_t r_temp_6; + uint64_t r_temp_7; + uint64_t r_temp_8; + // Subtract modulus from result + r_temp_0 = temp_9 - wasm_modulus[0]; + r_temp_1 = temp_10 - wasm_modulus[1] - ((r_temp_0) >> 63); + r_temp_2 = temp_11 - wasm_modulus[2] - ((r_temp_1) >> 63); + r_temp_3 = temp_12 - wasm_modulus[3] - ((r_temp_2) >> 63); + r_temp_4 = temp_13 - wasm_modulus[4] - ((r_temp_3) >> 63); + r_temp_5 = temp_14 - wasm_modulus[5] - ((r_temp_4) >> 63); + r_temp_6 = temp_15 - wasm_modulus[6] - ((r_temp_5) >> 63); + r_temp_7 = temp_16 - wasm_modulus[7] - ((r_temp_6) >> 63); + r_temp_8 = temp_17 - wasm_modulus[8] - ((r_temp_7) >> 63); + + // Depending on whether the subtraction underflowed, choose original value or the result of subtraction + uint64_t new_mask = 0 - (r_temp_8 >> 63); + uint64_t inverse_mask = (~new_mask) & mask; + temp_9 = (temp_9 & new_mask) | (r_temp_0 & inverse_mask); + temp_10 = (temp_10 & new_mask) | (r_temp_1 & inverse_mask); + temp_11 = (temp_11 & new_mask) | (r_temp_2 & inverse_mask); + temp_12 = (temp_12 & new_mask) | (r_temp_3 & inverse_mask); + temp_13 = (temp_13 & new_mask) | (r_temp_4 & inverse_mask); + temp_14 = (temp_14 & new_mask) | (r_temp_5 & inverse_mask); + temp_15 = (temp_15 & new_mask) | (r_temp_6 & inverse_mask); + temp_16 = (temp_16 & new_mask) | (r_temp_7 & inverse_mask); + temp_17 = (temp_17 & new_mask) | (r_temp_8 & inverse_mask); + + // Convert back to 4 64-bit limbs + return { (temp_9 << 0) | (temp_10 << 29) | (temp_11 << 58), + (temp_11 >> 6) | (temp_12 << 23) | (temp_13 << 52), + (temp_13 >> 12) | (temp_14 << 17) | (temp_15 << 46), + (temp_15 >> 
18) | (temp_16 << 11) | (temp_17 << 40) }; - const uint64_t left[8]{ - data[0] & 0xffffffffULL, data[0] >> 32, data[1] & 0xffffffffULL, data[1] >> 32, - data[2] & 0xffffffffULL, data[2] >> 32, data[3] & 0xffffffffULL, data[3] >> 32, - }; - const uint64_t right[8]{ - other.data[0] & 0xffffffffULL, other.data[0] >> 32, other.data[1] & 0xffffffffULL, other.data[1] >> 32, - other.data[2] & 0xffffffffULL, other.data[2] >> 32, other.data[3] & 0xffffffffULL, other.data[3] >> 32, - }; +#endif +} - for (size_t i = 0; i < 8; ++i) { - c = 0; - mac(t0, left[i], right[0], c, t0, c); - mac(t1, left[i], right[1], c, t1, c); - mac(t2, left[i], right[2], c, t2, c); - mac(t3, left[i], right[3], c, t3, c); - mac(t4, left[i], right[4], c, t4, c); - mac(t5, left[i], right[5], c, t5, c); - mac(t6, left[i], right[6], c, t6, c); - mac(t7, left[i], right[7], c, t7, c); - uint64_t end_mul = t8 + c; - t8 = end_mul & 0xffffffffU; - t9 = end_mul >> 32; +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) - c = 0; - k = (t0 * wasm_rinv) & 0xffffffffU; - c = mac_discard_lo(t0, k, wasm_modulus[0]); - mac(t1, k, wasm_modulus[1], c, t0, c); - mac(t2, k, wasm_modulus[2], c, t1, c); - mac(t3, k, wasm_modulus[3], c, t2, c); - mac(t4, k, wasm_modulus[4], c, t3, c); - mac(t5, k, wasm_modulus[5], c, t4, c); - mac(t6, k, wasm_modulus[6], c, t5, c); - mac(t7, k, wasm_modulus[7], c, t6, c); - uint64_t end_reduce = c + t8; - t7 = end_reduce & 0xffffffffU; - c = end_reduce >> 32; - t8 = t9 + c; - } - uint64_t v0 = t0 + (t1 << 32); - uint64_t v1 = t2 + (t3 << 32); - uint64_t v2 = t4 + (t5 << 32); - uint64_t v3 = t6 + (t7 << 32); - uint64_t v4 = t8; - uint64_t borrow = 0; - uint64_t r0 = sbb(v0, modulus.data[0], borrow, borrow); - uint64_t r1 = sbb(v1, modulus.data[1], borrow, borrow); - uint64_t r2 = sbb(v2, modulus.data[2], borrow, borrow); - uint64_t r3 = sbb(v3, modulus.data[3], borrow, borrow); - borrow = borrow ^ (0ULL - v4); - r0 += (modulus.data[0] & borrow); - uint64_t carry = r0 < (modulus.data[0] & 
borrow); - r1 = addc(r1, modulus.data[1] & borrow, carry, carry); - r2 = addc(r2, modulus.data[2] & borrow, carry, carry); - r3 += (modulus.data[3] & borrow) + carry; - return { r0, r1, r2, r3 }; -#endif +/** + * @brief Multiply left limb by a sequence of 9 limbs and put into result variables + * + */ +template +constexpr void field::wasm_madd(uint64_t& left_limb, + const std::array& right_limbs, + uint64_t& result_0, + uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8) +{ + result_0 += left_limb * right_limbs[0]; + result_1 += left_limb * right_limbs[1]; + result_2 += left_limb * right_limbs[2]; + result_3 += left_limb * right_limbs[3]; + result_4 += left_limb * right_limbs[4]; + result_5 += left_limb * right_limbs[5]; + result_6 += left_limb * right_limbs[6]; + result_7 += left_limb * right_limbs[7]; + result_8 += left_limb * right_limbs[8]; } +/** + * @brief Perform 29-bit montgomery reduction on 1 limb (result_0 should be zero modulo 2**29 after this) + * + */ +template +constexpr void field::wasm_reduce(uint64_t& result_0, + uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8) +{ + constexpr uint64_t mask = 0x1fffffff; + constexpr uint64_t r_inv = T::r_inv & mask; + uint64_t k = (result_0 * r_inv) & mask; + result_0 += k * wasm_modulus[0]; + result_1 += k * wasm_modulus[1] + (result_0 >> WASM_LIMB_BITS); + result_2 += k * wasm_modulus[2]; + result_3 += k * wasm_modulus[3]; + result_4 += k * wasm_modulus[4]; + result_5 += k * wasm_modulus[5]; + result_6 += k * wasm_modulus[6]; + result_7 += k * wasm_modulus[7]; + result_8 += k * wasm_modulus[8]; +} +/** + * @brief Convert 4 64-bit limbs into 9 29-bit limbs + * + */ +template constexpr std::array field::wasm_convert(const uint64_t* data) +{ + return { data[0] & 0x1fffffff, 
+ (data[0] >> WASM_LIMB_BITS) & 0x1fffffff, + ((data[0] >> 58) & 0x3f) | ((data[1] & 0x7fffff) << 6), + (data[1] >> 23) & 0x1fffffff, + ((data[1] >> 52) & 0xfff) | ((data[2] & 0x1ffff) << 12), + (data[2] >> 17) & 0x1fffffff, + ((data[2] >> 46) & 0x3ffff) | ((data[3] & 0x7ff) << 18), + (data[3] >> 11) & 0x1fffffff, + (data[3] >> 40) & 0x1fffffff }; +} +#endif template constexpr field field::montgomery_mul(const field& other) const noexcept { if constexpr (modulus.data[3] >= 0x4000000000000000ULL) { @@ -466,178 +583,71 @@ template constexpr field field::montgomery_mul(const field& othe t3 = c + a; return { t0, t1, t2, t3 }; #else - constexpr uint64_t wasm_modulus[8]{ - modulus.data[0] & 0xffffffffULL, modulus.data[0] >> 32ULL, modulus.data[1] & 0xffffffffULL, - modulus.data[1] >> 32ULL, modulus.data[2] & 0xffffffffULL, modulus.data[2] >> 32ULL, - modulus.data[3] & 0xffffffffULL, modulus.data[3] >> 32ULL, - }; - constexpr uint64_t wasm_rinv = T::r_inv & 0xffffffffULL; - const uint64_t left[8]{ - data[0] & 0xffffffffULL, data[0] >> 32, data[1] & 0xffffffffULL, data[1] >> 32, - data[2] & 0xffffffffULL, data[2] >> 32, data[3] & 0xffffffffULL, data[3] >> 32, - }; - const uint64_t right[8]{ - other.data[0] & 0xffffffffULL, other.data[0] >> 32, other.data[1] & 0xffffffffULL, other.data[1] >> 32, - other.data[2] & 0xffffffffULL, other.data[2] >> 32, other.data[3] & 0xffffffffULL, other.data[3] >> 32, - }; - - auto [t0, c] = mul_wide(left[0], right[0]); - uint64_t k = (t0 * wasm_rinv) & 0xffffffffULL; - uint64_t a = mac_discard_lo(t0, k, wasm_modulus[0]); - - uint64_t t1 = mac_mini(a, left[0], right[1], a); - mac(t1, k, wasm_modulus[1], c, t0, c); - uint64_t t2 = mac_mini(a, left[0], right[2], a); - mac(t2, k, wasm_modulus[2], c, t1, c); - uint64_t t3 = mac_mini(a, left[0], right[3], a); - mac(t3, k, wasm_modulus[3], c, t2, c); - uint64_t t4 = mac_mini(a, left[0], right[4], a); - mac(t4, k, wasm_modulus[4], c, t3, c); - uint64_t t5 = mac_mini(a, left[0], right[5], a); - 
mac(t5, k, wasm_modulus[5], c, t4, c); - uint64_t t6 = mac_mini(a, left[0], right[6], a); - mac(t6, k, wasm_modulus[6], c, t5, c); - uint64_t t7 = mac_mini(a, left[0], right[7], a); - mac(t7, k, wasm_modulus[7], c, t6, c); - t7 = c + a; - - for (size_t i = 1; i < 8; ++i) { - mac_mini(t0, left[i], right[0], t0, a); - k = (t0 * wasm_rinv) & 0xffffffffULL; - c = mac_discard_lo(t0, k, wasm_modulus[0]); - mac(t1, left[i], right[1], a, t1, a); - mac(t1, k, wasm_modulus[1], c, t0, c); - mac(t2, left[i], right[2], a, t2, a); - mac(t2, k, wasm_modulus[2], c, t1, c); - mac(t3, left[i], right[3], a, t3, a); - mac(t3, k, wasm_modulus[3], c, t2, c); - mac(t4, left[i], right[4], a, t4, a); - mac(t4, k, wasm_modulus[4], c, t3, c); - mac(t5, left[i], right[5], a, t5, a); - mac(t5, k, wasm_modulus[5], c, t4, c); - mac(t6, left[i], right[6], a, t6, a); - mac(t6, k, wasm_modulus[6], c, t5, c); - mac(t7, left[i], right[7], a, t7, a); - mac(t7, k, wasm_modulus[7], c, t6, c); - t7 = c + a; - } - - // mac_mini(t0, left[2], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[2], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[2], right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[2], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[2], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[2], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[2], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[2], right[7], a, t7, a); - // mac(t7, k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - // mac_mini(t0, left[3], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[3], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[3], 
right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[3], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[3], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[3], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[3], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[3], right[7], a, t7, a); - // mac(t7, k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - // mac_mini(t0, left[4], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[4], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[4], right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[4], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[4], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[4], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[4], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[4], right[7], a, t7, a); - // mac(t7, k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - // mac_mini(t0, left[5], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[5], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[5], right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[5], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[5], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[5], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[5], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[5], right[7], a, t7, a); - // mac(t7, 
k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - // mac_mini(t0, left[6], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[6], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[6], right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[6], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[6], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[6], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[6], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[6], right[7], a, t7, a); - // mac(t7, k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - // mac_mini(t0, left[7], right[0], t0, a); - // k = (t0 * wasm_rinv) & 0xffffffffULL; - // c = mac_discard_lo(t0, k, wasm_modulus[0]); - // mac(t1, left[7], right[1], a, t1, a); - // mac(t1, k, wasm_modulus[1], c, t0, c); - // mac(t2, left[7], right[2], a, t2, a); - // mac(t2, k, wasm_modulus[2], c, t1, c); - // mac(t3, left[7], right[3], a, t3, a); - // mac(t3, k, wasm_modulus[3], c, t2, c); - // mac(t4, left[7], right[4], a, t4, a); - // mac(t4, k, wasm_modulus[4], c, t3, c); - // mac(t5, left[7], right[5], a, t5, a); - // mac(t5, k, wasm_modulus[5], c, t4, c); - // mac(t6, left[7], right[6], a, t6, a); - // mac(t6, k, wasm_modulus[6], c, t5, c); - // mac(t7, left[7], right[7], a, t7, a); - // mac(t7, k, wasm_modulus[7], c, t6, c); - // t7 = c + a; - - return { t0 + (t1 << 32), t2 + (t3 << 32), t4 + (t5 << 32), t6 + (t7 << 32) }; + // Convert 4 64-bit limbs to 9 29-bit ones + auto left = wasm_convert(data); + auto right = wasm_convert(other.data); + constexpr uint64_t mask = 0x1fffffff; + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 
= 0; + uint64_t temp_8 = 0; + uint64_t temp_9 = 0; + uint64_t temp_10 = 0; + uint64_t temp_11 = 0; + uint64_t temp_12 = 0; + uint64_t temp_13 = 0; + uint64_t temp_14 = 0; + uint64_t temp_15 = 0; + uint64_t temp_16 = 0; + + // Perform a series of multiplications and reductions (we multiply 1 limb of left argument by the whole right + // argument and then reduce) + wasm_madd(left[0], right, temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + wasm_madd(left[1], right, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_madd(left[2], right, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_madd(left[3], right, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_madd(left[4], right, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_madd(left[5], right, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_madd(left[6], right, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_madd(left[7], right, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_madd(left[8], right, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, 
temp_14, temp_15); + wasm_reduce(temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + + // Convert result to unrelaxed form (all limbs are 29 bits) + temp_10 += temp_9 >> WASM_LIMB_BITS; + temp_9 &= mask; + temp_11 += temp_10 >> WASM_LIMB_BITS; + temp_10 &= mask; + temp_12 += temp_11 >> WASM_LIMB_BITS; + temp_11 &= mask; + temp_13 += temp_12 >> WASM_LIMB_BITS; + temp_12 &= mask; + temp_14 += temp_13 >> WASM_LIMB_BITS; + temp_13 &= mask; + temp_15 += temp_14 >> WASM_LIMB_BITS; + temp_14 &= mask; + temp_16 += temp_15 >> WASM_LIMB_BITS; + temp_15 &= mask; + + // Convert back to 4 64-bit limbs form + return { (temp_9 << 0) | (temp_10 << 29) | (temp_11 << 58), + (temp_11 >> 6) | (temp_12 << 23) | (temp_13 << 52), + (temp_13 >> 12) | (temp_14 << 17) | (temp_15 << 46), + (temp_15 >> 18) | (temp_16 << 11) }; #endif } @@ -695,9 +705,135 @@ template constexpr field field::montgomery_square() const noexce t3 = carry_lo + round_carry; return { t0, t1, t2, t3 }; #else - // We use ‘montgomery_mul' instead of 'square_accumulate'. The number of additions and comparisons in - // 'square_accumulate' makes it slower in this particular case. 
- return montgomery_mul(*this); + // Convert from 4 64-bit limbs to 9 29-bit ones + auto left = wasm_convert(data); + constexpr uint64_t mask = 0x1fffffff; + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 = 0; + uint64_t temp_8 = 0; + uint64_t temp_9 = 0; + uint64_t temp_10 = 0; + uint64_t temp_11 = 0; + uint64_t temp_12 = 0; + uint64_t temp_13 = 0; + uint64_t temp_14 = 0; + uint64_t temp_15 = 0; + uint64_t temp_16 = 0; + uint64_t acc; + // Perform multiplications, but accumulated results for limb k=i+j so that we can double them at the same time + temp_0 += left[0] * left[0]; + acc = 0; + acc += left[0] * left[1]; + temp_1 += (acc << 1); + acc = 0; + acc += left[0] * left[2]; + temp_2 += left[1] * left[1]; + temp_2 += (acc << 1); + acc = 0; + acc += left[0] * left[3]; + acc += left[1] * left[2]; + temp_3 += (acc << 1); + acc = 0; + acc += left[0] * left[4]; + acc += left[1] * left[3]; + temp_4 += left[2] * left[2]; + temp_4 += (acc << 1); + acc = 0; + acc += left[0] * left[5]; + acc += left[1] * left[4]; + acc += left[2] * left[3]; + temp_5 += (acc << 1); + acc = 0; + acc += left[0] * left[6]; + acc += left[1] * left[5]; + acc += left[2] * left[4]; + temp_6 += left[3] * left[3]; + temp_6 += (acc << 1); + acc = 0; + acc += left[0] * left[7]; + acc += left[1] * left[6]; + acc += left[2] * left[5]; + acc += left[3] * left[4]; + temp_7 += (acc << 1); + acc = 0; + acc += left[0] * left[8]; + acc += left[1] * left[7]; + acc += left[2] * left[6]; + acc += left[3] * left[5]; + temp_8 += left[4] * left[4]; + temp_8 += (acc << 1); + acc = 0; + acc += left[1] * left[8]; + acc += left[2] * left[7]; + acc += left[3] * left[6]; + acc += left[4] * left[5]; + temp_9 += (acc << 1); + acc = 0; + acc += left[2] * left[8]; + acc += left[3] * left[7]; + acc += left[4] * left[6]; + temp_10 += left[5] * left[5]; + temp_10 += (acc << 1); + acc = 0; + acc += 
left[3] * left[8]; + acc += left[4] * left[7]; + acc += left[5] * left[6]; + temp_11 += (acc << 1); + acc = 0; + acc += left[4] * left[8]; + acc += left[5] * left[7]; + temp_12 += left[6] * left[6]; + temp_12 += (acc << 1); + acc = 0; + acc += left[5] * left[8]; + acc += left[6] * left[7]; + temp_13 += (acc << 1); + acc = 0; + acc += left[6] * left[8]; + temp_14 += left[7] * left[7]; + temp_14 += (acc << 1); + acc = 0; + acc += left[7] * left[8]; + temp_15 += (acc << 1); + temp_16 += left[8] * left[8]; + + // Perform reductions + wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_reduce(temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + + // Convert to unrelaxed 29-bit form + temp_10 += temp_9 >> WASM_LIMB_BITS; + temp_9 &= mask; + temp_11 += temp_10 >> WASM_LIMB_BITS; + temp_10 &= mask; + temp_12 += temp_11 >> WASM_LIMB_BITS; + temp_11 &= mask; + temp_13 += temp_12 >> WASM_LIMB_BITS; + temp_12 &= mask; + temp_14 += temp_13 >> WASM_LIMB_BITS; + temp_13 &= mask; + temp_15 += temp_14 >> WASM_LIMB_BITS; + temp_14 &= mask; + temp_16 += temp_15 >> WASM_LIMB_BITS; + temp_15 &= mask; + // Convert to 4 64-bit form + return { (temp_9 << 0) | (temp_10 << 29) | (temp_11 << 58), + (temp_11 >> 6) | (temp_12 << 23) | (temp_13 << 52), + (temp_13 >> 12) | (temp_14 << 17) | (temp_15 << 46), + 
(temp_15 >> 18) | (temp_16 << 11) }; #endif } @@ -726,93 +862,82 @@ template constexpr struct field::wide_array field::mul_512(const return { r0, r1, r2, r3, r4, r5, r6, carry_2 }; #else - const uint64_t left[8]{ - data[0] & 0xffffffffULL, data[0] >> 32, data[1] & 0xffffffffULL, data[1] >> 32, - data[2] & 0xffffffffULL, data[2] >> 32, data[3] & 0xffffffffULL, data[3] >> 32, - }; - - const uint64_t right[8]{ - other.data[0] & 0xffffffffULL, other.data[0] >> 32, other.data[1] & 0xffffffffULL, other.data[1] >> 32, - other.data[2] & 0xffffffffULL, other.data[2] >> 32, other.data[3] & 0xffffffffULL, other.data[3] >> 32, - }; - - uint64_t carry_2 = 0; - auto [r0, carry] = mul_wide(left[0], right[0]); - uint64_t r1 = mac_mini(carry, left[0], right[1], carry); - uint64_t r2 = mac_mini(carry, left[0], right[2], carry); - uint64_t r3 = mac_mini(carry, left[0], right[3], carry); - uint64_t r4 = mac_mini(carry, left[0], right[4], carry); - uint64_t r5 = mac_mini(carry, left[0], right[5], carry); - uint64_t r6 = mac_mini(carry, left[0], right[6], carry); - uint64_t r7 = mac_mini(carry, left[0], right[7], carry_2); - - r1 = mac_mini(r1, left[1], right[0], carry); - r2 = mac(r2, left[1], right[1], carry, carry); - r3 = mac(r3, left[1], right[2], carry, carry); - r4 = mac(r4, left[1], right[3], carry, carry); - r5 = mac(r5, left[1], right[4], carry, carry); - r6 = mac(r6, left[1], right[5], carry, carry); - r7 = mac(r7, left[1], right[6], carry, carry); - uint64_t r8 = mac(carry_2, left[1], right[7], carry, carry_2); - - r2 = mac_mini(r2, left[2], right[0], carry); - r3 = mac(r3, left[2], right[1], carry, carry); - r4 = mac(r4, left[2], right[2], carry, carry); - r5 = mac(r5, left[2], right[3], carry, carry); - r6 = mac(r6, left[2], right[4], carry, carry); - r7 = mac(r7, left[2], right[5], carry, carry); - r8 = mac(r8, left[2], right[6], carry, carry); - uint64_t r9 = mac(carry_2, left[2], right[7], carry, carry_2); - - r3 = mac_mini(r3, left[3], right[0], carry); - r4 = mac(r4, 
left[3], right[1], carry, carry); - r5 = mac(r5, left[3], right[2], carry, carry); - r6 = mac(r6, left[3], right[3], carry, carry); - r7 = mac(r7, left[3], right[4], carry, carry); - r8 = mac(r8, left[3], right[5], carry, carry); - r9 = mac(r9, left[3], right[6], carry, carry); - uint64_t r10 = mac(carry_2, left[3], right[7], carry, carry_2); - - r4 = mac_mini(r4, left[4], right[0], carry); - r5 = mac(r5, left[4], right[1], carry, carry); - r6 = mac(r6, left[4], right[2], carry, carry); - r7 = mac(r7, left[4], right[3], carry, carry); - r8 = mac(r8, left[4], right[4], carry, carry); - r9 = mac(r9, left[4], right[5], carry, carry); - r10 = mac(r10, left[4], right[6], carry, carry); - uint64_t r11 = mac(carry_2, left[4], right[7], carry, carry_2); - - r5 = mac_mini(r5, left[5], right[0], carry); - r6 = mac(r6, left[5], right[1], carry, carry); - r7 = mac(r7, left[5], right[2], carry, carry); - r8 = mac(r8, left[5], right[3], carry, carry); - r9 = mac(r9, left[5], right[4], carry, carry); - r10 = mac(r10, left[5], right[5], carry, carry); - r11 = mac(r11, left[5], right[6], carry, carry); - uint64_t r12 = mac(carry_2, left[5], right[7], carry, carry_2); - - r6 = mac_mini(r6, left[6], right[0], carry); - r7 = mac(r7, left[6], right[1], carry, carry); - r8 = mac(r8, left[6], right[2], carry, carry); - r9 = mac(r9, left[6], right[3], carry, carry); - r10 = mac(r10, left[6], right[4], carry, carry); - r11 = mac(r11, left[6], right[5], carry, carry); - r12 = mac(r12, left[6], right[6], carry, carry); - uint64_t r13 = mac(carry_2, left[6], right[7], carry, carry_2); - - r7 = mac_mini(r7, left[7], right[0], carry); - r8 = mac(r8, left[7], right[1], carry, carry); - r9 = mac(r9, left[7], right[2], carry, carry); - r10 = mac(r10, left[7], right[3], carry, carry); - r11 = mac(r11, left[7], right[4], carry, carry); - r12 = mac(r12, left[7], right[5], carry, carry); - r13 = mac(r13, left[7], right[6], carry, carry); - uint64_t r14 = mac(carry_2, left[7], right[7], carry, 
carry_2); - - return { - r0 + (r1 << 32), r2 + (r3 << 32), r4 + (r5 << 32), r6 + (r7 << 32), - r8 + (r9 << 32), r10 + (r11 << 32), r12 + (r13 << 32), r14 + (carry_2 << 32), - }; + // Convert from 4 64-bit limbs to 9 29-bit limbs + auto left = wasm_convert(data); + auto right = wasm_convert(other.data); + constexpr uint64_t mask = 0x1fffffff; + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 = 0; + uint64_t temp_8 = 0; + uint64_t temp_9 = 0; + uint64_t temp_10 = 0; + uint64_t temp_11 = 0; + uint64_t temp_12 = 0; + uint64_t temp_13 = 0; + uint64_t temp_14 = 0; + uint64_t temp_15 = 0; + uint64_t temp_16 = 0; + + // Multiply-add all limbs + wasm_madd(left[0], right, temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + wasm_madd(left[1], right, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_madd(left[2], right, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_madd(left[3], right, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_madd(left[4], right, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); + wasm_madd(left[5], right, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_madd(left[6], right, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_madd(left[7], right, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_madd(left[8], right, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + + // Convert to unrelaxed 29-bit form + temp_1 += temp_0 >> WASM_LIMB_BITS; + temp_0 &= mask; + temp_2 += temp_1 >> WASM_LIMB_BITS; + temp_1 &= mask; + temp_3 += temp_2 >> WASM_LIMB_BITS; + temp_2 &= mask; + temp_4 += temp_3 >> WASM_LIMB_BITS; + temp_3 &= mask; + temp_5 += temp_4 >> 
WASM_LIMB_BITS; + temp_4 &= mask; + temp_6 += temp_5 >> WASM_LIMB_BITS; + temp_5 &= mask; + temp_7 += temp_6 >> WASM_LIMB_BITS; + temp_6 &= mask; + temp_8 += temp_7 >> WASM_LIMB_BITS; + temp_7 &= mask; + temp_9 += temp_8 >> WASM_LIMB_BITS; + temp_8 &= mask; + temp_10 += temp_9 >> WASM_LIMB_BITS; + temp_9 &= mask; + temp_11 += temp_10 >> WASM_LIMB_BITS; + temp_10 &= mask; + temp_12 += temp_11 >> WASM_LIMB_BITS; + temp_11 &= mask; + temp_13 += temp_12 >> WASM_LIMB_BITS; + temp_12 &= mask; + temp_14 += temp_13 >> WASM_LIMB_BITS; + temp_13 &= mask; + temp_15 += temp_14 >> WASM_LIMB_BITS; + temp_14 &= mask; + temp_16 += temp_15 >> WASM_LIMB_BITS; + temp_15 &= mask; + + // Convert to 8 64-bit limbs + return { (temp_0 << 0) | (temp_1 << 29) | (temp_2 << 58), + (temp_2 >> 6) | (temp_3 << 23) | (temp_4 << 52), + (temp_4 >> 12) | (temp_5 << 17) | (temp_6 << 46), + (temp_6 >> 18) | (temp_7 << 11) | (temp_8 << 40), + (temp_8 >> 24) | (temp_9 << 5) | (temp_10 << 34) | (temp_11 << 63), + (temp_11 >> 1) | (temp_12 << 28) | (temp_13 << 57), + (temp_13 >> 7) | (temp_14 << 22) | (temp_15 << 51), + (temp_15 >> 13) | (temp_16 << 16) }; #endif } diff --git a/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256.hpp b/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256.hpp index fe9e759adaa..0a9b705e22d 100644 --- a/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256.hpp +++ b/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256.hpp @@ -23,7 +23,12 @@ namespace bb::numeric { class alignas(32) uint256_t { + public: +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) +#define WASM_NUM_LIMBS 9 +#define WASM_LIMB_BITS 29 +#endif constexpr uint256_t(const uint64_t a = 0) noexcept : data{ a, 0, 0, 0 } {} @@ -208,6 +213,20 @@ class alignas(32) uint256_t { uint64_t b, uint64_t c, uint64_t carry_in); +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) + static constexpr void wasm_madd(const uint64_t& left_limb, + const uint64_t* right_limbs, + uint64_t& result_0, + 
uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8); + [[nodiscard]] static constexpr std::array wasm_convert(const uint64_t* data); +#endif }; inline std::ostream& operator<<(std::ostream& os, uint256_t const& a) diff --git a/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp b/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp index ee51adc763a..29be0dfa01c 100644 --- a/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/numeric/uint256/uint256_impl.hpp @@ -73,7 +73,52 @@ constexpr uint64_t uint256_t::mac_discard_hi(const uint64_t a, { return (b * c + a + carry_in); } +#if defined(__wasm__) || !defined(__SIZEOF_INT128__) +/** + * @brief Multiply one limb by 9 limbs and add to resulting limbs + * + */ +constexpr void uint256_t::wasm_madd(const uint64_t& left_limb, + const uint64_t* right_limbs, + uint64_t& result_0, + uint64_t& result_1, + uint64_t& result_2, + uint64_t& result_3, + uint64_t& result_4, + uint64_t& result_5, + uint64_t& result_6, + uint64_t& result_7, + uint64_t& result_8) +{ + result_0 += left_limb * right_limbs[0]; + result_1 += left_limb * right_limbs[1]; + result_2 += left_limb * right_limbs[2]; + result_3 += left_limb * right_limbs[3]; + result_4 += left_limb * right_limbs[4]; + result_5 += left_limb * right_limbs[5]; + result_6 += left_limb * right_limbs[6]; + result_7 += left_limb * right_limbs[7]; + result_8 += left_limb * right_limbs[8]; +} + +/** + * @brief Convert from 4 64-bit limbs to 9 29-bit limbs + * + */ +constexpr std::array uint256_t::wasm_convert(const uint64_t* data) +{ + return { data[0] & 0x1fffffff, + (data[0] >> 29) & 0x1fffffff, + ((data[0] >> 58) & 0x3f) | ((data[1] & 0x7fffff) << 6), + (data[1] >> 23) & 0x1fffffff, + ((data[1] >> 52) & 0xfff) | ((data[2] & 0x1ffff) << 12), + (data[2] >> 17) & 0x1fffffff, + ((data[2] 
>> 46) & 0x3ffff) | ((data[3] & 0x7ff) << 18), + (data[3] >> 11) & 0x1fffffff, + (data[3] >> 40) & 0x1fffffff }; +} +#endif constexpr std::pair uint256_t::divmod(const uint256_t& b) const { if (*this == 0 || b == 0) { @@ -122,8 +167,13 @@ constexpr std::pair uint256_t::divmod(const uint256_t& b) return { quotient, remainder }; } +/** + * @brief Compute the result of multiplication modulu 2**512 + * + */ constexpr std::pair uint256_t::mul_extended(const uint256_t& other) const { +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) const auto [r0, t0] = mul_wide(data[0], other.data[0]); const auto [q0, t1] = mac(t0, data[0], other.data[1], 0); const auto [q1, t2] = mac(t1, data[0], other.data[2], 0); @@ -147,6 +197,84 @@ constexpr std::pair uint256_t::mul_extended(const uint256_ uint256_t lo(r0, r1, r2, r3); uint256_t hi(r4, r5, r6, r7); return { lo, hi }; +#else + // Convert 4 64-bit limbs to 9 29-bit limbs + const auto left = wasm_convert(data); + const auto right = wasm_convert(other.data); + constexpr uint64_t mask = 0x1fffffff; + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 = 0; + uint64_t temp_8 = 0; + uint64_t temp_9 = 0; + uint64_t temp_10 = 0; + uint64_t temp_11 = 0; + uint64_t temp_12 = 0; + uint64_t temp_13 = 0; + uint64_t temp_14 = 0; + uint64_t temp_15 = 0; + uint64_t temp_16 = 0; + + // Multiply and addd all limbs + wasm_madd(left[0], &right[0], temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + wasm_madd(left[1], &right[0], temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); + wasm_madd(left[2], &right[0], temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); + wasm_madd(left[3], &right[0], temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); + wasm_madd(left[4], &right[0], temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, 
temp_11, temp_12); + wasm_madd(left[5], &right[0], temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); + wasm_madd(left[6], &right[0], temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); + wasm_madd(left[7], &right[0], temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); + wasm_madd(left[8], &right[0], temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); + + // Convert from relaxed form into strict 29-bit form (except for temp_16) + temp_1 += temp_0 >> WASM_LIMB_BITS; + temp_0 &= mask; + temp_2 += temp_1 >> WASM_LIMB_BITS; + temp_1 &= mask; + temp_3 += temp_2 >> WASM_LIMB_BITS; + temp_2 &= mask; + temp_4 += temp_3 >> WASM_LIMB_BITS; + temp_3 &= mask; + temp_5 += temp_4 >> WASM_LIMB_BITS; + temp_4 &= mask; + temp_6 += temp_5 >> WASM_LIMB_BITS; + temp_5 &= mask; + temp_7 += temp_6 >> WASM_LIMB_BITS; + temp_6 &= mask; + temp_8 += temp_7 >> WASM_LIMB_BITS; + temp_7 &= mask; + temp_9 += temp_8 >> WASM_LIMB_BITS; + temp_8 &= mask; + temp_10 += temp_9 >> WASM_LIMB_BITS; + temp_9 &= mask; + temp_11 += temp_10 >> WASM_LIMB_BITS; + temp_10 &= mask; + temp_12 += temp_11 >> WASM_LIMB_BITS; + temp_11 &= mask; + temp_13 += temp_12 >> WASM_LIMB_BITS; + temp_12 &= mask; + temp_14 += temp_13 >> WASM_LIMB_BITS; + temp_13 &= mask; + temp_15 += temp_14 >> WASM_LIMB_BITS; + temp_14 &= mask; + temp_16 += temp_15 >> WASM_LIMB_BITS; + temp_15 &= mask; + + // Convert to 2 4-64-bit limb uint256_t objects + return { { (temp_0 << 0) | (temp_1 << 29) | (temp_2 << 58), + (temp_2 >> 6) | (temp_3 << 23) | (temp_4 << 52), + (temp_4 >> 12) | (temp_5 << 17) | (temp_6 << 46), + (temp_6 >> 18) | (temp_7 << 11) | (temp_8 << 40) }, + { (temp_8 >> 24) | (temp_9 << 5) | (temp_10 << 34) | (temp_11 << 63), + (temp_11 >> 1) | (temp_12 << 28) | (temp_13 << 57), + (temp_13 >> 7) | (temp_14 << 22) | (temp_15 << 51), + (temp_15 >> 13) | (temp_16 << 16) } }; +#endif } /** @@ -227,6 +355,8 @@ constexpr uint256_t 
uint256_t::operator-() const constexpr uint256_t uint256_t::operator*(const uint256_t& other) const { + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) const auto [r0, t0] = mac(0, data[0], other.data[0], 0ULL); const auto [q0, t1] = mac(0, data[0], other.data[1], t0); const auto [q1, t2] = mac(0, data[0], other.data[2], t1); @@ -242,6 +372,86 @@ constexpr uint256_t uint256_t::operator*(const uint256_t& other) const const auto r3 = mac_discard_hi(q5, data[3], other.data[0], 0ULL); return { r0, r1, r2, r3 }; +#else + // Convert 4 64-bit limbs to 9 29-bit limbs + const auto left = wasm_convert(data); + const auto right = wasm_convert(other.data); + uint64_t temp_0 = 0; + uint64_t temp_1 = 0; + uint64_t temp_2 = 0; + uint64_t temp_3 = 0; + uint64_t temp_4 = 0; + uint64_t temp_5 = 0; + uint64_t temp_6 = 0; + uint64_t temp_7 = 0; + uint64_t temp_8 = 0; + + // Multiply and add the product of left limb 0 by all right limbs + wasm_madd(left[0], &right[0], temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); + // Multiply left limb 1 by limbs 0-7 ((1,8) doesn't need to be computed, because it overflows) + temp_1 += left[1] * right[0]; + temp_2 += left[1] * right[1]; + temp_3 += left[1] * right[2]; + temp_4 += left[1] * right[3]; + temp_5 += left[1] * right[4]; + temp_6 += left[1] * right[5]; + temp_7 += left[1] * right[6]; + temp_8 += left[1] * right[7]; + // Left limb 2 by right 0-6, etc + temp_2 += left[2] * right[0]; + temp_3 += left[2] * right[1]; + temp_4 += left[2] * right[2]; + temp_5 += left[2] * right[3]; + temp_6 += left[2] * right[4]; + temp_7 += left[2] * right[5]; + temp_8 += left[2] * right[6]; + temp_3 += left[3] * right[0]; + temp_4 += left[3] * right[1]; + temp_5 += left[3] * right[2]; + temp_6 += left[3] * right[3]; + temp_7 += left[3] * right[4]; + temp_8 += left[3] * right[5]; + temp_4 += left[4] * right[0]; + temp_5 += left[4] * right[1]; + temp_6 += left[4] * right[2]; + temp_7 += left[4] * right[3]; + temp_8 += left[4] * 
right[4]; + temp_5 += left[5] * right[0]; + temp_6 += left[5] * right[1]; + temp_7 += left[5] * right[2]; + temp_8 += left[5] * right[3]; + temp_6 += left[6] * right[0]; + temp_7 += left[6] * right[1]; + temp_8 += left[6] * right[2]; + temp_7 += left[7] * right[0]; + temp_8 += left[7] * right[1]; + temp_8 += left[8] * right[0]; + + // Convert from relaxed form to strict 29-bit form + constexpr uint64_t mask = 0x1fffffff; + temp_1 += temp_0 >> WASM_LIMB_BITS; + temp_0 &= mask; + temp_2 += temp_1 >> WASM_LIMB_BITS; + temp_1 &= mask; + temp_3 += temp_2 >> WASM_LIMB_BITS; + temp_2 &= mask; + temp_4 += temp_3 >> WASM_LIMB_BITS; + temp_3 &= mask; + temp_5 += temp_4 >> WASM_LIMB_BITS; + temp_4 &= mask; + temp_6 += temp_5 >> WASM_LIMB_BITS; + temp_5 &= mask; + temp_7 += temp_6 >> WASM_LIMB_BITS; + temp_6 &= mask; + temp_8 += temp_7 >> WASM_LIMB_BITS; + temp_7 &= mask; + + // Convert back to 4 64-bit limbs + return { (temp_0 << 0) | (temp_1 << 29) | (temp_2 << 58), + (temp_2 >> 6) | (temp_3 << 23) | (temp_4 << 52), + (temp_4 >> 12) | (temp_5 << 17) | (temp_6 << 46), + (temp_6 >> 18) | (temp_7 << 11) | (temp_8 << 40) }; +#endif } constexpr uint256_t uint256_t::operator/(const uint256_t& other) const diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp index aedc1353787..d63af5db8e7 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp @@ -119,6 +119,13 @@ template class Univariate } return *this; } + Univariate& self_sqr() + { + for (size_t i = 0; i < LENGTH; ++i) { + evaluations[i].self_sqr(); + } + return *this; + } Univariate operator+(const Univariate& other) const { Univariate res(*this); @@ -148,6 +155,13 @@ template class Univariate return res; } + Univariate sqr() const + { + Univariate res(*this); + res.self_sqr(); + return res; + } + // Operations between Univariate and scalar Univariate& 
operator+=(const Fr& scalar) { @@ -485,6 +499,12 @@ template class Univariate res *= other; return res; } + Univariate sqr() const + { + Univariate res(*this); + res = res.sqr(); + return res; + } Univariate operator*(const Univariate& other) const { diff --git a/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp index 144ebdf9808..0b18b94596a 100644 --- a/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/auxiliary_relation.hpp @@ -245,7 +245,7 @@ template class AuxiliaryRelationImpl { auto index_delta = w_1_shift - w_1; auto record_delta = w_4_shift - w_4; - auto index_is_monotonically_increasing = index_delta * index_delta - index_delta; // deg 2 + auto index_is_monotonically_increasing = index_delta.sqr() - index_delta; // deg 2 auto adjacent_values_match_if_adjacent_indices_match = (-index_delta + FF(1)) * record_delta; // deg 2 @@ -296,7 +296,7 @@ template class AuxiliaryRelationImpl { // do with an arithmetic gate because of the `eta` factors. We need to check that the *next* gate's access // type is correct, to cover this edge case // deg 2 or 4 - auto next_gate_access_type_is_boolean = next_gate_access_type * next_gate_access_type - next_gate_access_type; + auto next_gate_access_type_is_boolean = next_gate_access_type.sqr() - next_gate_access_type; auto q_arith_by_aux_and_scaling = q_arith * q_aux_by_scaling; // Putting it all together... 
diff --git a/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp index dee7759db07..e26ad519633 100644 --- a/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/delta_range_constraint_relation.hpp @@ -46,7 +46,6 @@ template class DeltaRangeConstraintRelationImpl { static const FF minus_one = FF(-1); static const FF minus_two = FF(-2); - static const FF minus_three = FF(-3); // Compute wire differences auto delta_1 = w_2 - w_1; @@ -55,37 +54,29 @@ template class DeltaRangeConstraintRelationImpl { auto delta_4 = w_1_shift - w_4; // Contribution (1) - auto tmp_1 = delta_1; - tmp_1 *= (delta_1 + minus_one); - tmp_1 *= (delta_1 + minus_two); - tmp_1 *= (delta_1 + minus_three); + auto tmp_1 = (delta_1 + minus_one).sqr() + minus_one; + tmp_1 *= (delta_1 + minus_two).sqr() + minus_one; tmp_1 *= q_delta_range; tmp_1 *= scaling_factor; std::get<0>(accumulators) += tmp_1; // Contribution (2) - auto tmp_2 = delta_2; - tmp_2 *= (delta_2 + minus_one); - tmp_2 *= (delta_2 + minus_two); - tmp_2 *= (delta_2 + minus_three); + auto tmp_2 = (delta_2 + minus_one).sqr() + minus_one; + tmp_2 *= (delta_2 + minus_two).sqr() + minus_one; tmp_2 *= q_delta_range; tmp_2 *= scaling_factor; std::get<1>(accumulators) += tmp_2; // Contribution (3) - auto tmp_3 = delta_3; - tmp_3 *= (delta_3 + minus_one); - tmp_3 *= (delta_3 + minus_two); - tmp_3 *= (delta_3 + minus_three); + auto tmp_3 = (delta_3 + minus_one).sqr() + minus_one; + tmp_3 *= (delta_3 + minus_two).sqr() + minus_one; tmp_3 *= q_delta_range; tmp_3 *= scaling_factor; std::get<2>(accumulators) += tmp_3; // Contribution (4) - auto tmp_4 = delta_4; - tmp_4 *= (delta_4 + minus_one); - tmp_4 *= (delta_4 + minus_two); - tmp_4 *= (delta_4 + minus_three); + auto tmp_4 = (delta_4 + minus_one).sqr() + minus_one; + tmp_4 *= (delta_4 + minus_two).sqr() + minus_one; 
tmp_4 *= q_delta_range; tmp_4 *= scaling_factor; std::get<3>(accumulators) += tmp_4; diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp index faf2f0da162..11194edba10 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_op_queue_relation.hpp @@ -57,50 +57,43 @@ template class EccOpQueueRelationImpl { auto lagrange_ecc_op = View(in.lagrange_ecc_op); // If lagrange_ecc_op is the indicator for ecc_op_gates, this is the indicator for the complement - auto complement_ecc_op = -lagrange_ecc_op + FF(1); + auto lagrange_by_scaling = lagrange_ecc_op * scaling_factor; + auto complement_ecc_op_by_scaling = -lagrange_by_scaling + scaling_factor; // Contribution (1) auto tmp = op_wire_1 - w_1; - tmp *= lagrange_ecc_op; - tmp *= scaling_factor; + tmp *= lagrange_by_scaling; std::get<0>(accumulators) += tmp; // Contribution (2) tmp = op_wire_2 - w_2; - tmp *= lagrange_ecc_op; - tmp *= scaling_factor; + tmp *= lagrange_by_scaling; std::get<1>(accumulators) += tmp; // Contribution (3) tmp = op_wire_3 - w_3; - tmp *= lagrange_ecc_op; - tmp *= scaling_factor; + tmp *= lagrange_by_scaling; std::get<2>(accumulators) += tmp; // Contribution (4) tmp = op_wire_4 - w_4; - tmp *= lagrange_ecc_op; - tmp *= scaling_factor; + tmp *= lagrange_by_scaling; std::get<3>(accumulators) += tmp; // Contribution (5) - tmp = op_wire_1 * complement_ecc_op; - tmp *= scaling_factor; + tmp = op_wire_1 * complement_ecc_op_by_scaling; std::get<4>(accumulators) += tmp; // Contribution (6) - tmp = op_wire_2 * complement_ecc_op; - tmp *= scaling_factor; + tmp = op_wire_2 * complement_ecc_op_by_scaling; std::get<5>(accumulators) += tmp; // Contribution (7) - tmp = op_wire_3 * complement_ecc_op; - tmp *= scaling_factor; + tmp = op_wire_3 * complement_ecc_op_by_scaling; std::get<6>(accumulators) += tmp; // Contribution (8) - tmp = 
op_wire_4 * complement_ecc_op; - tmp *= scaling_factor; + tmp = op_wire_4 * complement_ecc_op_by_scaling; std::get<7>(accumulators) += tmp; }; }; diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.cpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.cpp index b71b5a6e4a0..51dc6851d75 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.cpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.cpp @@ -182,7 +182,7 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator relation += selector * (lambda * (xb - xa - 1) - (yb - ya)) + lambda; collision_relation += selector * (xb - xa); // x3 = L.L + (-xb - xa) * q + (1 - q) xa - auto x_out = lambda * lambda + (-xb - xa - xa) * selector + xa; + auto x_out = lambda.sqr() + (-xb - xa - xa) * selector + xa; // y3 = L . (xa - x3) - ya * q + (1 - q) ya auto y_out = lambda * (xa - x_out) + (-ya - ya) * selector + ya; @@ -219,7 +219,7 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator auto dbl = [&](auto& x, auto& y, auto& lambda, auto& relation) { auto two_x = x + x; relation += lambda * (y + y) - (two_x + x) * x; - auto x_out = lambda * lambda - two_x; + auto x_out = lambda.sqr() - two_x; auto y_out = lambda * (x - x_out) - y; return std::array{ x_out, y_out }; }; diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.cpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.cpp index 0efec02d548..dfc1edfb9c3 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.cpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.cpp @@ -122,9 +122,9 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu auto two_x = Tx + Tx; auto three_x = two_x + Tx; auto three_xx = Tx * three_x; - auto nine_xxxx = three_xx * three_xx; + auto nine_xxxx = 
three_xx.sqr(); auto two_y = Ty + Ty; - auto four_yy = two_y * two_y; + auto four_yy = two_y.sqr(); auto x_double_check = (Dx + two_x) * four_yy - nine_xxxx; auto y_double_check = (Ty + Dy) * two_y + three_xx * (Dx - Tx); std::get<0>(accumulator) += precompute_point_transition * x_double_check * scaling_factor; @@ -164,7 +164,7 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu const auto& y3 = Ty; const auto lambda_numerator = y2 - y1; const auto lambda_denominator = x2 - x1; - auto x_add_check = (x3 + x2 + x1) * lambda_denominator * lambda_denominator - lambda_numerator * lambda_numerator; + auto x_add_check = (x3 + x2 + x1) * lambda_denominator.sqr() - lambda_numerator.sqr(); auto y_add_check = (y3 + y1) * lambda_denominator + (x3 - x1) * lambda_numerator; std::get<4>(accumulator) += (-lagrange_first + 1) * (-precompute_point_transition + 1) * x_add_check * scaling_factor; diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_transcript_relation.cpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_transcript_relation.cpp index 4e7a4cdbdb6..b2d0f2cedbd 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_transcript_relation.cpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_transcript_relation.cpp @@ -152,7 +152,7 @@ void ECCVMTranscriptRelationImpl::accumulate(ContainerOverSubrelations& accu auto y1 = transcript_accumulator_y; auto x2 = transcript_msm_x; auto y2 = transcript_msm_y; - auto tmpx = (x3 + x2 + x1) * (x2 - x1) * (x2 - x1) - (y2 - y1) * (y2 - y1); + auto tmpx = (x3 + x2 + x1) * (x2 - x1).sqr() - (y2 - y1).sqr(); auto tmpy = (y3 + y1) * (x2 - x1) - (y2 - y1) * (x1 - x3); std::get<7>(accumulator) += tmpx * add_msm_into_accumulator * scaling_factor; // degree 5 std::get<8>(accumulator) += tmpy * add_msm_into_accumulator * scaling_factor; // degree 4 @@ -177,7 +177,7 @@ void ECCVMTranscriptRelationImpl::accumulate(ContainerOverSubrelations& accu x2 = transcript_Px; y2 = 
transcript_Py; auto add_into_accumulator = q_add * (-is_accumulator_empty + 1); - tmpx = (x3 + x2 + x1) * (x2 - x1) * (x2 - x1) - (y2 - y1) * (y2 - y1); + tmpx = (x3 + x2 + x1) * (x2 - x1).sqr() - (y2 - y1).sqr(); tmpy = (y3 + y1) * (x2 - x1) - (y2 - y1) * (x1 - x3); std::get<11>(accumulator) += tmpx * add_into_accumulator * scaling_factor; // degree 5 std::get<12>(accumulator) += tmpy * add_into_accumulator * scaling_factor; // degree 4 @@ -214,14 +214,14 @@ void ECCVMTranscriptRelationImpl::accumulate(ContainerOverSubrelations& accu std::get<22>(accumulator) += q_eq * is_accumulator_empty * scaling_factor; // validate selectors are boolean (put somewhere else? these are low degree) - std::get<23>(accumulator) += q_eq * (q_eq - 1) * scaling_factor; - std::get<24>(accumulator) += q_add * (q_add - 1) * scaling_factor; - std::get<25>(accumulator) += q_mul * (q_mul - 1) * scaling_factor; - std::get<26>(accumulator) += q_reset_accumulator * (q_reset_accumulator - 1) * scaling_factor; - std::get<27>(accumulator) += msm_transition * (msm_transition - 1) * scaling_factor; - std::get<28>(accumulator) += is_accumulator_empty * (is_accumulator_empty - 1) * scaling_factor; - std::get<29>(accumulator) += z1_zero * (z1_zero - 1) * scaling_factor; - std::get<30>(accumulator) += z2_zero * (z2_zero - 1) * scaling_factor; + std::get<23>(accumulator) += (q_eq.sqr() - q_eq) * scaling_factor; + std::get<24>(accumulator) += (q_add.sqr() - q_add) * scaling_factor; + std::get<25>(accumulator) += (q_mul.sqr() - q_mul) * scaling_factor; + std::get<26>(accumulator) += (q_reset_accumulator.sqr() - q_reset_accumulator) * scaling_factor; + std::get<27>(accumulator) += (msm_transition.sqr() - msm_transition) * scaling_factor; + std::get<28>(accumulator) += (is_accumulator_empty.sqr() - is_accumulator_empty) * scaling_factor; + std::get<29>(accumulator) += (z1_zero.sqr() - z1_zero) * scaling_factor; + std::get<30>(accumulator) += (z2_zero.sqr() - z2_zero) * scaling_factor; /** * @brief Initial 
condition check on 1st row. diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.cpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.cpp index 3c7e7ca8433..111b5ebb253 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.cpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.cpp @@ -64,7 +64,7 @@ void ECCVMWnafRelationImpl::accumulate(ContainerOverSubrelations& accumulato }; const auto range_constraint_slice_to_2_bits = [&scaling_factor](const View& s, auto& acc) { - acc += s * (s - 1) * (s - 2) * (s - 3) * scaling_factor; + acc += ((s - 1).sqr() - 1) * ((s - 2).sqr() - 1) * scaling_factor; }; const auto convert_to_wnaf = [](const View& s0, const View& s1) { diff --git a/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp index f6201145fba..b05740fc7b0 100644 --- a/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/elliptic_relation.hpp @@ -63,8 +63,8 @@ template class EllipticRelationImpl { // Contribution (1) point addition, x-coordinate check // q_elliptic * (x3 + x2 + x1)(x2 - x1)(x2 - x1) - y2^2 - y1^2 + 2(y2y1)*q_sign = 0 auto x_diff = (x_2 - x_1); - auto y2_sqr = (y_2 * y_2); - auto y1_sqr = (y_1 * y_1); + auto y2_sqr = y_2.sqr(); + auto y1_sqr = y_1.sqr(); auto y1y2 = y_1 * y_2 * q_sign; auto x_add_identity = (x_3 + x_2 + x_1) * x_diff * x_diff - y2_sqr - y1_sqr + y1y2 + y1y2; diff --git a/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp index 274b644db9c..89fb3e10593 100644 --- a/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/poseidon2_external_relation.hpp @@ -66,17 +66,17 @@ template class Poseidon2ExternalRelationImpl { auto 
s4 = w_4 + q_4; // apply s-box round - auto u1 = s1 * s1; - u1 *= u1; + auto u1 = s1.sqr(); + u1 = u1.sqr(); u1 *= s1; - auto u2 = s2 * s2; - u2 *= u2; + auto u2 = s2.sqr(); + u2 = u2.sqr(); u2 *= s2; - auto u3 = s3 * s3; - u3 *= u3; + auto u3 = s3.sqr(); + u3 = u3.sqr(); u3 *= s3; - auto u4 = s4 * s4; - u4 *= u4; + auto u4 = s4.sqr(); + u4 = u4.sqr(); u4 *= s4; // matrix mul v = M_E * u with 14 additions diff --git a/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp index db4d4b02576..fa065567bce 100644 --- a/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/poseidon2_internal_relation.hpp @@ -57,8 +57,8 @@ template class Poseidon2InternalRelationImpl { auto s1 = w_l + q_l; // apply s-box round - auto u1 = s1 * s1; - u1 *= u1; + auto u1 = s1.sqr(); + u1 = u1.sqr(); u1 *= s1; auto u2 = w_r; auto u3 = w_o;