Skip to content

Commit

Permalink
Refactor sync hashBlocks (#4343)
Browse files Browse the repository at this point in the history
* Use Buffer.concat for bytes utils

* Simplify Buffer.concat()

* Remove need for byteArrayConcat

Co-authored-by: dapplion <[email protected]>
  • Loading branch information
twoeths and dapplion authored Aug 1, 2022
1 parent 9979836 commit f0a5f41
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 27 deletions.
4 changes: 2 additions & 2 deletions packages/beacon-node/src/sync/range/batch.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import PeerId from "peer-id";
import {allForks, Epoch, phase0} from "@lodestar/types";
import {allForks, Epoch, phase0, RootHex} from "@lodestar/types";
import {IChainForkConfig} from "@lodestar/config";
import {LodestarError} from "@lodestar/utils";
import {MAX_BATCH_DOWNLOAD_ATTEMPTS, MAX_BATCH_PROCESSING_ATTEMPTS} from "../constants.js";
Expand Down Expand Up @@ -32,7 +32,7 @@ export type Attempt = {
/** The peer that made the attempt */
peer: PeerId;
/** The hash of the blocks of the attempt */
hash: Uint8Array;
hash: RootHex;
};

export type BatchState =
Expand Down
3 changes: 1 addition & 2 deletions packages/beacon-node/src/sync/range/chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {IChainForkConfig} from "@lodestar/config";
import {toHexString} from "@chainsafe/ssz";
import {PeerAction} from "../../network/index.js";
import {ItTrigger} from "../../util/itTrigger.js";
import {byteArrayEquals} from "../../util/bytes.js";
import {PeerMap} from "../../util/peerMap.js";
import {wrapError} from "../../util/wrapError.js";
import {RangeSyncType} from "../utils/remoteSyncType.js";
Expand Down Expand Up @@ -483,7 +482,7 @@ export class SyncChain {
// The last batch attempt is right, all others are wrong. Penalize other peers
const attemptOk = batch.validationSuccess();
for (const attempt of batch.failedProcessingAttempts) {
if (!byteArrayEquals(attempt.hash, attemptOk.hash)) {
if (attempt.hash !== attemptOk.hash) {
if (attemptOk.peer.toB58String() === attempt.peer.toB58String()) {
// The same peer corrected its previous attempt
this.reportPeer(attempt.peer, PeerAction.MidToleranceError, "SyncChainInvalidBatchSelf");
Expand Down
27 changes: 18 additions & 9 deletions packages/beacon-node/src/sync/range/utils/hashBlocks.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
import {allForks} from "@lodestar/types";
import {allForks, RootHex} from "@lodestar/types";
import {IChainForkConfig} from "@lodestar/config";
import {byteArrayConcat} from "../../../util/bytes.js";
import {toHex} from "@lodestar/utils";

/**
* Hash SignedBeaconBlocks into a byte form intended only for easy comparison
* @param blocks
* @param config
* String to uniquely identify block segments. Used for peer scoring and to compare if batches are equivalent.
*/
export function hashBlocks(blocks: allForks.SignedBeaconBlock[], config: IChainForkConfig): Uint8Array {
return byteArrayConcat(
blocks.map((block) => config.getForkTypes(block.message.slot).SignedBeaconBlock.hashTreeRoot(block))
);
/**
 * Build a string identifier for a segment of blocks.
 *
 * - no blocks: returns "0x"
 * - one block: returns the hex of that block's SignedBeaconBlock hashTreeRoot
 * - several blocks: returns the hex root of the first block immediately followed by the hex root of the last block
 *
 * @param blocks block segment to identify
 * @param config used to select the fork-specific SignedBeaconBlock type from each block's slot
 */
export function hashBlocks(blocks: allForks.SignedBeaconBlock[], config: IChainForkConfig): RootHex {
  // Hex-encoded hashTreeRoot of a signed block, using the type of the fork active at its slot
  const signedBlockRootHex = (block: allForks.SignedBeaconBlock): RootHex =>
    toHex(config.getForkTypes(block.message.slot).SignedBeaconBlock.hashTreeRoot(block));

  const blockCount = blocks.length;
  if (blockCount === 0) {
    return "0x";
  }

  const firstRootHex = signedBlockRootHex(blocks[0]);
  if (blockCount === 1) {
    return firstRootHex;
  }

  // Only the two ends of the segment contribute to the identifier
  return firstRootHex + signedBlockRootHex(blocks[blockCount - 1]);
}
13 changes: 0 additions & 13 deletions packages/beacon-node/src/util/bytes.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
import {Root} from "@lodestar/types";

/**
 * Concatenate byte arrays, in order, into a single newly allocated Uint8Array.
 *
 * @param bytesArr chunks to join; an empty list yields an empty array
 * @returns a new array whose length is the sum of all chunk lengths
 */
export function byteArrayConcat(bytesArr: Uint8Array[]): Uint8Array {
  // First pass: size the output buffer
  let size = 0;
  for (const chunk of bytesArr) {
    size += chunk.length;
  }

  // Second pass: copy each chunk at the running write position
  const out = new Uint8Array(size);
  bytesArr.reduce((cursor, chunk) => {
    out.set(chunk, cursor);
    return cursor + chunk.length;
  }, 0);

  return out;
}

export function byteArrayEquals(a: Uint8Array | Root, b: Uint8Array | Root): boolean {
if (a.length !== b.length) {
return false;
Expand Down
22 changes: 22 additions & 0 deletions packages/beacon-node/test/perf/util/bytes.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import {itBench} from "@dapplion/benchmark";

/**
 * Benchmark of Buffer.concat() over a list of 32-byte buffers.
 */
describe("bytes utils", function () {
  // Number of 32-byte items concatenated per Buffer.concat() call
  const count = 32;
  // NOTE(review): runsFactor is assumed to divide the measured time per fn() call, as in the other
  // benchmarks that loop N times inside fn — confirm against the @dapplion/benchmark docs.
  // fn must therefore perform exactly this many operations, otherwise the reported ns/op
  // is understated by this factor.
  const runsFactor = 1000;

  const roots: Uint8Array[] = [];
  let buffers: Buffer[] = [];

  before(function () {
    this.timeout(60 * 1000);
    // Root i is 32 bytes all set to i
    for (let i = 0; i < count; i++) {
      roots.push(new Uint8Array(Array.from({length: 32}, () => i)));
    }
    buffers = roots.map((root) => Buffer.from(root.buffer));
  });

  itBench({
    id: `Buffer.concat ${count} items`,
    fn: () => {
      for (let run = 0; run < runsFactor; run++) {
        Buffer.concat(buffers);
      }
    },
    runsFactor,
  });
});
7 changes: 6 additions & 1 deletion packages/beacon-node/test/unit/util/bytes.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import {expect} from "chai";
import {fromHexString, toHexString} from "@chainsafe/ssz";

import {byteArrayConcat, byteArrayEquals} from "../../../src/util/bytes.js";
import {byteArrayEquals} from "../../../src/util/bytes.js";

/**
 * Test-only reference for byteArrayConcat: joins the given byte arrays, in order, using Buffer.concat.
 */
function byteArrayConcat(bytesArr: Uint8Array[]): Uint8Array {
  const joined: Uint8Array = Buffer.concat(bytesArr);
  return joined;
}

describe("util / bytes", () => {
describe("byteArrayConcat", () => {
Expand Down

1 comment on commit f0a5f41

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for some benchmarks.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold.

Benchmark suite Current: f0a5f41 Previous: 9979836 Ratio
getEffectiveBalanceIncrementsZeroInactive - 250000 vs - 7PWei 1.2503 ms/op 388.62 us/op 3.22
Full benchmark results
Benchmark suite Current: f0a5f41 Previous: 9979836 Ratio
getPubkeys - index2pubkey - req 1000 vs - 250000 vc 2.3084 ms/op 2.3090 ms/op 1.00
getPubkeys - validatorsArr - req 1000 vs - 250000 vc 77.193 us/op 67.255 us/op 1.15
BLS verify - blst-native 1.8591 ms/op 2.1669 ms/op 0.86
BLS verifyMultipleSignatures 3 - blst-native 3.8037 ms/op 4.4732 ms/op 0.85
BLS verifyMultipleSignatures 8 - blst-native 8.1915 ms/op 9.6811 ms/op 0.85
BLS verifyMultipleSignatures 32 - blst-native 29.687 ms/op 35.211 ms/op 0.84
BLS aggregatePubkeys 32 - blst-native 39.072 us/op 46.933 us/op 0.83
BLS aggregatePubkeys 128 - blst-native 152.63 us/op 182.51 us/op 0.84
getAttestationsForBlock 172.59 ms/op 150.97 ms/op 1.14
isKnown best case - 1 super set check 421.00 ns/op 489.00 ns/op 0.86
isKnown normal case - 2 super set checks 415.00 ns/op 476.00 ns/op 0.87
isKnown worse case - 16 super set checks 414.00 ns/op 484.00 ns/op 0.86
CheckpointStateCache - add get delete 9.1060 us/op 9.0700 us/op 1.00
validate gossip signedAggregateAndProof - struct 4.2710 ms/op 5.0235 ms/op 0.85
validate gossip attestation - struct 2.0297 ms/op 2.3677 ms/op 0.86
altair verifyImport mainnet_s3766816:31 8.8464 s/op 8.7596 s/op 1.01
pickEth1Vote - no votes 2.1294 ms/op 2.1798 ms/op 0.98
pickEth1Vote - max votes 24.581 ms/op 22.009 ms/op 1.12
pickEth1Vote - Eth1Data hashTreeRoot value x2048 11.622 ms/op 13.273 ms/op 0.88
pickEth1Vote - Eth1Data hashTreeRoot tree x2048 21.473 ms/op 21.870 ms/op 0.98
pickEth1Vote - Eth1Data fastSerialize value x2048 1.5684 ms/op 1.6465 ms/op 0.95
pickEth1Vote - Eth1Data fastSerialize tree x2048 17.473 ms/op 16.304 ms/op 1.07
bytes32 toHexString 1.1740 us/op 1.1570 us/op 1.01
bytes32 Buffer.toString(hex) 765.00 ns/op 853.00 ns/op 0.90
bytes32 Buffer.toString(hex) from Uint8Array 1.0090 us/op 1.1140 us/op 0.91
bytes32 Buffer.toString(hex) + 0x 774.00 ns/op 855.00 ns/op 0.91
Object access 1 prop 0.38300 ns/op 0.42300 ns/op 0.91
Map access 1 prop 0.29300 ns/op 0.30300 ns/op 0.97
Object get x1000 17.897 ns/op 11.377 ns/op 1.57
Map get x1000 1.0360 ns/op 1.1830 ns/op 0.88
Object set x1000 131.31 ns/op 87.113 ns/op 1.51
Map set x1000 75.804 ns/op 55.274 ns/op 1.37
Return object 10000 times 0.36760 ns/op 0.43480 ns/op 0.85
Throw Error 10000 times 5.9912 us/op 6.0386 us/op 0.99
enrSubnets - fastDeserialize 64 bits 2.9860 us/op 3.1810 us/op 0.94
enrSubnets - ssz BitVector 64 bits 797.00 ns/op 954.00 ns/op 0.84
enrSubnets - fastDeserialize 4 bits 440.00 ns/op 490.00 ns/op 0.90
enrSubnets - ssz BitVector 4 bits 793.00 ns/op 939.00 ns/op 0.84
prioritizePeers score -10:0 att 32-0.1 sync 2-0 102.15 us/op 94.700 us/op 1.08
prioritizePeers score 0:0 att 32-0.25 sync 2-0.25 123.97 us/op 135.39 us/op 0.92
prioritizePeers score 0:0 att 32-0.5 sync 2-0.5 242.55 us/op 253.41 us/op 0.96
prioritizePeers score 0:0 att 64-0.75 sync 4-0.75 515.06 us/op 333.34 us/op 1.55
prioritizePeers score 0:0 att 64-1 sync 4-1 465.45 us/op 404.85 us/op 1.15
RateTracker 1000000 limit, 1 obj count per request 200.65 ns/op 208.34 ns/op 0.96
RateTracker 1000000 limit, 2 obj count per request 150.35 ns/op 156.09 ns/op 0.96
RateTracker 1000000 limit, 4 obj count per request 125.06 ns/op 127.28 ns/op 0.98
RateTracker 1000000 limit, 8 obj count per request 110.43 ns/op 114.07 ns/op 0.97
RateTracker with prune 4.8750 us/op 4.8620 us/op 1.00
array of 16000 items push then shift 3.2178 us/op 51.585 us/op 0.06
LinkedList of 16000 items push then shift 29.329 ns/op 13.060 ns/op 2.25
array of 16000 items push then pop 287.29 ns/op 239.28 ns/op 1.20
LinkedList of 16000 items push then pop 23.541 ns/op 12.149 ns/op 1.94
array of 24000 items push then shift 4.5606 us/op 77.354 us/op 0.06
LinkedList of 24000 items push then shift 32.544 ns/op 13.618 ns/op 2.39
array of 24000 items push then pop 206.23 ns/op 200.55 ns/op 1.03
LinkedList of 24000 items push then pop 23.423 ns/op 12.248 ns/op 1.91
intersect bitArray bitLen 8 11.790 ns/op 10.818 ns/op 1.09
intersect array and set length 8 177.44 ns/op 167.53 ns/op 1.06
intersect bitArray bitLen 128 62.126 ns/op 57.998 ns/op 1.07
intersect array and set length 128 2.3881 us/op 2.0673 us/op 1.16
Buffer.concat 32 items 2.0320 ns/op
pass gossip attestations to forkchoice per slot 3.2053 ms/op 3.2378 ms/op 0.99
computeDeltas 3.5014 ms/op 3.2282 ms/op 1.08
computeProposerBoostScoreFromBalances 921.72 us/op 810.54 us/op 1.14
altair processAttestation - 250000 vs - 7PWei normalcase 4.1870 ms/op 3.6657 ms/op 1.14
altair processAttestation - 250000 vs - 7PWei worstcase 6.0626 ms/op 5.3928 ms/op 1.12
altair processAttestation - setStatus - 1/6 committees join 209.64 us/op 175.94 us/op 1.19
altair processAttestation - setStatus - 1/3 committees join 395.81 us/op 350.17 us/op 1.13
altair processAttestation - setStatus - 1/2 committees join 558.57 us/op 492.53 us/op 1.13
altair processAttestation - setStatus - 2/3 committees join 714.15 us/op 646.24 us/op 1.11
altair processAttestation - setStatus - 4/5 committees join 997.84 us/op 902.59 us/op 1.11
altair processAttestation - setStatus - 100% committees join 1.1854 ms/op 1.0806 ms/op 1.10
altair processBlock - 250000 vs - 7PWei normalcase 28.981 ms/op 27.976 ms/op 1.04
altair processBlock - 250000 vs - 7PWei normalcase hashState 43.682 ms/op 33.903 ms/op 1.29
altair processBlock - 250000 vs - 7PWei worstcase 81.961 ms/op 88.223 ms/op 0.93
altair processBlock - 250000 vs - 7PWei worstcase hashState 101.08 ms/op 101.16 ms/op 1.00
phase0 processBlock - 250000 vs - 7PWei normalcase 5.4686 ms/op 4.0758 ms/op 1.34
phase0 processBlock - 250000 vs - 7PWei worstcase 48.689 ms/op 53.426 ms/op 0.91
altair processEth1Data - 250000 vs - 7PWei normalcase 868.22 us/op 804.38 us/op 1.08
Tree 40 250000 create 856.24 ms/op 728.93 ms/op 1.17
Tree 40 250000 get(125000) 291.63 ns/op 232.25 ns/op 1.26
Tree 40 250000 set(125000) 2.6016 us/op 2.2514 us/op 1.16
Tree 40 250000 toArray() 34.047 ms/op 27.627 ms/op 1.23
Tree 40 250000 iterate all - toArray() + loop 34.266 ms/op 28.090 ms/op 1.22
Tree 40 250000 iterate all - get(i) 113.99 ms/op 111.59 ms/op 1.02
MutableVector 250000 create 16.111 ms/op 13.648 ms/op 1.18
MutableVector 250000 get(125000) 13.101 ns/op 10.884 ns/op 1.20
MutableVector 250000 set(125000) 675.21 ns/op 544.15 ns/op 1.24
MutableVector 250000 toArray() 7.7937 ms/op 6.5222 ms/op 1.19
MutableVector 250000 iterate all - toArray() + loop 8.3826 ms/op 6.7330 ms/op 1.25
MutableVector 250000 iterate all - get(i) 3.4410 ms/op 2.8406 ms/op 1.21
Array 250000 create 6.4923 ms/op 6.7065 ms/op 0.97
Array 250000 clone - spread 2.5844 ms/op 3.2863 ms/op 0.79
Array 250000 get(125000) 1.0810 ns/op 1.5330 ns/op 0.71
Array 250000 set(125000) 1.1150 ns/op 1.5330 ns/op 0.73
Array 250000 iterate all - loop 167.95 us/op 150.97 us/op 1.11
effectiveBalanceIncrements clone Uint8Array 300000 70.664 us/op 52.372 us/op 1.35
effectiveBalanceIncrements clone MutableVector 300000 729.00 ns/op 1.0620 us/op 0.69
effectiveBalanceIncrements rw all Uint8Array 300000 254.07 us/op 247.37 us/op 1.03
effectiveBalanceIncrements rw all MutableVector 300000 178.23 ms/op 173.73 ms/op 1.03
phase0 afterProcessEpoch - 250000 vs - 7PWei 181.40 ms/op 189.20 ms/op 0.96
phase0 beforeProcessEpoch - 250000 vs - 7PWei 79.420 ms/op 66.758 ms/op 1.19
altair processEpoch - mainnet_e81889 512.90 ms/op 548.84 ms/op 0.93
mainnet_e81889 - altair beforeProcessEpoch 166.89 ms/op 141.67 ms/op 1.18
mainnet_e81889 - altair processJustificationAndFinalization 26.383 us/op 17.187 us/op 1.54
mainnet_e81889 - altair processInactivityUpdates 11.501 ms/op 9.1812 ms/op 1.25
mainnet_e81889 - altair processRewardsAndPenalties 98.481 ms/op 81.471 ms/op 1.21
mainnet_e81889 - altair processRegistryUpdates 4.2390 us/op 2.9260 us/op 1.45
mainnet_e81889 - altair processSlashings 789.00 ns/op 615.00 ns/op 1.28
mainnet_e81889 - altair processEth1DataReset 1.0590 us/op 615.00 ns/op 1.72
mainnet_e81889 - altair processEffectiveBalanceUpdates 2.4387 ms/op 2.0150 ms/op 1.21
mainnet_e81889 - altair processSlashingsReset 6.8430 us/op 4.5130 us/op 1.52
mainnet_e81889 - altair processRandaoMixesReset 7.0880 us/op 4.0770 us/op 1.74
mainnet_e81889 - altair processHistoricalRootsUpdate 1.0980 us/op 685.00 ns/op 1.60
mainnet_e81889 - altair processParticipationFlagUpdates 3.6260 us/op 2.5490 us/op 1.42
mainnet_e81889 - altair processSyncCommitteeUpdates 1.0200 us/op 1.9220 us/op 0.53
mainnet_e81889 - altair afterProcessEpoch 193.27 ms/op 219.30 ms/op 0.88
phase0 processEpoch - mainnet_e58758 547.32 ms/op 622.38 ms/op 0.88
mainnet_e58758 - phase0 beforeProcessEpoch 247.51 ms/op 235.99 ms/op 1.05
mainnet_e58758 - phase0 processJustificationAndFinalization 25.129 us/op 17.404 us/op 1.44
mainnet_e58758 - phase0 processRewardsAndPenalties 147.98 ms/op 137.10 ms/op 1.08
mainnet_e58758 - phase0 processRegistryUpdates 11.439 us/op 8.9980 us/op 1.27
mainnet_e58758 - phase0 processSlashings 989.00 ns/op 609.00 ns/op 1.62
mainnet_e58758 - phase0 processEth1DataReset 1.0470 us/op 719.00 ns/op 1.46
mainnet_e58758 - phase0 processEffectiveBalanceUpdates 2.4329 ms/op 2.0175 ms/op 1.21
mainnet_e58758 - phase0 processSlashingsReset 5.3540 us/op 4.2060 us/op 1.27
mainnet_e58758 - phase0 processRandaoMixesReset 6.3640 us/op 4.8780 us/op 1.30
mainnet_e58758 - phase0 processHistoricalRootsUpdate 1.0940 us/op 718.00 ns/op 1.52
mainnet_e58758 - phase0 processParticipationRecordUpdates 5.3360 us/op 3.5430 us/op 1.51
mainnet_e58758 - phase0 afterProcessEpoch 158.07 ms/op 163.05 ms/op 0.97
phase0 processEffectiveBalanceUpdates - 250000 normalcase 2.5845 ms/op 1.9873 ms/op 1.30
phase0 processEffectiveBalanceUpdates - 250000 worstcase 0.5 3.0268 ms/op 2.2480 ms/op 1.35
altair processInactivityUpdates - 250000 normalcase 42.964 ms/op 51.588 ms/op 0.83
altair processInactivityUpdates - 250000 worstcase 52.163 ms/op 50.974 ms/op 1.02
phase0 processRegistryUpdates - 250000 normalcase 9.0940 us/op 6.7410 us/op 1.35
phase0 processRegistryUpdates - 250000 badcase_full_deposits 417.56 us/op 371.64 us/op 1.12
phase0 processRegistryUpdates - 250000 worstcase 0.5 222.41 ms/op 174.06 ms/op 1.28
altair processRewardsAndPenalties - 250000 normalcase 129.99 ms/op 132.72 ms/op 0.98
altair processRewardsAndPenalties - 250000 worstcase 90.960 ms/op 138.86 ms/op 0.66
phase0 getAttestationDeltas - 250000 normalcase 13.859 ms/op 12.789 ms/op 1.08
phase0 getAttestationDeltas - 250000 worstcase 14.280 ms/op 13.254 ms/op 1.08
phase0 processSlashings - 250000 worstcase 5.5963 ms/op 5.6034 ms/op 1.00
altair processSyncCommitteeUpdates - 250000 285.50 ms/op 290.28 ms/op 0.98
BeaconState.hashTreeRoot - No change 492.00 ns/op 519.00 ns/op 0.95
BeaconState.hashTreeRoot - 1 full validator 62.943 us/op 69.659 us/op 0.90
BeaconState.hashTreeRoot - 32 full validator 643.10 us/op 810.43 us/op 0.79
BeaconState.hashTreeRoot - 512 full validator 6.6083 ms/op 9.0466 ms/op 0.73
BeaconState.hashTreeRoot - 1 validator.effectiveBalance 75.640 us/op 88.883 us/op 0.85
BeaconState.hashTreeRoot - 32 validator.effectiveBalance 1.2115 ms/op 1.4973 ms/op 0.81
BeaconState.hashTreeRoot - 512 validator.effectiveBalance 16.432 ms/op 17.149 ms/op 0.96
BeaconState.hashTreeRoot - 1 balances 65.299 us/op 62.817 us/op 1.04
BeaconState.hashTreeRoot - 32 balances 615.28 us/op 725.50 us/op 0.85
BeaconState.hashTreeRoot - 512 balances 6.7924 ms/op 6.7194 ms/op 1.01
BeaconState.hashTreeRoot - 250000 balances 92.136 ms/op 119.42 ms/op 0.77
aggregationBits - 2048 els - zipIndexesInBitList 34.168 us/op 26.044 us/op 1.31
regular array get 100000 times 67.497 us/op 60.593 us/op 1.11
wrappedArray get 100000 times 67.391 us/op 60.614 us/op 1.11
arrayWithProxy get 100000 times 29.246 ms/op 29.125 ms/op 1.00
ssz.Root.equals 541.00 ns/op 509.00 ns/op 1.06
byteArrayEquals 533.00 ns/op 498.00 ns/op 1.07
shuffle list - 16384 els 11.092 ms/op 12.383 ms/op 0.90
shuffle list - 250000 els 163.00 ms/op 167.33 ms/op 0.97
processSlot - 1 slots 13.205 us/op 13.100 us/op 1.01
processSlot - 32 slots 1.8517 ms/op 1.9152 ms/op 0.97
getEffectiveBalanceIncrementsZeroInactive - 250000 vs - 7PWei 1.2503 ms/op 388.62 us/op 3.22
getCommitteeAssignments - req 1 vs - 250000 vc 5.3405 ms/op 5.3766 ms/op 0.99
getCommitteeAssignments - req 100 vs - 250000 vc 7.3728 ms/op 7.8622 ms/op 0.94
getCommitteeAssignments - req 1000 vs - 250000 vc 7.7739 ms/op 8.4066 ms/op 0.92
computeProposers - vc 250000 18.643 ms/op 18.844 ms/op 0.99
computeEpochShuffling - vc 250000 165.62 ms/op 170.60 ms/op 0.97
getNextSyncCommittee - vc 250000 274.85 ms/op 285.44 ms/op 0.96

Please sign in to comment.