From d0170302b1533573344bf113c547ec1590b87db6 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 3 Feb 2023 13:02:26 -0700 Subject: [PATCH 01/28] all: implement era format, add history importer/export --- cmd/era/main.go | 331 +++++++++++++++++ cmd/geth/chaincmd.go | 116 ++++++ cmd/geth/main.go | 2 + cmd/utils/cmd.go | 158 ++++++++ cmd/utils/history_test.go | 171 +++++++++ core/blockchain_reader.go | 5 + go.mod | 3 + go.sum | 11 +- internal/era/accumulator.go | 87 +++++ internal/era/e2store/e2store.go | 146 ++++++++ internal/era/e2store/e2store_test.go | 158 ++++++++ internal/era/era.go | 528 +++++++++++++++++++++++++++ internal/era/era_test.go | 132 +++++++ 13 files changed, 1845 insertions(+), 3 deletions(-) create mode 100644 cmd/era/main.go create mode 100644 cmd/utils/history_test.go create mode 100644 internal/era/accumulator.go create mode 100644 internal/era/e2store/e2store.go create mode 100644 internal/era/e2store/e2store_test.go create mode 100644 internal/era/era.go create mode 100644 internal/era/era_test.go diff --git a/cmd/era/main.go b/cmd/era/main.go new file mode 100644 index 000000000000..6ab1020f2d47 --- /dev/null +++ b/cmd/era/main.go @@ -0,0 +1,331 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . 
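+
+// Typical invocations look like the following (illustrative; era1 files are
+// looked up in --dir, default "eras", for --network, default "mainnet"):
+//
+//	era info 0              print accumulator root, starting TD and block range of epoch 0
+//	era block 1337          print block 1337 as JSON
+//	era block --txs 1337    same, but include full transaction objects
+//	era verify roots.txt    recompute every epoch's accumulator against a list of expected roots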
+ +package main + +import ( + "encoding/json" + "fmt" + "io" + "math/big" + "os" + "path" + "strconv" + "strings" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/internal/era" + "github.com/ethereum/go-ethereum/internal/ethapi" + "github.com/ethereum/go-ethereum/internal/flags" + "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" + "github.com/urfave/cli/v2" +) + +var app = flags.NewApp("go-ethereum era tool") + +var ( + dirFlag = &cli.StringFlag{ + Name: "dir", + Usage: "directory storing all relevant era1 files", + Value: "eras", + } + networkFlag = &cli.StringFlag{ + Name: "network", + Usage: "network name associated with era1 files", + Value: "mainnet", + } + eraSizeFlag = &cli.IntFlag{ + Name: "size", + Usage: "number of blocks per era", + Value: era.MaxEra1Size, + } + txsFlag = &cli.BoolFlag{ + Name: "txs", + Usage: "print full transaction values", + } +) + +var ( + blockCommand = &cli.Command{ + Name: "block", + Usage: "get block data", + ArgsUsage: "", + Action: block, + Flags: []cli.Flag{ + txsFlag, + }, + } + infoCommand = &cli.Command{ + Name: "info", + ArgsUsage: "", + Usage: "get epoch information", + Action: info, + } + verifyCommand = &cli.Command{ + Name: "verify", + ArgsUsage: "", + Usage: "verifies each era against expected accumulator root", + Action: verify, + } +) + +func init() { + app.Commands = []*cli.Command{ + blockCommand, + infoCommand, + verifyCommand, + } + app.Flags = []cli.Flag{ + dirFlag, + networkFlag, + eraSizeFlag, + } +} + +func main() { + if err := app.Run(os.Args); err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } +} + +// block prints the specified block from an era1 store. +func block(ctx *cli.Context) error { + num, err := strconv.ParseUint(ctx.Args().First(), 10, 64) + if err != nil { + return fmt.Errorf("invalid block number: %w", err) + } + + f, err := open(ctx, num/uint64(ctx.Int(eraSizeFlag.Name))) + if err != nil { + return fmt.Errorf("error opening era: %w", err) + } + r, err := era.NewReader(f) + if err != nil { + return fmt.Errorf("error making era reader: %w", err) + } + + // Read block with number. + block, err := r.ReadBlock(num) + if err != nil { + return fmt.Errorf("error reading era: %w", err) + } + + // Convert block to JSON and print. + val, err := ethapi.RPCMarshalBlock(block, ctx.Bool(txsFlag.Name), ctx.Bool(txsFlag.Name), params.MainnetChainConfig) + if err != nil { + return fmt.Errorf("error marshaling json: %w", err) + } + b, err := json.MarshalIndent(val, "", " ") + if err != nil { + return fmt.Errorf("error marshaling json: %w", err) + } + fmt.Println(string(b)) + return nil +} + +// info prints some high-level information about the era1 file. 
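+// The output is a small JSON object of the following shape (values illustrative):
+//
+//	{
+//	    "accumulator": "0x...",
+//	    "totalDifficulty": 0,
+//	    "startBlock": 0,
+//	    "count": 8192
+//	}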
+func info(ctx *cli.Context) error { + epoch, err := strconv.ParseUint(ctx.Args().First(), 10, 64) + if err != nil { + return fmt.Errorf("invalid epoch number: %w", err) + } + f, err := open(ctx, epoch) + if err != nil { + return err + } + r, err := era.NewReader(f) + if err != nil { + return fmt.Errorf("error creating era reader: %w", err) + } + acc, err := r.Accumulator() + if err != nil { + return fmt.Errorf("error reading accumulator: %w", err) + } + td, err := r.InitialTD() + if err != nil { + return fmt.Errorf("error reading total difficulty: %w", err) + } + info := struct { + Accumulator common.Hash `json:"accumulator"` + TotalDifficulty *big.Int `json:"totalDifficulty"` + StartBlock uint64 `json:"startBlock"` + Count uint64 `json:"count"` + }{ + acc, td, r.Start(), r.Count(), + } + b, _ := json.MarshalIndent(info, "", " ") + fmt.Println(string(b)) + return nil +} + +// open opens an era1 file at a certain epoch. +func open(ctx *cli.Context, epoch uint64) (*os.File, error) { + var ( + dir = ctx.String(dirFlag.Name) + network = ctx.String(networkFlag.Name) + ) + entries, err := era.ReadDir(dir, network) + if err != nil { + return nil, fmt.Errorf("error reading era dir: %w", err) + } + if epoch >= uint64(len(entries)) { + return nil, fmt.Errorf("epoch out-of-bounds: last %d, want %d", len(entries)-1, epoch) + } + return os.Open(path.Join(dir, entries[epoch])) +} + +// verify checks each era1 file in a directory to ensure it is well-formed and +// that the accumulator matches the expected value. +func verify(ctx *cli.Context) error { + if ctx.Args().Len() != 1 { + return fmt.Errorf("missing accumulators file") + } + + roots, err := readHashes(ctx.Args().First()) + if err != nil { + return fmt.Errorf("unable to read expected roots file: %w", err) + } + + var ( + dir = ctx.String(dirFlag.Name) + network = ctx.String(networkFlag.Name) + start = time.Now() + reported = time.Now() + ) + + entries, err := era.ReadDir(dir, network) + if err != nil { + return fmt.Errorf("error reading %s: %w", dir, err) + } + + if len(entries) != len(roots) { + return fmt.Errorf("number of era1 files should match the number of accumulator hashes") + } + + // Verify each epoch matches the expected root. + for i, want := range roots { + name := entries[i] + f, err := os.Open(path.Join(dir, name)) + if err != nil { + return fmt.Errorf("error opening era1 file %s: %w", name, err) + } + defer f.Close() + + r, err := era.NewReader(f) + if err != nil { + return fmt.Errorf("unable to make era reader: %w", err) + } + + // Read accumulator and check against expected. + if got, err := r.Accumulator(); err != nil { + return fmt.Errorf("error retrieving accumulator for %s: %w", name, err) + } else if got != want { + return fmt.Errorf("invalid root %s: got %s, want %s", name, got, want) + } + + // Recompute accumulator. + if err := checkAccumulator(r); err != nil { + return fmt.Errorf("error verify era1 file %s: %w", name, err) + } + + // Give the user some feedback that something is happening. + if time.Since(reported) >= 8*time.Second { + fmt.Printf("Verifying Era1 files \t\t verified=%d,\t elapsed=%s\n", i, common.PrettyDuration(time.Since(start))) + reported = time.Now() + } + } + + return nil +} + +// checkAccumulator verifies the accumulator matches the data in the Era. 
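+// It does so by re-reading every block in the file, collecting block hashes and
+// the running total difficulty, and recomputing the root with era.ComputeAccumulator.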
+func checkAccumulator(r *era.Reader) error { + var ( + err error + start = r.Start() + want common.Hash + td *big.Int + tds = make([]*big.Int, 0) + hashes = make([]common.Hash, 0) + ) + if want, err = r.Accumulator(); err != nil { + return fmt.Errorf("error reading accumulator: %w", err) + } + if td, err = r.InitialTD(); err != nil { + return fmt.Errorf("error reading total difficulty: %w", err) + } + // Starting at epoch 0, iterate through all available era1 files and + // check the following: + // * the block index is constructed correctly + // * the starting total difficulty value is correct + // * the accumulator is correct by recomputing it locally, + // which verifies the blocks are all correct (via hash) + // * the receipts root matches the value in the block + for j := 0; ; j++ { + // read() walks the block index, so we're able to + // implicitly verify it. + block, receipts, err := r.Read() + if err == io.EOF { + break + } else if err != nil { + return fmt.Errorf("error reading block %d: %w", start+uint64(j), err) + } + tr := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)) + if tr != block.TxHash() { + return fmt.Errorf("tx root in block %d mismatch: want %s, got %s", block.NumberU64(), block.TxHash(), tr) + } + // Calculate receipt root from receipt list and check + // value against block. + rr := types.DeriveSha(receipts, trie.NewStackTrie(nil)) + if rr != block.ReceiptHash() { + return fmt.Errorf("receipt root in block %d mismatch: want %s, got %s", block.NumberU64(), block.ReceiptHash(), rr) + } + hashes = append(hashes, block.Hash()) + td.Add(td, block.Difficulty()) + tds = append(tds, new(big.Int).Set(td)) + } + got, err := era.ComputeAccumulator(hashes, tds) + if err != nil { + return fmt.Errorf("error computing accumulator: %w", err) + } + if got != want { + return fmt.Errorf("expected accumulator root does not match calculated: got %s, want %s", got, want) + } + return nil +} + +// readHashes reads a file of newline-delimited hashes. +func readHashes(f string) ([]common.Hash, error) { + b, err := os.ReadFile(f) + if err != nil { + return nil, fmt.Errorf("unable to open accumulators file") + } + s := strings.Split(string(b), "\n") + // Remove empty last element, if present. + if s[len(s)-1] == "" { + s = s[:len(s)-1] + } + // Convert to hashes. + r := make([]common.Hash, len(s)) + for i := range s { + r[i] = common.HexToHash(s[i]) + } + return r, nil +} diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 3b4f516af7b4..52bd1bb9bef7 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -35,10 +35,12 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/internal/era" "github.com/ethereum/go-ethereum/internal/flags" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/node" + "github.com/ethereum/go-ethereum/params" "github.com/urfave/cli/v2" ) @@ -122,6 +124,30 @@ Optional second and third arguments control the first and last block to write. In this mode, the file will be appended if already existing. 
If the file ends with .gz, the output will be gzipped.`, + } + importHistoryCommand = &cli.Command{ + Action: importHistory, + Name: "import-history", + Usage: "Import an Era archive", + ArgsUsage: "", + Flags: flags.Merge([]cli.Flag{ + utils.TxLookupLimitFlag, + }, + utils.DatabaseFlags, + utils.NetworkFlags, + ), + Description: "", + } + exportHistoryCommand = &cli.Command{ + Action: exportHistory, + Name: "export-history", + Usage: "Export blockchain history to Era archives", + ArgsUsage: " ", + Flags: flags.Merge(utils.DatabaseFlags), + Description: ` +The export-history command will export blocks and their corresponding receipts +into Era archives. Eras are typically packaged in steps of 8192 blocks. +`, } importPreimagesCommand = &cli.Command{ Action: importPreimages, @@ -364,7 +390,97 @@ func exportChain(ctx *cli.Context) error { } err = utils.ExportAppendChain(chain, fp, uint64(first), uint64(last)) } + if err != nil { + utils.Fatalf("Export error: %v\n", err) + } + fmt.Printf("Export done in %v\n", time.Since(start)) + return nil +} + +func importHistory(ctx *cli.Context) error { + if ctx.Args().Len() != 1 { + utils.Fatalf("usage: %s", ctx.Command.ArgsUsage) + } + + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + chain, db := utils.MakeChain(ctx, stack, false) + defer db.Close() + + var ( + start = time.Now() + dir = ctx.Args().Get(0) + network string + ) + + // Determine network. + if utils.IsNetworkPreset(ctx) { + switch { + case ctx.Bool(utils.MainnetFlag.Name): + network = "mainnet" + case ctx.Bool(utils.SepoliaFlag.Name): + network = "sepolia" + case ctx.Bool(utils.GoerliFlag.Name): + network = "goerli" + } + } else { + // No network flag set, try to determine network based on files + // present in directory. + var networks []string + for _, n := range params.NetworkNames { + entries, err := era.ReadDir(dir, n) + if err != nil { + return fmt.Errorf("error reading %s: %w", dir, err) + } + if len(entries) > 0 { + networks = append(networks, n) + } + } + if len(networks) == 0 { + return fmt.Errorf("no era1 files found in %s", dir) + } + if len(networks) > 1 { + return fmt.Errorf("multiple networks found, use a network flag to specify desired network") + } + network = networks[0] + } + + if err := utils.ImportHistory(chain, db, dir, network); err != nil { + return err + } + fmt.Printf("Import done in %v\n", time.Since(start)) + return nil +} + +// exportHistory exports chain history in Era archives at a specified +// directory. 
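+// It expects three arguments: the output directory, the first block and the last
+// block to export, e.g. (illustrative): geth export-history ./eras 0 8191.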
+func exportHistory(ctx *cli.Context) error { + if ctx.Args().Len() != 3 { + utils.Fatalf("usage: %s", ctx.Command.ArgsUsage) + } + + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + chain, _ := utils.MakeChain(ctx, stack, true) + start := time.Now() + var ( + dir = ctx.Args().Get(0) + first, ferr = strconv.ParseInt(ctx.Args().Get(1), 10, 64) + last, lerr = strconv.ParseInt(ctx.Args().Get(2), 10, 64) + ) + if ferr != nil || lerr != nil { + utils.Fatalf("Export error in parsing parameters: block number not an integer\n") + } + if first < 0 || last < 0 { + utils.Fatalf("Export error: block number must be greater than 0\n") + } + if head := chain.CurrentSnapBlock(); uint64(last) > head.Number.Uint64() { + utils.Fatalf("Export error: block number %d larger than head block %d\n", uint64(last), head.Number.Uint64()) + } + err := utils.ExportHistory(chain, dir, uint64(first), uint64(last), uint64(era.MaxEra1Size)) if err != nil { utils.Fatalf("Export error: %v\n", err) } diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 0fd0cc20995b..2f7d37fdd7e7 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -208,6 +208,8 @@ func init() { initCommand, importCommand, exportCommand, + importHistoryCommand, + exportHistoryCommand, importPreimagesCommand, removedbCommand, dumpCommand, diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go index 8b571be1ef85..3464b69a411e 100644 --- a/cmd/utils/cmd.go +++ b/cmd/utils/cmd.go @@ -19,12 +19,15 @@ package utils import ( "bufio" + "bytes" "compress/gzip" + "crypto/sha256" "errors" "fmt" "io" "os" "os/signal" + "path" "runtime" "strings" "syscall" @@ -39,8 +42,10 @@ import ( "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/debug" + "github.com/ethereum/go-ethereum/internal/era" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/node" + "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/urfave/cli/v2" ) @@ -228,6 +233,87 @@ func ImportChain(chain *core.BlockChain, fn string) error { return nil } +func readList(filename string) ([]string, error) { + b, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + return strings.Split(string(b), "\n"), nil +} + +// ImportHistory imports Era1 files containing historical block information, +// starting from genesis. +func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, network string) error { + if chain.CurrentSnapBlock().Number.BitLen() != 0 { + return fmt.Errorf("history import only supported when starting from genesis") + } + entries, err := era.ReadDir(dir, network) + if err != nil { + return fmt.Errorf("error reading %s: %w", dir, err) + } + checksums, err := readList(path.Join(dir, "checksums.txt")) + if err != nil { + return fmt.Errorf("unable to read checksums.txt: %w", err) + } + if len(checksums) != len(entries) { + return fmt.Errorf("expected equal number of checksums and entries, have: %d checksums, %d entries", len(checksums), len(entries)) + } + var ( + start = time.Now() + reported = time.Now() + imported = 0 + forker = core.NewForkChoice(chain, nil) + ) + for i, filename := range entries { + // Read entire Era1 to memory. Max historical Era1 is around + // 600MB. This is a lot to load at once, but it speeds up the + // import substantially. 
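+		// Holding the raw bytes also lets the sha256 checksum below be computed
+		// over the exact file contents before any decoding takes place.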
+ f, err := os.ReadFile(path.Join(dir, filename)) + if err != nil { + return fmt.Errorf("unable to open era: %w", err) + } + + if have, want := common.Hash(sha256.Sum256(f)).Hex(), checksums[i]; have != want { + return fmt.Errorf("checksum mismatch: have %s, want %s", have, want) + } + + // Import all block data from Era1. + r, err := era.NewReader(bytes.NewReader(f)) + if err != nil { + return fmt.Errorf("error making era reader: %w", err) + } + for j := 0; ; j += 1 { + n := i*era.MaxEra1Size + j + block, receipts, err := r.Read() + if err == io.EOF { + break + } else if err != nil { + return fmt.Errorf("error reading block %d: %w", n, err) + } else if block.Number().BitLen() == 0 { + continue // skip genesis + } + if status, err := chain.HeaderChain().InsertHeaderChain([]*types.Header{block.Header()}, start, forker); err != nil { + return fmt.Errorf("error inserting header %d: %w", n, err) + } else if status != core.CanonStatTy { + return fmt.Errorf("error inserting header %d, not canon: %v", n, status) + } + if _, err := chain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{receipts}, 2^64-1); err != nil { + return fmt.Errorf("error inserting body %d: %w", n, err) + } + imported += 1 + + // Give the user some feedback that something is happening. + if time.Since(reported) >= 8*time.Second { + log.Info("Importing Era files", "head", n, "imported", imported, "elapsed", common.PrettyDuration(time.Since(start))) + imported = 0 + reported = time.Now() + } + } + } + + return nil +} + func missingBlocks(chain *core.BlockChain, blocks []*types.Block) []*types.Block { head := chain.CurrentBlock() for i, block := range blocks { @@ -297,6 +383,78 @@ func ExportAppendChain(blockchain *core.BlockChain, fn string, first uint64, las return nil } +// ExportHistory exports blockchain history into the specified directory, +// following the Era format. +func ExportHistory(bc *core.BlockChain, dir string, first, last, step uint64) error { + log.Info("Exporting blockchain history", "dir", dir) + if head := bc.CurrentBlock().Number.Uint64(); head < last { + log.Warn("Last block beyond head, setting last = head", "head", head, "last", last) + last = head + } + network := "unknown" + if name, ok := params.NetworkNames[bc.Config().ChainID.String()]; ok { + network = name + } + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + return fmt.Errorf("error creating output directory: %w", err) + } + var ( + start = time.Now() + reported = time.Now() + checksums []string + ) + for i := first; i <= last; i += step { + var ( + buf = bytes.NewBuffer(nil) + w = era.NewBuilder(buf) + ) + for j := uint64(0); j < step && j <= last-i; j++ { + var ( + n = i + j + block = bc.GetBlockByNumber(n) + ) + if block == nil { + return fmt.Errorf("export failed on #%d: not found", n) + } + receipts := bc.GetReceiptsByHash(block.Hash()) + if receipts == nil { + return fmt.Errorf("export failed on #%d: receipts not found", n) + } + td := bc.GetTd(block.Hash(), block.NumberU64()) + if td == nil { + return fmt.Errorf("export failed on #%d: total difficulty not found", n) + } + if err := w.Add(block, receipts, td); err != nil { + return err + } + } + root, err := w.Finalize() + if err != nil { + return fmt.Errorf("export failed to finalize %d: %w", step/i, err) + } + + // Compute checksum of entire Era1. + checksums = append(checksums, common.Hash(sha256.Sum256(buf.Bytes())).Hex()) + + // Write Era1 to disk. 
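+		// era.Filename combines the network name, the five-digit epoch number and
+		// the first four bytes of the accumulator root, e.g. (illustrative)
+		// mainnet-00000-5ec1ffb8.era1.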
+ filename := path.Join(dir, era.Filename(network, int(i/step), root)) + if err := os.WriteFile(filename, buf.Bytes(), os.ModePerm); err != nil { + return err + } + + if time.Since(reported) >= 8*time.Second { + log.Info("Exporting blocks", "exported", i, "elapsed", common.PrettyDuration(time.Since(start))) + reported = time.Now() + } + } + + os.WriteFile(path.Join(dir, "checksums.txt"), []byte(strings.Join(checksums, "\n")), os.ModePerm) + + log.Info("Exported blockchain to", "dir", dir) + + return nil +} + // ImportPreimages imports a batch of exported hash preimages into the database. // It's a part of the deprecated functionality, should be removed in the future. func ImportPreimages(db ethdb.Database, fn string) error { diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go new file mode 100644 index 000000000000..e24a68bbd6b3 --- /dev/null +++ b/cmd/utils/history_test.go @@ -0,0 +1,171 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package utils + +import ( + "bytes" + "crypto/sha256" + "io" + "math/big" + "os" + "path" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/consensus/ethash" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/vm" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/internal/era" + "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/trie" +) + +var ( + count uint64 = 128 + step uint64 = 16 +) + +func TestHistoryImportAndExport(t *testing.T) { + var ( + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + genesis = &core.Genesis{ + Config: params.TestChainConfig, + Alloc: core.GenesisAlloc{address: {Balance: big.NewInt(1000000000000000000)}}, + } + signer = types.LatestSigner(genesis.Config) + ) + + // Generate chain. + db, blocks, _ := core.GenerateChainWithGenesis(genesis, ethash.NewFaker(), int(count), func(i int, g *core.BlockGen) { + if i == 0 { + return + } + tx, err := types.SignNewTx(key, signer, &types.DynamicFeeTx{ + ChainID: genesis.Config.ChainID, + Nonce: uint64(i - 1), + GasTipCap: common.Big0, + GasFeeCap: g.PrevBlock(0).BaseFee(), + Gas: 50000, + To: &common.Address{0xaa}, + Value: big.NewInt(int64(i)), + Data: nil, + AccessList: nil, + }) + if err != nil { + t.Fatalf("error creating tx: %v", err) + } + g.AddTx(tx) + }) + + // Initialize BlockChain. + chain, err := core.NewBlockChain(db, nil, genesis, nil, ethash.NewFaker(), vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("unable to initialize chain: %v", err) + } + if _, err := chain.InsertChain(blocks); err != nil { + t.Fatalf("error insterting chain: %v", err) + } + + // Make temp directory for era files. 
+ dir, err := os.MkdirTemp("", "history-export-test") + if err != nil { + t.Fatalf("error creating temp test directory: %v", err) + } + defer os.RemoveAll(dir) + + // Export history to temp directory. + if err := ExportHistory(chain, dir, 0, count, step); err != nil { + t.Fatalf("error exporting history: %v", err) + } + + // Read checksums. + b, err := os.ReadFile(path.Join(dir, "checksums.txt")) + if err != nil { + t.Fatalf("failed to read checksums: %v", err) + } + checksums := strings.Split(string(b), "\n") + + // Verify each Era. + entries, _ := era.ReadDir(dir, "mainnet") + for i, filename := range entries { + f, err := os.ReadFile(path.Join(dir, filename)) + if err != nil { + t.Fatalf("error opening era file: %v", err) + } + if want, got := common.HexToHash(checksums[i]), common.Hash(sha256.Sum256(f)); want != got { + t.Fatalf("checksum %d does not match: got %s, want %s", i, got, want) + } + r, err := era.NewReader(bytes.NewReader(f)) + if err != nil { + t.Fatalf("error making era reader: %v", err) + } + for j := 0; ; j += 1 { + block, receipts, err := r.Read() + if err == io.EOF { + break + } else if err != nil { + t.Fatalf("error reading era file %d: %v", i, err) + } + var ( + n = i*int(step) + j + want = chain.GetBlockByNumber(uint64(n)) + ) + if want, got := uint64(n), block.NumberU64(); want != got { + t.Fatalf("blocks out of order: want %d, got %d", want, got) + } + if want.Hash() != block.Hash() { + t.Fatalf("block hash mismatch %d: want %s, got %s", i+j, want.Hash().Hex(), block.Hash().Hex()) + } + if got := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)); got != want.TxHash() { + t.Fatalf("tx hash %d mismatch: want %s, got %s", i+j, want.TxHash(), got) + } + if got := types.CalcUncleHash(block.Uncles()); got != want.UncleHash() { + t.Fatalf("uncle hash %d mismatch: want %s, got %s", i+j, want.UncleHash(), got) + } + if got := types.DeriveSha(receipts, trie.NewStackTrie(nil)); got != want.ReceiptHash() { + t.Fatalf("receipt root %d mismatch: want %s, got %s", i+j, want.ReceiptHash(), got) + } + } + } + + // Now import Era. + freezer := t.TempDir() + db2, err := rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), freezer, "", false) + if err != nil { + panic(err) + } + t.Cleanup(func() { + db2.Close() + }) + + genesis.MustCommit(db2) + imported, err := core.NewBlockChain(db2, nil, genesis, nil, ethash.NewFaker(), vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("unable to initialize chain: %v", err) + } + if err := ImportHistory(imported, db2, dir, "mainnet"); err != nil { + t.Fatalf("failed to import chain: %v", err) + } + if have, want := imported.CurrentHeader(), chain.CurrentHeader(); have.Hash() != want.Hash() { + t.Fatalf("imported chain does not match expected, have (%d, %s) want (%d, %s)", have.Number, have.Hash(), want.Number, want.Hash()) + } +} diff --git a/core/blockchain_reader.go b/core/blockchain_reader.go index 6fb09abaccb5..706844171dc1 100644 --- a/core/blockchain_reader.go +++ b/core/blockchain_reader.go @@ -410,6 +410,11 @@ func (bc *BlockChain) TrieDB() *trie.Database { return bc.triedb } +// HeaderChain returns the underlying header chain. +func (bc *BlockChain) HeaderChain() *HeaderChain { + return bc.hc +} + // SubscribeRemovedLogsEvent registers a subscription of RemovedLogsEvent. 
func (bc *BlockChain) SubscribeRemovedLogsEvent(ch chan<- RemovedLogsEvent) event.Subscription { return bc.scope.Track(bc.rmLogsFeed.Subscribe(ch)) diff --git a/go.mod b/go.mod index 79bdc2551abe..acae682e19ce 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/dop251/goja v0.0.0-20230806174421-c933cf95e127 github.com/ethereum/c-kzg-4844 v0.4.0 github.com/fatih/color v1.13.0 + github.com/ferranbt/fastssz v0.1.2 github.com/fjl/gencodec v0.0.0-20230517082657-f9840df7b83e github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 github.com/fsnotify/fsnotify v1.6.0 @@ -114,10 +115,12 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kilic/bls12-381 v0.1.0 // indirect github.com/klauspost/compress v1.15.15 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect github.com/mitchellh/mapstructure v1.4.1 // indirect github.com/mitchellh/pointerstructure v1.2.0 // indirect github.com/mmcloughlin/addchain v0.4.0 // indirect diff --git a/go.sum b/go.sum index b692629b6b6a..57a357f16d7a 100644 --- a/go.sum +++ b/go.sum @@ -187,6 +187,8 @@ github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/ferranbt/fastssz v0.1.2 h1:Dky6dXlngF6Qjc+EfDipAkE83N5I5DE68bY6O0VLNPk= +github.com/ferranbt/fastssz v0.1.2/go.mod h1:X5UPrE2u1UJjxHA8X54u04SBwdAQjG2sFtWs39YxyWs= github.com/fjl/gencodec v0.0.0-20230517082657-f9840df7b83e h1:bBLctRc7kr01YGvaDfgLbTwjFNW5jdp5y5rj8XXBHfY= github.com/fjl/gencodec v0.0.0-20230517082657-f9840df7b83e/go.mod h1:AzA8Lj6YtixmJWL+wkKoBGsLWy9gFrAzi4g+5bCKwpY= github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 h1:FtmdgXiUlNeRsoNMFlKLDt+S+6hbjVMEW6RGQ7aUf7c= @@ -221,7 +223,6 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= -github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY= github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= @@ -400,6 +401,9 @@ github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod 
h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -447,6 +451,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182aff github.com/mediocregopher/mediocre-go-lib v0.0.0-20181029021733-cb65787f37ed/go.mod h1:dSsfyI2zABAdhcbvkXqgxOxrCsbYeHCPgrZkku60dSg= github.com/mediocregopher/radix/v3 v3.3.0/go.mod h1:EmfVyvspXz1uZEyPBMyGK+kjWiKQGvsUt6O3Pj+LDCQ= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= @@ -524,6 +530,7 @@ github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0 github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/protolambda/bls12-381-util v0.0.0-20220416220906-d8552aa452c7 h1:cZC+usqsYgHtlBaGulVnZ1hfKAi8iWtujBnRLQE698c= github.com/protolambda/bls12-381-util v0.0.0-20220416220906-d8552aa452c7/go.mod h1:IToEjHuttnUzwZI5KBSM/LOOW3qLbbrHOEfp3SbECGY= +github.com/prysmaticlabs/gohashtree v0.0.1-alpha.0.20220714111606-acbb2962fb48 h1:cSo6/vk8YpvkLbk9v3FO97cakNmUoxwi2KMP8hd5WIw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -777,8 +784,6 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= diff --git a/internal/era/accumulator.go b/internal/era/accumulator.go new file mode 100644 index 000000000000..78fa451930b8 --- /dev/null +++ b/internal/era/accumulator.go @@ -0,0 +1,87 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package era + +import ( + "fmt" + "math/big" + + "github.com/ethereum/go-ethereum/common" + ssz "github.com/ferranbt/fastssz" +) + +// ComputeAccumulator calculates the SSZ hash tree root of the Era1 +// accumulator of header records. +func ComputeAccumulator(hashes []common.Hash, tds []*big.Int) (common.Hash, error) { + if len(hashes) != len(tds) { + return common.Hash{}, fmt.Errorf("must have equal number hashes as td values") + } + hh := ssz.NewHasher() + for i := range hashes { + rec := headerRecord{hashes[i], tds[i]} + root, err := rec.HashTreeRoot() + if err != nil { + return common.Hash{}, err + } + hh.Append(root[:]) + } + hh.MerkleizeWithMixin(0, uint64(len(hashes)), uint64(MaxEra1Size)) + return hh.HashRoot() +} + +// headerRecord is an individual record for a historical header. +// +// See https://github.com/ethereum/portal-network-specs/blob/master/history-network.md#the-header-accumulator +// for more information. +type headerRecord struct { + Hash common.Hash + TotalDifficulty *big.Int +} + +// GetTree completes the ssz.HashRoot interface, but is unused. +func (h *headerRecord) GetTree() (*ssz.Node, error) { + return nil, nil +} + +// HashTreeRoot ssz hashes the headerRecord object. +func (h *headerRecord) HashTreeRoot() ([32]byte, error) { + return ssz.HashWithDefaultHasher(h) +} + +// HashTreeRootWith ssz hashes the headerRecord object with a hasher. +func (h *headerRecord) HashTreeRootWith(hh ssz.HashWalker) (err error) { + hh.PutBytes(h.Hash[:]) + td := bigToBytes32(h.TotalDifficulty) + hh.PutBytes(td[:]) + hh.Merkleize(0) + return +} + +// bigToBytes32 converts a big.Int into a little-endian 32-byte array. +func bigToBytes32(n *big.Int) (b [32]byte) { + n.FillBytes(b[:]) + reverseOrder(b[:]) + return +} + +// reverseOrder reverses the byte order of a slice. +func reverseOrder(b []byte) []byte { + for i := 0; i < 16; i++ { + b[i], b[32-i-1] = b[32-i-1], b[i] + } + return b +} diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go new file mode 100644 index 000000000000..4e43f6c58ab9 --- /dev/null +++ b/internal/era/e2store/e2store.go @@ -0,0 +1,146 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package e2store + +import ( + "fmt" + "io" +) + +// e2store header size. +var headerSize = 8 + +// Entry is a variable-length-data record in an e2store. +type Entry struct { + Type uint16 + Value []byte +} + +// Writer writes entries using e2store encoding. 
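+// An entry is an 8-byte header (2-byte little-endian type, 4-byte little-endian
+// length, 2 reserved zero bytes) followed by the value; for example, type 0x2a
+// with the value 0xbeef encodes to 2a00 0200 0000 0000 beef.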
+// For more information on this format, see: +// https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md +type Writer struct { + w io.Writer +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w} +} + +// Write writes a single e2store entry to w. +// An entry is encoded in a type-length-value format. The first 8 bytes of the +// record store the type (2 bytes), the length (4 bytes), and some reserved +// data (2 bytes). The remaining bytes store b. +func (w *Writer) Write(typ uint16, b []byte) (int, error) { + buf := make([]byte, headerSize+len(b)) + + // type + buf[0] = byte(typ) + buf[1] = byte(typ >> 8) + + // length + l := len(b) + buf[2] = byte(l) + buf[3] = byte(l >> 8) + buf[4] = byte(l >> 16) + buf[5] = byte(l >> 24) + + // value + copy(buf[8:], b) + + return w.w.Write(buf) +} + +// A Reader reads entries from an e2store-encoded file. +// For more information on this format, see +// https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md +type Reader struct { + r io.Reader +} + +// NewReader returns a new Reader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{r} +} + +// Read reads one Entry from r. +// If the entry is malformed, it returns io.UnexpectedEOF. If there are no +// entries left to be read, Read returns io.EOF. +func (r *Reader) Read() (*Entry, error) { + b := make([]byte, headerSize) + if _, err := io.ReadFull(r.r, b); err != nil { + return nil, err + } + + typ := uint16(b[0]) + typ += uint16(b[1]) << 8 + + length := uint64(b[2]) + length += uint64(b[3]) << 8 + length += uint64(b[4]) << 16 + length += uint64(b[5]) << 24 + + // Check reserved bytes of header. + if b[6] != 0 || b[7] != 0 { + return nil, fmt.Errorf("reserved bytes are non-zero") + } + + val := make([]byte, length) + if _, err := io.ReadFull(r.r, val); err != nil { + // An entry with a non-zero length should not return EOF when + // reading the value. + if err == io.EOF { + return nil, io.ErrUnexpectedEOF + } + return nil, err + } + + return &Entry{ + Type: typ, + Value: val, + }, nil +} + +// Find returns the first entry with the matching type. +func (r *Reader) Find(typ uint16) (*Entry, error) { + for { + entry, err := r.Read() + if err == io.EOF { + return nil, io.EOF + } else if err != nil { + return nil, err + } + if entry.Type == typ { + return entry, nil + } + } +} + +// FindAll returns all entries with the matching type. +func (r *Reader) FindAll(typ uint16) ([]*Entry, error) { + all := make([]*Entry, 0) + for { + entry, err := r.Find(typ) + if err == io.EOF { + return all, io.EOF + } else if err != nil { + return all, err + } + all = append(all, entry) + } +} diff --git a/internal/era/e2store/e2store_test.go b/internal/era/e2store/e2store_test.go new file mode 100644 index 000000000000..c27a843139e4 --- /dev/null +++ b/internal/era/e2store/e2store_test.go @@ -0,0 +1,158 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package e2store + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +func TestEncode(t *testing.T) { + for i, tt := range []struct { + entries []Entry + want string + }{ + { + entries: []Entry{{0xffff, nil}}, + want: "ffff000000000000", + }, + { + entries: []Entry{{42, common.Hex2Bytes("beef")}}, + want: "2a00020000000000beef", + }, + { + entries: []Entry{ + {42, common.Hex2Bytes("beef")}, + {9, common.Hex2Bytes("abcdabcd")}, + }, + want: "2a00020000000000beef0900040000000000abcdabcd", + }, + } { + var ( + b = NewWriteSeeker() + w = NewWriter(b) + ) + for _, e := range tt.entries { + if _, err := w.Write(e.Type, e.Value); err != nil { + t.Fatalf("test %d: encoding error: %v", i, err) + } + } + if want, got := common.Hex2Bytes(tt.want), b.Bytes(); !bytes.Equal(want, got) { + t.Fatalf("test %d: encoding mismatch (want %s, got %s", i, common.Bytes2Hex(want), common.Bytes2Hex(got)) + } + r := NewReader(io.NopCloser(bytes.NewBuffer(b.Bytes()))) + for _, want := range tt.entries { + if got, err := r.Read(); err != nil { + t.Fatalf("test %d: decoding error: %v", i, err) + } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { + t.Fatalf("test %d: decoded entry does not match (want %v, got %v)", i, want, got) + } + } + } +} + +func TestDecode(t *testing.T) { + for i, tt := range []struct { + have string + want []Entry + err error + }{ + { // basic valid decoding + have: "ffff000000000000", + want: []Entry{{0xffff, nil}}, + }, + { // basic valid decoding + have: "ffff000000000001", + err: fmt.Errorf("reserved bytes are non-zero"), + }, + { // no more entries to read, returns EOF + have: "", + err: io.EOF, + }, + { // malformed type + have: "bad", + err: io.ErrUnexpectedEOF, + }, + { // malformed length + have: "badbeef", + err: io.ErrUnexpectedEOF, + }, + { // specified length longer than actual value + have: "beef010000000000", + err: io.ErrUnexpectedEOF, + }, + } { + r := NewReader(io.NopCloser(bytes.NewBuffer(common.Hex2Bytes(tt.have)))) + if tt.err != nil { + if _, err := r.Read(); err != nil && tt.err != nil && err.Error() != tt.err.Error() { + t.Fatalf("expected error %v, got %v", tt.err, err) + } + continue + } + for _, want := range tt.want { + if got, err := r.Read(); err != nil { + t.Fatalf("test %d: decoding error: %v", i, err) + } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { + t.Fatalf("test %d: decoded entry does not match (want %v, got %v)", i, want, got) + } + } + } +} + +// WriteSeeker is an in-memory io.Writer and io.Seeker implementation. +type WriteSeeker struct { + pos int64 + buf []byte +} + +func NewWriteSeeker() *WriteSeeker { + return &WriteSeeker{} +} + +func (w *WriteSeeker) Write(p []byte) (n int, err error) { + if len(w.buf) != int(w.pos) { + return 0, fmt.Errorf("writing after seek not supported") + } + w.buf = append(w.buf, p...) 
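+	// Advance the write cursor past the bytes just appended.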
+ w.pos += int64(len(p)) + return len(p), nil +} + +func (w *WriteSeeker) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + w.pos = offset + case io.SeekCurrent: + w.pos = w.pos + offset + case io.SeekEnd: + w.pos = int64(len(w.buf)) + offset + default: + return 0, fmt.Errorf("unknown seek whence %d", whence) + } + if w.pos < 0 { + w.pos = 0 + } + return w.pos, nil +} + +func (w *WriteSeeker) Bytes() []byte { + return w.buf +} diff --git a/internal/era/era.go b/internal/era/era.go new file mode 100644 index 000000000000..3ae68b827925 --- /dev/null +++ b/internal/era/era.go @@ -0,0 +1,528 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package era + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math/big" + "os" + "path" + "strconv" + "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/internal/era/e2store" + "github.com/ethereum/go-ethereum/rlp" + "github.com/golang/snappy" +) + +var ( + TypeVersion uint16 = 0x3265 + TypeCompressedHeader uint16 = 0x03 + TypeCompressedBody uint16 = 0x04 + TypeCompressedReceipts uint16 = 0x05 + TypeTotalDifficulty uint16 = 0x06 + TypeAccumulator uint16 = 0x07 + TypeBlockIndex uint16 = 0x3266 + + MaxEra1Size = 8192 +) + +// Filename returns a recognizable Era1-formatted file name for the specified +// epoch and network. +func Filename(network string, epoch int, root common.Hash) string { + return fmt.Sprintf("%s-%05d-%s.era1", network, epoch, root.Hex()[2:10]) +} + +// ReadDir reads all the era1 files in a directory for a given network. +func ReadDir(dir, network string) ([]string, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, fmt.Errorf("error reading directory %s: %w", dir, err) + } + var ( + next = uint64(0) + eras []string + ) + for _, entry := range entries { + if path.Ext(entry.Name()) == ".era1" { + n := strings.Split(entry.Name(), "-") + if len(n) != 3 { + // invalid era1 filename, skip + continue + } + if n[0] == network { + if epoch, err := strconv.ParseUint(n[1], 10, 64); err != nil { + return nil, fmt.Errorf("malformed era1 filename: %s", entry.Name()) + } else if epoch != next { + return nil, fmt.Errorf("missing epoch %d", next) + } + next += 1 + eras = append(eras, entry.Name()) + } + } + } + return eras, nil +} + +// Builder is used to create Era1 archives of block data. +// +// Era1 files are themselves e2store files. For more information on this format, +// see https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md. +// +// The overall structure of an Era1 file follows closely the structure of an Era file +// which contains consensus Layer data (and as a byproduct, EL data after the merge). 
+// +// The structure can be summarized through this definition: +// +// era1 := Version | block-tuple* | other-entries* | Accumulator | BlockIndex +// block-tuple := CompressedHeader | CompressedBody | CompressedReceipts | TotalDifficulty +// +// Each basic element is its own entry: +// +// Version = { type: [0x65, 0x32], data: nil } +// CompressedHeader = { type: [0x03, 0x00], data: snappyFramed(rlp(header)) } +// CompressedBody = { type: [0x04, 0x00], data: snappyFramed(rlp(body)) } +// CompressedReceipts = { type: [0x05, 0x00], data: snappyFramed(rlp(receipts)) } +// TotalDifficulty = { type: [0x06, 0x00], data: uint256(header.total_difficulty) } +// Accumulator = { type: [0x07, 0x00], data: accumulator-root } +// BlockIndex = { type: [0x32, 0x66], data: block-index } +// +// Accumulator is computed by constructing an SSZ list of header-records of length at most +// 8192 and then calculating the hash_tree_root of that list. +// +// header-record := { block-hash: Bytes32, total-difficulty: Uint256 } +// accumulator := hash_tree_root([]header-record, 8192) +// +// BlockIndex stores relative offsets to each compressed block entry. The +// format is: +// +// block-index := starting-number | index | index | index ... | count +// +// starting-number is the first block number in the archive. Every index is a +// defined relative to index's location in the file. The total number of block +// entries in the file is recorded in count. +// +// Due to the accumulator size limit of 8192, the maximum number of blocks in +// an Era1 batch is also 8192. +type Builder struct { + w *e2store.Writer + startNum *uint64 + startTd *big.Int + indexes []uint64 + hashes []common.Hash + tds []*big.Int + written int +} + +// NewBuilder returns a new Builder instance. +func NewBuilder(w io.Writer) *Builder { + return &Builder{ + w: e2store.NewWriter(w), + hashes: make([]common.Hash, 0), + tds: make([]*big.Int, 0), + } +} + +// Add writes a compressed block entry and compressed receipts entry to the +// underlying e2store file. +func (b *Builder) Add(block *types.Block, receipts types.Receipts, td *big.Int) error { + eh, err := rlp.EncodeToBytes(block.Header()) + if err != nil { + return err + } + eb, err := rlp.EncodeToBytes(block.Body()) + if err != nil { + return err + } + er, err := rlp.EncodeToBytes(receipts) + if err != nil { + return err + } + return b.AddRLP(eh, eb, er, block.NumberU64(), block.Hash(), td, block.Difficulty()) +} + +// AddRLP writes a compressed block entry and compressed receipts entry to the +// underlying e2store file. +func (b *Builder) AddRLP(header, body, receipts []byte, number uint64, hash common.Hash, td, difficulty *big.Int) error { + // Write Era1 version entry before first block. + if b.startNum == nil { + if err := writeVersion(b.w); err != nil { + return err + } + n := number + b.startNum = &n + b.startTd = new(big.Int).Sub(td, difficulty) + } + if len(b.indexes) >= MaxEra1Size { + return fmt.Errorf("exceeds maximum batch size of %d", MaxEra1Size) + } + + b.indexes = append(b.indexes, uint64(b.written)) + b.hashes = append(b.hashes, hash) + b.tds = append(b.tds, td) + + // Small helper to take care snappy encoding and writing e2store entry. 
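+	// Note that snappy.NewBufferedWriter emits the framed snappy format, i.e. the
+	// snappyFramed(rlp(...)) encoding referred to in the Era1 definition above.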
+ snappyWrite := func(typ uint16, in []byte) error { + var ( + buf = bytes.NewBuffer(nil) + s = snappy.NewBufferedWriter(buf) + ) + if _, err := s.Write(in); err != nil { + return fmt.Errorf("error snappy encoding: %w", err) + } + if err := s.Flush(); err != nil { + return fmt.Errorf("error flushing snappy encoding: %w", err) + } + n, err := b.w.Write(typ, buf.Bytes()) + b.written += n + if err != nil { + return fmt.Errorf("error writing e2store entry: %w", err) + } + return nil + } + + // Write block data. + if err := snappyWrite(TypeCompressedHeader, header); err != nil { + return err + } + if err := snappyWrite(TypeCompressedBody, body); err != nil { + return err + } + if err := snappyWrite(TypeCompressedReceipts, receipts); err != nil { + return err + } + // Also write total difficulty, but don't snappy encode. + btd := bigToBytes32(td) + n, err := b.w.Write(TypeTotalDifficulty, btd[:]) + b.written += n + if err != nil { + return err + } + + return nil +} + +// Finalize computes the accumulator and block index values, then writes the +// corresponding e2store entries. +func (b *Builder) Finalize() (common.Hash, error) { + if b.startNum == nil { + return common.Hash{}, fmt.Errorf("finalize called on empty builder") + } + // Compute accumulator root and write entry. + root, err := ComputeAccumulator(b.hashes, b.tds) + if err != nil { + return common.Hash{}, fmt.Errorf("error calculating accumulator root: %w", err) + } + n, err := b.w.Write(TypeAccumulator, root[:]) + b.written += n + if err != nil { + return common.Hash{}, fmt.Errorf("error writing accumulator: %w", err) + } + // Get beginning of index entry to calculate block relative offset. + base := int64(b.written + (3 * 8)) // skip e2store header (type, length) and start block + + // Construct block index. Detailed format described in Builder + // documentation, but it is essentially encoded as: + // "start | index | index | ... | count" + var ( + count = len(b.indexes) + index = make([]byte, 16+count*8) + ) + binary.LittleEndian.PutUint64(index, *b.startNum) + // Each offset is relative from the position it is encoded in the + // index. This means that even if the same block was to be included in + // the index twice (this would be invalid anyways), the relative offset + // would be different. The idea with this is that after reading a + // relative offset, the corresponding block can be quickly read by + // performing a seek relative to the current position. + for i, offset := range b.indexes { + relative := int64(offset) - (base + int64(i)*8) + binary.LittleEndian.PutUint64(index[8+i*8:], uint64(relative)) + } + binary.LittleEndian.PutUint64(index[8+count*8:], uint64(count)) + + // Finally, write the block index entry. + if _, err := b.w.Write(TypeBlockIndex, index); err != nil { + return common.Hash{}, fmt.Errorf("unable to write block index: %w", err) + } + + return root, nil +} + +// writeVersion writes a version entry to e2store. +func writeVersion(w *e2store.Writer) error { + _, err := w.Write(TypeVersion, nil) + return err +} + +// Reader reads an Era1 archive. +// See Builder documentation for a detailed explanation of the Era1 format. +type Reader struct { + r io.ReadSeeker + offset uint64 + metadata metadata +} + +// NewReader returns a new Reader instance. +func NewReader(r io.ReadSeeker) (*Reader, error) { + m, err := readMetadata(r) + if err != nil { + return nil, err + } + return &Reader{r, m.start, m}, nil +} + +// readOffset reads a specific block's offset from the block index. 
The value n +// is the absolute block number desired. It is normalized against the index's +// start block. +func (r *Reader) readOffset(n uint64) (int64, error) { + // Seek to the encoding of the block's offset. + var ( + firstIndex = -8 - int64(r.metadata.count)*8 // size of count - index entries + indexOffset = int64(n-r.metadata.start) * 8 // desired index * size of indexes + ) + if _, err := r.r.Seek(firstIndex+indexOffset, io.SeekEnd); err != nil { + return 0, err + } + // Read the block's offset. + var offset int64 + if err := binary.Read(r.r, binary.LittleEndian, &offset); err != nil { + return 0, err + } + return offset, nil +} + +// Read reads one (block, receipts) tuple from an Era1 archive. +func (r *Reader) Read() (*types.Block, types.Receipts, error) { + block, receipts, err := r.ReadBlockAndReceipts(r.offset) + if err != nil { + return nil, nil, err + } + r.offset += 1 + return block, receipts, nil +} + +// ReadHeader reads the header number n RLP. +func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { + // Determine if the request can served by current the Era1 file, e.g. n + // must be within the range of blocks specified in the block index + // metadata. + if n < r.metadata.start || r.metadata.start+r.metadata.count < n { + return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) + } + // Read the specified block's offset from the block index. + offset, err := r.readOffset(n) + if err != nil { + return nil, fmt.Errorf("error reading block offset: %w", err) + } + if _, err := r.r.Seek(offset, io.SeekCurrent); err != nil { + return nil, err + } + // Read header. + entry, err := e2store.NewReader(r.r).Read() + if err != nil { + return nil, err + } + if entry.Type != TypeCompressedHeader { + return nil, fmt.Errorf("expected header entry, got %x", entry.Type) + } + return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) +} + +// ReadBodyRLP reads the block body number n RLP. +func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { + // Orient cursor. + _, err := r.ReadHeaderRLP(n) + if err != nil { + return nil, err + } + // Read body. + entry, err := e2store.NewReader(r.r).Read() + if err != nil { + return nil, err + } + if entry.Type != TypeCompressedBody { + return nil, fmt.Errorf("expected body entry, got %x", entry.Type) + } + return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) +} + +// ReadReceiptsRLP reads the receipts RLP associated with number n. +func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { + // Orient cursor. + _, err := r.ReadBodyRLP(n) + if err != nil { + return nil, err + } + // Read receipts. + entry, err := e2store.NewReader(r.r).Read() + if err != nil { + return nil, err + } + if entry.Type != TypeCompressedReceipts { + return nil, fmt.Errorf("expected receipts entry, got %x", entry.Type) + } + return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) +} + +// ReadTotalDifficulty reads the total difficulty of block number n. +func (r *Reader) ReadTotalDifficulty(n uint64) (*big.Int, error) { + // Orient cursor. + _, err := r.ReadReceiptsRLP(n) + if err != nil { + return nil, err + } + // Read totaly difficulty. + entry, err := e2store.NewReader(r.r).Read() + if err != nil { + return nil, err + } + if entry.Type != TypeTotalDifficulty { + return nil, fmt.Errorf("expected total difficulty entry, got %x", entry.Type) + } + return new(big.Int).SetBytes(reverseOrder(entry.Value)), nil +} + +// ReadHeader reads the header number n. 
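+// Like the other read helpers it positions the underlying reader through the
+// block index, so headers can be requested in any order.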
+func (r *Reader) ReadHeader(n uint64) (*types.Header, error) { + h, err := r.ReadHeaderRLP(n) + if err != nil { + return nil, err + } + var header types.Header + if err := rlp.DecodeBytes(h, &header); err != nil { + return nil, err + } + return &header, nil +} + +// ReadBlock reads the block number n. +func (r *Reader) ReadBlock(n uint64) (*types.Block, error) { + header, err := r.ReadHeader(n) + if err != nil { + return nil, err + } + b, err := r.ReadBodyRLP(n) + if err != nil { + return nil, err + } + var body types.Body + if err := rlp.DecodeBytes(b, &body); err != nil { + return nil, err + } + return types.NewBlockWithHeader(header).WithBody(body.Transactions, body.Uncles), nil +} + +// ReadBlockAndReceipts reads the block number n and associated receipts. +func (r *Reader) ReadBlockAndReceipts(n uint64) (*types.Block, types.Receipts, error) { + // Read block. + block, err := r.ReadBlock(n) + if err != nil { + return nil, nil, err + } + // Read receipts. + rr, err := r.ReadReceiptsRLP(n) + if err != nil { + return nil, nil, err + } + // Decode receipts. + var receipts types.Receipts + if err := rlp.DecodeBytes(rr, &receipts); err != nil { + return nil, nil, err + } + return block, receipts, err +} + +// Accumulator reads the accumulator entry in the Era1 file. +func (r *Reader) Accumulator() (common.Hash, error) { + _, err := r.seek(0, io.SeekStart) + if err != nil { + return common.Hash{}, err + } + entry, err := e2store.NewReader(r.r).Find(TypeAccumulator) + if err != nil { + return common.Hash{}, err + } + return common.BytesToHash(entry.Value), nil +} + +// InitialTD returns initial total difficulty before the difficulty of the +// first block of the Era1 is applied. +func (r *Reader) InitialTD() (*big.Int, error) { + _, err := r.seek(0, io.SeekStart) + if err != nil { + return nil, err + } + h, err := r.ReadHeader(r.Start()) + if err != nil { + return nil, err + } + // Above seek also sets reader so next TD entry will be for this block. + entry, err := e2store.NewReader(r.r).Find(TypeTotalDifficulty) + if err != nil { + return nil, err + } + td := new(big.Int).SetBytes(reverseOrder(entry.Value)) + return td.Sub(td, h.Difficulty), nil +} + +// Start returns the listed start block. +func (r *Reader) Start() uint64 { + return r.metadata.start +} + +// Count returns the total number of blocks in the Era1. +func (r *Reader) Count() uint64 { + return r.metadata.count +} + +// seek is a shorthand method for calling seek on the inner reader. +func (r *Reader) seek(offset int64, whence int) (int64, error) { + return r.r.Seek(offset, whence) +} + +// metadata wraps the metadata in the block index. +type metadata struct { + start, count uint64 +} + +// readMetadata reads the metadata stored in an Era1 file's block index. +func readMetadata(r io.ReadSeeker) (m metadata, err error) { + // Seek to count value. It's the last 8 bytes of the file. + if _, err = r.Seek(-8, io.SeekEnd); err != nil { + return + } + // Read count. + if err = binary.Read(r, binary.LittleEndian, &m.count); err != nil { + return + } + // Seek to start value. It's at the offset -sizeof(m.count) - + // count*sizeof(indexEntry) - sizeof(m.start) + if _, err = r.Seek(-16-int64(m.count)*8, io.SeekEnd); err != nil { + return + } + // Read start. 
+ if err = binary.Read(r, binary.LittleEndian, &m.start); err != nil { + return + } + return +} diff --git a/internal/era/era_test.go b/internal/era/era_test.go new file mode 100644 index 000000000000..1c646fa47f33 --- /dev/null +++ b/internal/era/era_test.go @@ -0,0 +1,132 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package era + +import ( + "bytes" + "math/big" + "os" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +type testchain struct { + headers [][]byte + bodies [][]byte + receipts [][]byte + tds []*big.Int +} + +func TestEra1Builder(t *testing.T) { + // Get temp directory. + f, err := os.CreateTemp("", "era1-test") + if err != nil { + t.Fatalf("error creating temp file: %v", err) + } + defer f.Close() + + var ( + builder = NewBuilder(f) + chain = testchain{} + ) + for i := 0; i < 128; i++ { + chain.headers = append(chain.headers, []byte{byte('h'), byte(i)}) + chain.bodies = append(chain.bodies, []byte{byte('b'), byte(i)}) + chain.receipts = append(chain.receipts, []byte{byte('r'), byte(i)}) + chain.tds = append(chain.tds, big.NewInt(int64(i))) + } + + // Write blocks to Era1. + for i := 0; i < len(chain.headers); i++ { + var ( + header = chain.headers[i] + body = chain.bodies[i] + receipts = chain.receipts[i] + hash = common.Hash{byte(i)} + td = chain.tds[i] + ) + if err = builder.AddRLP(header, body, receipts, uint64(i), hash, td, big.NewInt(1)); err != nil { + t.Fatalf("error adding entry: %v", err) + } + } + + // Finalize Era1. + if _, err := builder.Finalize(); err != nil { + t.Fatalf("error finalizing era1: %v", err) + } + + // Verify Era1 contents. + r, err := NewReader(f) + if err != nil { + t.Fatalf("failed to make reader: %s", err) + } + for i := uint64(0); i < uint64(len(chain.headers)); i++ { + // Check headers. + header, err := r.ReadHeaderRLP(i) + if err != nil { + t.Fatalf("error reading from era1: %v", err) + } + if !bytes.Equal(header, chain.headers[i]) { + t.Fatalf("mismatched header: want %s, got %s", chain.headers[i], header) + } + + // Check bodies. + body, err := r.ReadBodyRLP(i) + if err != nil { + t.Fatalf("error reading from era1: %v", err) + } + if !bytes.Equal(body, chain.bodies[i]) { + t.Fatalf("mismatched body: want %s, got %s", chain.bodies[i], body) + } + + // Check receipts. + receipts, err := r.ReadReceiptsRLP(i) + if err != nil { + t.Fatalf("error reading from era1: %v", err) + } + if !bytes.Equal(receipts, chain.receipts[i]) { + t.Fatalf("mismatched body: want %s, got %s", chain.receipts[i], receipts) + } + + // Check total difficulty. 
+ td, err := r.ReadTotalDifficulty(i) + if err != nil { + t.Fatalf("error reading from era1: %v", err) + } + if td.Cmp(chain.tds[i]) != 0 { + t.Fatalf("mismatched tds: want %s, got %s", chain.tds[i], td) + } + } +} + +func TestEraFilename(t *testing.T) { + for i, tt := range []struct { + network string + epoch int + root common.Hash + expected string + }{ + {"mainnet", 1, common.Hash{1}, "mainnet-00001-01000000.era1"}, + {"goerli", 99999, common.HexToHash("0xdeadbeef00000000000000000000000000000000000000000000000000000000"), "goerli-99999-deadbeef.era1"}, + } { + got := Filename(tt.network, tt.epoch, tt.root) + if tt.expected != got { + t.Errorf("test %d: invalid filename: want %s, got %s", i, tt.expected, got) + } + } +} From f3ec256867d0e87824cdbf07991410cc9cdb8ee2 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 19 May 2023 16:42:44 +0200 Subject: [PATCH 02/28] internal/era/e2store: refactor e2store to provide ReadAt interface --- internal/era/e2store/e2store.go | 140 +++++++++++++++++---------- internal/era/e2store/e2store_test.go | 46 +++++---- 2 files changed, 116 insertions(+), 70 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index 4e43f6c58ab9..fb042b5c3d40 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -17,12 +17,15 @@ package e2store import ( + "encoding/binary" "fmt" "io" ) -// e2store header size. -var headerSize = 8 +const ( + headerSize = 8 + valueSizeLimit = 1024 * 1024 * 50 +) // Entry is a variable-length-data record in an e2store. type Entry struct { @@ -48,21 +51,9 @@ func NewWriter(w io.Writer) *Writer { // data (2 bytes). The remaining bytes store b. func (w *Writer) Write(typ uint16, b []byte) (int, error) { buf := make([]byte, headerSize+len(b)) - - // type - buf[0] = byte(typ) - buf[1] = byte(typ >> 8) - - // length - l := len(b) - buf[2] = byte(l) - buf[3] = byte(l >> 8) - buf[4] = byte(l >> 16) - buf[5] = byte(l >> 24) - - // value + binary.LittleEndian.PutUint16(buf, typ) + binary.LittleEndian.PutUint32(buf[2:], uint32(len(b))) copy(buf[8:], b) - return w.w.Write(buf) } @@ -70,77 +61,126 @@ func (w *Writer) Write(typ uint16, b []byte) (int, error) { // For more information on this format, see // https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md type Reader struct { - r io.Reader + r io.ReaderAt + offset int64 } // NewReader returns a new Reader that reads from r. -func NewReader(r io.Reader) *Reader { - return &Reader{r} +func NewReader(r io.ReaderAt) *Reader { + return &Reader{r, 0} } // Read reads one Entry from r. -// If the entry is malformed, it returns io.UnexpectedEOF. If there are no -// entries left to be read, Read returns io.EOF. func (r *Reader) Read() (*Entry, error) { - b := make([]byte, headerSize) - if _, err := io.ReadFull(r.r, b); err != nil { + var e Entry + n, err := r.ReadAt(&e, r.offset) + if err != nil { return nil, err } + r.offset += int64(n) + return &e, nil +} - typ := uint16(b[0]) - typ += uint16(b[1]) << 8 - - length := uint64(b[2]) - length += uint64(b[3]) << 8 - length += uint64(b[4]) << 16 - length += uint64(b[5]) << 24 +// ReadAt reads one Entry from r at the specified offset. +func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { + typ, length, err := r.ReadMetadataAt(off) + if err != nil { + return 0, err + } + entry.Type = typ - // Check reserved bytes of header. - if b[6] != 0 || b[7] != 0 { - return nil, fmt.Errorf("reserved bytes are non-zero") + // Check length bounds. 
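	// valueSizeLimit above is 50 MiB, presumably to avoid allocating an
	// oversized value buffer when a header's length field is corrupt.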
+ if length > valueSizeLimit { + return headerSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) + } + if length == 0 { + return headerSize, nil } + // Read value. val := make([]byte, length) - if _, err := io.ReadFull(r.r, val); err != nil { + if n, err := r.r.ReadAt(val, off+headerSize); err != nil { + n += headerSize // An entry with a non-zero length should not return EOF when // reading the value. if err == io.EOF { - return nil, io.ErrUnexpectedEOF + return n, io.ErrUnexpectedEOF } - return nil, err + return n, err + } + entry.Value = val + return int(headerSize + length), nil +} + +// ReadMetadataAt reads the header metadata at the given offset. +func (r *Reader) ReadMetadataAt(off int64) (typ uint16, length uint32, err error) { + b := make([]byte, headerSize) + if n, err := r.r.ReadAt(b, off); err != nil { + if err == io.EOF && n > 0 { + return 0, 0, io.ErrUnexpectedEOF + } + return 0, 0, err } + typ = binary.LittleEndian.Uint16(b) + length = binary.LittleEndian.Uint32(b[2:]) - return &Entry{ - Type: typ, - Value: val, - }, nil + // Check reserved bytes of header. + if b[6] != 0 || b[7] != 0 { + return 0, 0, fmt.Errorf("reserved bytes are non-zero") + } + + return typ, length, nil } // Find returns the first entry with the matching type. -func (r *Reader) Find(typ uint16) (*Entry, error) { +func (r *Reader) Find(want uint16) (*Entry, error) { + var ( + off int64 + typ uint16 + length uint32 + err error + ) for { - entry, err := r.Read() + typ, length, err = r.ReadMetadataAt(off) if err == io.EOF { return nil, io.EOF } else if err != nil { return nil, err } - if entry.Type == typ { - return entry, nil + if typ == want { + var e Entry + if _, err := r.ReadAt(&e, off); err != nil { + return nil, err + } + return &e, nil } + off += int64(headerSize + length) } } // FindAll returns all entries with the matching type. 
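//
// Collecting every entry of a given type could look like this sketch (typ and
// handle are placeholders chosen by the caller):
//
//	r := e2store.NewReader(f)      // f is any io.ReaderAt, e.g. an *os.File
//	entries, err := r.FindAll(typ)
//	if err != nil {
//		return err
//	}
//	for _, e := range entries {
//		handle(e.Type, e.Value)
//	}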
-func (r *Reader) FindAll(typ uint16) ([]*Entry, error) { - all := make([]*Entry, 0) +func (r *Reader) FindAll(want uint16) ([]*Entry, error) { + var ( + off int64 + typ uint16 + length uint32 + entries []*Entry + err error + ) for { - entry, err := r.Find(typ) + typ, length, err = r.ReadMetadataAt(off) if err == io.EOF { - return all, io.EOF + return entries, nil } else if err != nil { - return all, err + return entries, err + } + if typ == want { + e := new(Entry) + if _, err := r.ReadAt(e, off); err != nil { + return entries, err + } + entries = append(entries, e) } - all = append(all, entry) + off += int64(headerSize + length) } } diff --git a/internal/era/e2store/e2store_test.go b/internal/era/e2store/e2store_test.go index c27a843139e4..3246685b484a 100644 --- a/internal/era/e2store/e2store_test.go +++ b/internal/era/e2store/e2store_test.go @@ -26,19 +26,23 @@ import ( ) func TestEncode(t *testing.T) { - for i, tt := range []struct { + for _, tt := range []struct { entries []Entry want string + name string }{ { + name: "emptyEntry", entries: []Entry{{0xffff, nil}}, want: "ffff000000000000", }, { + name: "beef", entries: []Entry{{42, common.Hex2Bytes("beef")}}, want: "2a00020000000000beef", }, { + name: "twoEntries", entries: []Entry{ {42, common.Hex2Bytes("beef")}, {9, common.Hex2Bytes("abcdabcd")}, @@ -46,26 +50,28 @@ func TestEncode(t *testing.T) { want: "2a00020000000000beef0900040000000000abcdabcd", }, } { - var ( - b = NewWriteSeeker() - w = NewWriter(b) - ) - for _, e := range tt.entries { - if _, err := w.Write(e.Type, e.Value); err != nil { - t.Fatalf("test %d: encoding error: %v", i, err) + t.Run(tt.name, func(t *testing.T) { + var ( + b = NewWriteSeeker() + w = NewWriter(b) + ) + for _, e := range tt.entries { + if _, err := w.Write(e.Type, e.Value); err != nil { + t.Fatalf("encoding error: %v", err) + } } - } - if want, got := common.Hex2Bytes(tt.want), b.Bytes(); !bytes.Equal(want, got) { - t.Fatalf("test %d: encoding mismatch (want %s, got %s", i, common.Bytes2Hex(want), common.Bytes2Hex(got)) - } - r := NewReader(io.NopCloser(bytes.NewBuffer(b.Bytes()))) - for _, want := range tt.entries { - if got, err := r.Read(); err != nil { - t.Fatalf("test %d: decoding error: %v", i, err) - } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { - t.Fatalf("test %d: decoded entry does not match (want %v, got %v)", i, want, got) + if want, got := common.Hex2Bytes(tt.want), b.Bytes(); !bytes.Equal(want, got) { + t.Fatalf("encoding mismatch (want %s, got %s", common.Bytes2Hex(want), common.Bytes2Hex(got)) } - } + r := NewReader(bytes.NewReader(b.Bytes())) + for _, want := range tt.entries { + if got, err := r.Read(); err != nil { + t.Fatalf("decoding error: %v", err) + } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { + t.Fatalf("decoded entry does not match (want %v, got %v)", want, got) + } + } + }) } } @@ -100,7 +106,7 @@ func TestDecode(t *testing.T) { err: io.ErrUnexpectedEOF, }, } { - r := NewReader(io.NopCloser(bytes.NewBuffer(common.Hex2Bytes(tt.have)))) + r := NewReader(bytes.NewReader(common.Hex2Bytes(tt.have))) if tt.err != nil { if _, err := r.Read(); err != nil && tt.err != nil && err.Error() != tt.err.Error() { t.Fatalf("expected error %v, got %v", tt.err, err) From a9304e8c047c0d5621014a13a5f7e703b7e33246 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 19 May 2023 21:09:48 +0200 Subject: [PATCH 03/28] internal/era/e2store: export HeaderSize --- internal/era/e2store/e2store.go | 20 ++++++++++---------- 1 file 
changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index fb042b5c3d40..2e51b7297830 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -23,7 +23,7 @@ import ( ) const ( - headerSize = 8 + HeaderSize = 8 valueSizeLimit = 1024 * 1024 * 50 ) @@ -50,7 +50,7 @@ func NewWriter(w io.Writer) *Writer { // record store the type (2 bytes), the length (4 bytes), and some reserved // data (2 bytes). The remaining bytes store b. func (w *Writer) Write(typ uint16, b []byte) (int, error) { - buf := make([]byte, headerSize+len(b)) + buf := make([]byte, HeaderSize+len(b)) binary.LittleEndian.PutUint16(buf, typ) binary.LittleEndian.PutUint32(buf[2:], uint32(len(b))) copy(buf[8:], b) @@ -91,16 +91,16 @@ func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { // Check length bounds. if length > valueSizeLimit { - return headerSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) + return HeaderSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) } if length == 0 { - return headerSize, nil + return HeaderSize, nil } // Read value. val := make([]byte, length) - if n, err := r.r.ReadAt(val, off+headerSize); err != nil { - n += headerSize + if n, err := r.r.ReadAt(val, off+HeaderSize); err != nil { + n += HeaderSize // An entry with a non-zero length should not return EOF when // reading the value. if err == io.EOF { @@ -109,12 +109,12 @@ func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { return n, err } entry.Value = val - return int(headerSize + length), nil + return int(HeaderSize + length), nil } // ReadMetadataAt reads the header metadata at the given offset. func (r *Reader) ReadMetadataAt(off int64) (typ uint16, length uint32, err error) { - b := make([]byte, headerSize) + b := make([]byte, HeaderSize) if n, err := r.r.ReadAt(b, off); err != nil { if err == io.EOF && n > 0 { return 0, 0, io.ErrUnexpectedEOF @@ -154,7 +154,7 @@ func (r *Reader) Find(want uint16) (*Entry, error) { } return &e, nil } - off += int64(headerSize + length) + off += int64(HeaderSize + length) } } @@ -181,6 +181,6 @@ func (r *Reader) FindAll(want uint16) ([]*Entry, error) { } entries = append(entries, e) } - off += int64(headerSize + length) + off += int64(HeaderSize + length) } } From 545f0b1aa7ced9bfaaba262d6c002d6a54009c4c Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 19 May 2023 21:10:31 +0200 Subject: [PATCH 04/28] internal/era: refactor era to use ReadAt interface --- internal/era/builder.go | 227 ++++++++++++++++++++++++ internal/era/era.go | 382 ++++++++++++---------------------------- 2 files changed, 339 insertions(+), 270 deletions(-) create mode 100644 internal/era/builder.go diff --git a/internal/era/builder.go b/internal/era/builder.go new file mode 100644 index 000000000000..7172b2236087 --- /dev/null +++ b/internal/era/builder.go @@ -0,0 +1,227 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . +package era + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/internal/era/e2store" + "github.com/ethereum/go-ethereum/rlp" + "github.com/golang/snappy" +) + +// Builder is used to create Era1 archives of block data. +// +// Era1 files are themselves e2store files. For more information on this format, +// see https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md. +// +// The overall structure of an Era1 file follows closely the structure of an Era file +// which contains consensus Layer data (and as a byproduct, EL data after the merge). +// +// The structure can be summarized through this definition: +// +// era1 := Version | block-tuple* | other-entries* | Accumulator | BlockIndex +// block-tuple := CompressedHeader | CompressedBody | CompressedReceipts | TotalDifficulty +// +// Each basic element is its own entry: +// +// Version = { type: [0x65, 0x32], data: nil } +// CompressedHeader = { type: [0x03, 0x00], data: snappyFramed(rlp(header)) } +// CompressedBody = { type: [0x04, 0x00], data: snappyFramed(rlp(body)) } +// CompressedReceipts = { type: [0x05, 0x00], data: snappyFramed(rlp(receipts)) } +// TotalDifficulty = { type: [0x06, 0x00], data: uint256(header.total_difficulty) } +// Accumulator = { type: [0x07, 0x00], data: accumulator-root } +// BlockIndex = { type: [0x32, 0x66], data: block-index } +// +// Accumulator is computed by constructing an SSZ list of header-records of length at most +// 8192 and then calculating the hash_tree_root of that list. +// +// header-record := { block-hash: Bytes32, total-difficulty: Uint256 } +// accumulator := hash_tree_root([]header-record, 8192) +// +// BlockIndex stores relative offsets to each compressed block entry. The +// format is: +// +// block-index := starting-number | index | index | index ... | count +// +// starting-number is the first block number in the archive. Every index is a +// defined relative to index's location in the file. The total number of block +// entries in the file is recorded in count. +// +// Due to the accumulator size limit of 8192, the maximum number of blocks in +// an Era1 batch is also 8192. +type Builder struct { + w *e2store.Writer + startNum *uint64 + startTd *big.Int + indexes []uint64 + hashes []common.Hash + tds []*big.Int + written int + + buf *bytes.Buffer + snappy *snappy.Writer +} + +// NewBuilder returns a new Builder instance. +func NewBuilder(w io.Writer) *Builder { + buf := bytes.NewBuffer(nil) + return &Builder{ + w: e2store.NewWriter(w), + buf: buf, + snappy: snappy.NewBufferedWriter(buf), + } +} + +// Add writes a compressed block entry and compressed receipts entry to the +// underlying e2store file. +func (b *Builder) Add(block *types.Block, receipts types.Receipts, td *big.Int) error { + eh, err := rlp.EncodeToBytes(block.Header()) + if err != nil { + return err + } + eb, err := rlp.EncodeToBytes(block.Body()) + if err != nil { + return err + } + er, err := rlp.EncodeToBytes(receipts) + if err != nil { + return err + } + return b.AddRLP(eh, eb, er, block.NumberU64(), block.Hash(), td, block.Difficulty()) +} + +// AddRLP writes a compressed block entry and compressed receipts entry to the +// underlying e2store file. 
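//
// A typical build sequence is sketched below (blocks, receipts and tds stand
// in for data pulled from some chain source; error handling is elided):
//
//	f, _ := os.Create("mainnet-00000-00000000.era1")
//	b := era.NewBuilder(f)
//	for i, blk := range blocks {
//		_ = b.Add(blk, receipts[i], tds[i]) // or AddRLP for pre-encoded data
//	}
//	root, _ := b.Finalize() // accumulator root, used in the final filename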
+func (b *Builder) AddRLP(header, body, receipts []byte, number uint64, hash common.Hash, td, difficulty *big.Int) error { + // Write Era1 version entry before first block. + if b.startNum == nil { + if err := writeVersion(b.w); err != nil { + return err + } + n := number + b.startNum = &n + b.startTd = new(big.Int).Sub(td, difficulty) + } + if len(b.indexes) >= MaxEra1Size { + return fmt.Errorf("exceeds maximum batch size of %d", MaxEra1Size) + } + + b.indexes = append(b.indexes, uint64(b.written)) + b.hashes = append(b.hashes, hash) + b.tds = append(b.tds, td) + + // Small helper to take care snappy encoding and writing e2store entry. + snappyWrite := func(typ uint16, in []byte) error { + var ( + buf = b.buf + s = b.snappy + ) + buf.Reset() + s.Reset(buf) + if _, err := b.snappy.Write(in); err != nil { + return fmt.Errorf("error snappy encoding: %w", err) + } + if err := s.Flush(); err != nil { + return fmt.Errorf("error flushing snappy encoding: %w", err) + } + n, err := b.w.Write(typ, b.buf.Bytes()) + b.written += n + if err != nil { + return fmt.Errorf("error writing e2store entry: %w", err) + } + return nil + } + + // Write block data. + if err := snappyWrite(TypeCompressedHeader, header); err != nil { + return err + } + if err := snappyWrite(TypeCompressedBody, body); err != nil { + return err + } + if err := snappyWrite(TypeCompressedReceipts, receipts); err != nil { + return err + } + // Also write total difficulty, but don't snappy encode. + btd := bigToBytes32(td) + n, err := b.w.Write(TypeTotalDifficulty, btd[:]) + b.written += n + if err != nil { + return err + } + + return nil +} + +// Finalize computes the accumulator and block index values, then writes the +// corresponding e2store entries. +func (b *Builder) Finalize() (common.Hash, error) { + if b.startNum == nil { + return common.Hash{}, fmt.Errorf("finalize called on empty builder") + } + // Compute accumulator root and write entry. + root, err := ComputeAccumulator(b.hashes, b.tds) + if err != nil { + return common.Hash{}, fmt.Errorf("error calculating accumulator root: %w", err) + } + n, err := b.w.Write(TypeAccumulator, root[:]) + b.written += n + if err != nil { + return common.Hash{}, fmt.Errorf("error writing accumulator: %w", err) + } + // Get beginning of index entry to calculate block relative offset. + base := int64(b.written + (3 * 8)) // skip e2store header (type, length) and start block + + // Construct block index. Detailed format described in Builder + // documentation, but it is essentially encoded as: + // "start | index | index | ... | count" + var ( + count = len(b.indexes) + index = make([]byte, 16+count*8) + ) + binary.LittleEndian.PutUint64(index, *b.startNum) + // Each offset is relative from the position it is encoded in the + // index. This means that even if the same block was to be included in + // the index twice (this would be invalid anyways), the relative offset + // would be different. The idea with this is that after reading a + // relative offset, the corresponding block can be quickly read by + // performing a seek relative to the current position. + for i, offset := range b.indexes { + relative := int64(offset) - (base + int64(i)*8) + binary.LittleEndian.PutUint64(index[8+i*8:], uint64(relative)) + } + binary.LittleEndian.PutUint64(index[8+count*8:], uint64(count)) + + // Finally, write the block index entry. 
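	// (Layout illustration: for a two-block archive starting at block 100 the
	// payload is 16+2*8 = 32 bytes, laid out as start=100 | offset0 | offset1
	// | count=2, each value a little-endian uint64.)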
+ if _, err := b.w.Write(TypeBlockIndex, index); err != nil { + return common.Hash{}, fmt.Errorf("unable to write block index: %w", err) + } + + return root, nil +} + +// writeVersion writes a version entry to e2store. +func writeVersion(w *e2store.Writer) error { + _, err := w.Write(TypeVersion, nil) + return err +} diff --git a/internal/era/era.go b/internal/era/era.go index 3ae68b827925..195d5261435c 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -83,323 +83,176 @@ func ReadDir(dir, network string) ([]string, error) { return eras, nil } -// Builder is used to create Era1 archives of block data. -// -// Era1 files are themselves e2store files. For more information on this format, -// see https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md. -// -// The overall structure of an Era1 file follows closely the structure of an Era file -// which contains consensus Layer data (and as a byproduct, EL data after the merge). -// -// The structure can be summarized through this definition: -// -// era1 := Version | block-tuple* | other-entries* | Accumulator | BlockIndex -// block-tuple := CompressedHeader | CompressedBody | CompressedReceipts | TotalDifficulty -// -// Each basic element is its own entry: -// -// Version = { type: [0x65, 0x32], data: nil } -// CompressedHeader = { type: [0x03, 0x00], data: snappyFramed(rlp(header)) } -// CompressedBody = { type: [0x04, 0x00], data: snappyFramed(rlp(body)) } -// CompressedReceipts = { type: [0x05, 0x00], data: snappyFramed(rlp(receipts)) } -// TotalDifficulty = { type: [0x06, 0x00], data: uint256(header.total_difficulty) } -// Accumulator = { type: [0x07, 0x00], data: accumulator-root } -// BlockIndex = { type: [0x32, 0x66], data: block-index } -// -// Accumulator is computed by constructing an SSZ list of header-records of length at most -// 8192 and then calculating the hash_tree_root of that list. -// -// header-record := { block-hash: Bytes32, total-difficulty: Uint256 } -// accumulator := hash_tree_root([]header-record, 8192) -// -// BlockIndex stores relative offsets to each compressed block entry. The -// format is: -// -// block-index := starting-number | index | index | index ... | count -// -// starting-number is the first block number in the archive. Every index is a -// defined relative to index's location in the file. The total number of block -// entries in the file is recorded in count. -// -// Due to the accumulator size limit of 8192, the maximum number of blocks in -// an Era1 batch is also 8192. -type Builder struct { - w *e2store.Writer - startNum *uint64 - startTd *big.Int - indexes []uint64 - hashes []common.Hash - tds []*big.Int - written int -} +// Reader reads an Era1 archive. +// See Builder documentation for a detailed explanation of the Era1 format. +type Reader struct { + r io.ReaderAt + e *e2store.Reader -// NewBuilder returns a new Builder instance. -func NewBuilder(w io.Writer) *Builder { - return &Builder{ - w: e2store.NewWriter(w), - hashes: make([]common.Hash, 0), - tds: make([]*big.Int, 0), - } + buf [8]byte // buffer reading entry offsets + next uint64 // next block to read + length int64 // total length of r + metadata metadata // start, count info } -// Add writes a compressed block entry and compressed receipts entry to the -// underlying e2store file. 
-func (b *Builder) Add(block *types.Block, receipts types.Receipts, td *big.Int) error { - eh, err := rlp.EncodeToBytes(block.Header()) - if err != nil { - return err - } - eb, err := rlp.EncodeToBytes(block.Body()) - if err != nil { - return err - } - er, err := rlp.EncodeToBytes(receipts) - if err != nil { - return err - } - return b.AddRLP(eh, eb, er, block.NumberU64(), block.Hash(), td, block.Difficulty()) +type ReadAtSeeker interface { + io.ReaderAt + io.Seeker } -// AddRLP writes a compressed block entry and compressed receipts entry to the -// underlying e2store file. -func (b *Builder) AddRLP(header, body, receipts []byte, number uint64, hash common.Hash, td, difficulty *big.Int) error { - // Write Era1 version entry before first block. - if b.startNum == nil { - if err := writeVersion(b.w); err != nil { - return err - } - n := number - b.startNum = &n - b.startTd = new(big.Int).Sub(td, difficulty) - } - if len(b.indexes) >= MaxEra1Size { - return fmt.Errorf("exceeds maximum batch size of %d", MaxEra1Size) - } - - b.indexes = append(b.indexes, uint64(b.written)) - b.hashes = append(b.hashes, hash) - b.tds = append(b.tds, td) - - // Small helper to take care snappy encoding and writing e2store entry. - snappyWrite := func(typ uint16, in []byte) error { - var ( - buf = bytes.NewBuffer(nil) - s = snappy.NewBufferedWriter(buf) - ) - if _, err := s.Write(in); err != nil { - return fmt.Errorf("error snappy encoding: %w", err) - } - if err := s.Flush(); err != nil { - return fmt.Errorf("error flushing snappy encoding: %w", err) - } - n, err := b.w.Write(typ, buf.Bytes()) - b.written += n - if err != nil { - return fmt.Errorf("error writing e2store entry: %w", err) - } - return nil - } - - // Write block data. - if err := snappyWrite(TypeCompressedHeader, header); err != nil { - return err - } - if err := snappyWrite(TypeCompressedBody, body); err != nil { - return err - } - if err := snappyWrite(TypeCompressedReceipts, receipts); err != nil { - return err - } - // Also write total difficulty, but don't snappy encode. - btd := bigToBytes32(td) - n, err := b.w.Write(TypeTotalDifficulty, btd[:]) - b.written += n - if err != nil { - return err - } - - return nil -} - -// Finalize computes the accumulator and block index values, then writes the -// corresponding e2store entries. -func (b *Builder) Finalize() (common.Hash, error) { - if b.startNum == nil { - return common.Hash{}, fmt.Errorf("finalize called on empty builder") - } - // Compute accumulator root and write entry. - root, err := ComputeAccumulator(b.hashes, b.tds) +// NewReader returns a new Reader instance. +func NewReader(r ReadAtSeeker) (*Reader, error) { + length, err := r.Seek(0, io.SeekEnd) if err != nil { - return common.Hash{}, fmt.Errorf("error calculating accumulator root: %w", err) + return nil, err } - n, err := b.w.Write(TypeAccumulator, root[:]) - b.written += n - if err != nil { - return common.Hash{}, fmt.Errorf("error writing accumulator: %w", err) - } - // Get beginning of index entry to calculate block relative offset. - base := int64(b.written + (3 * 8)) // skip e2store header (type, length) and start block - - // Construct block index. Detailed format described in Builder - // documentation, but it is essentially encoded as: - // "start | index | index | ... | count" - var ( - count = len(b.indexes) - index = make([]byte, 16+count*8) - ) - binary.LittleEndian.PutUint64(index, *b.startNum) - // Each offset is relative from the position it is encoded in the - // index. 
This means that even if the same block was to be included in - // the index twice (this would be invalid anyways), the relative offset - // would be different. The idea with this is that after reading a - // relative offset, the corresponding block can be quickly read by - // performing a seek relative to the current position. - for i, offset := range b.indexes { - relative := int64(offset) - (base + int64(i)*8) - binary.LittleEndian.PutUint64(index[8+i*8:], uint64(relative)) - } - binary.LittleEndian.PutUint64(index[8+count*8:], uint64(count)) - - // Finally, write the block index entry. - if _, err := b.w.Write(TypeBlockIndex, index); err != nil { - return common.Hash{}, fmt.Errorf("unable to write block index: %w", err) - } - - return root, nil -} - -// writeVersion writes a version entry to e2store. -func writeVersion(w *e2store.Writer) error { - _, err := w.Write(TypeVersion, nil) - return err -} - -// Reader reads an Era1 archive. -// See Builder documentation for a detailed explanation of the Era1 format. -type Reader struct { - r io.ReadSeeker - offset uint64 - metadata metadata -} - -// NewReader returns a new Reader instance. -func NewReader(r io.ReadSeeker) (*Reader, error) { - m, err := readMetadata(r) + m, err := readMetadata(r, length) if err != nil { return nil, err } - return &Reader{r, m.start, m}, nil + return &Reader{ + r: r, + e: e2store.NewReader(r), + buf: [8]byte{}, + next: m.start, + length: length, + metadata: m, + }, nil } // readOffset reads a specific block's offset from the block index. The value n -// is the absolute block number desired. It is normalized against the index's -// start block. +// is the absolute block number desired. func (r *Reader) readOffset(n uint64) (int64, error) { - // Seek to the encoding of the block's offset. var ( - firstIndex = -8 - int64(r.metadata.count)*8 // size of count - index entries - indexOffset = int64(n-r.metadata.start) * 8 // desired index * size of indexes + firstIndex = -8 - int64(r.metadata.count)*8 // size of count - index entries + indexOffset = int64(n-r.metadata.start) * 8 // desired index * size of indexes + offOffset = r.length + firstIndex + indexOffset // offset of block offset ) - if _, err := r.r.Seek(firstIndex+indexOffset, io.SeekEnd); err != nil { - return 0, err - } - // Read the block's offset. - var offset int64 - if err := binary.Read(r.r, binary.LittleEndian, &offset); err != nil { + r.clearBuffer() + if _, err := r.r.ReadAt(r.buf[:], offOffset); err != nil { return 0, err } - return offset, nil + // Since the block offset is relative from its location + size of index + // value (8), we need to add it to it's offset to get the block's + // absolute offset. + return offOffset + 8 + int64(binary.LittleEndian.Uint64(r.buf[:])), nil } // Read reads one (block, receipts) tuple from an Era1 archive. func (r *Reader) Read() (*types.Block, types.Receipts, error) { - block, receipts, err := r.ReadBlockAndReceipts(r.offset) + block, receipts, err := r.ReadBlockAndReceipts(r.next) if err != nil { return nil, nil, err } - r.offset += 1 + r.next += 1 return block, receipts, nil } // ReadHeader reads the header number n RLP. func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { - // Determine if the request can served by current the Era1 file, e.g. n - // must be within the range of blocks specified in the block index - // metadata. 
if n < r.metadata.start || r.metadata.start+r.metadata.count < n { return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) } // Read the specified block's offset from the block index. - offset, err := r.readOffset(n) + off, err := r.readOffset(n) if err != nil { return nil, fmt.Errorf("error reading block offset: %w", err) } - if _, err := r.r.Seek(offset, io.SeekCurrent); err != nil { - return nil, err - } // Read header. - entry, err := e2store.NewReader(r.r).Read() - if err != nil { + var e e2store.Entry + if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } - if entry.Type != TypeCompressedHeader { - return nil, fmt.Errorf("expected header entry, got %x", entry.Type) + if e.Type != TypeCompressedHeader { + return nil, fmt.Errorf("expected header entry, got %x", e.Type) } - return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) + return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) +} + +func skipN(r *Reader, off int64, n int) (int64, error) { + for i := 0; i < n; i++ { + _, length, err := r.e.ReadMetadataAt(off) // header + if err != nil { + return off, err + } + off += e2store.HeaderSize + int64(length) + } + return off, nil } // ReadBodyRLP reads the block body number n RLP. func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { - // Orient cursor. - _, err := r.ReadHeaderRLP(n) + if n < r.metadata.start || r.metadata.start+r.metadata.count < n { + return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) + } + // Read the specified block's offset from the block index. + off, err := r.readOffset(n) if err != nil { + return nil, fmt.Errorf("error reading block offset: %w", err) + } + // Skip over header entry to get to body. + if off, err = skipN(r, off, 1); err != nil { return nil, err } // Read body. - entry, err := e2store.NewReader(r.r).Read() - if err != nil { + var e e2store.Entry + if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } - if entry.Type != TypeCompressedBody { - return nil, fmt.Errorf("expected body entry, got %x", entry.Type) + if e.Type != TypeCompressedBody { + return nil, fmt.Errorf("expected body entry, got %x", e.Type) } - return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) + return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) } // ReadReceiptsRLP reads the receipts RLP associated with number n. func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { - // Orient cursor. - _, err := r.ReadBodyRLP(n) + if n < r.metadata.start || r.metadata.start+r.metadata.count < n { + return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) + } + // Read the specified block's offset from the block index. + off, err := r.readOffset(n) if err != nil { + return nil, fmt.Errorf("error reading block offset: %w", err) + } + // Skip over header entry to get to body. + if off, err = skipN(r, off, 2); err != nil { return nil, err } // Read receipts. 
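	// (skipN(r, off, 2) above steps over the header and body entries, so the
	// offset now points at the block tuple's receipts entry.)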
- entry, err := e2store.NewReader(r.r).Read() - if err != nil { + var e e2store.Entry + if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } - if entry.Type != TypeCompressedReceipts { - return nil, fmt.Errorf("expected receipts entry, got %x", entry.Type) + if e.Type != TypeCompressedReceipts { + return nil, fmt.Errorf("expected receipts entry, got %x", e.Type) } - return io.ReadAll(snappy.NewReader(bytes.NewReader(entry.Value))) + return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) } // ReadTotalDifficulty reads the total difficulty of block number n. func (r *Reader) ReadTotalDifficulty(n uint64) (*big.Int, error) { - // Orient cursor. - _, err := r.ReadReceiptsRLP(n) + if n < r.metadata.start || r.metadata.start+r.metadata.count < n { + return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) + } + // Read the specified block's offset from the block index. + off, err := r.readOffset(n) if err != nil { + return nil, fmt.Errorf("error reading block offset: %w", err) + } + // Skip over header entry to get to body. + if off, err = skipN(r, off, 3); err != nil { return nil, err } - // Read totaly difficulty. - entry, err := e2store.NewReader(r.r).Read() - if err != nil { + // Read receipts. + var e e2store.Entry + if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } - if entry.Type != TypeTotalDifficulty { - return nil, fmt.Errorf("expected total difficulty entry, got %x", entry.Type) + if e.Type != TypeTotalDifficulty { + return nil, fmt.Errorf("expected receipts entry, got %x", e.Type) } - return new(big.Int).SetBytes(reverseOrder(entry.Value)), nil + return new(big.Int).SetBytes(reverseOrder(e.Value)), nil } // ReadHeader reads the header number n. @@ -454,11 +307,7 @@ func (r *Reader) ReadBlockAndReceipts(n uint64) (*types.Block, types.Receipts, e // Accumulator reads the accumulator entry in the Era1 file. func (r *Reader) Accumulator() (common.Hash, error) { - _, err := r.seek(0, io.SeekStart) - if err != nil { - return common.Hash{}, err - } - entry, err := e2store.NewReader(r.r).Find(TypeAccumulator) + entry, err := r.e.Find(TypeAccumulator) if err != nil { return common.Hash{}, err } @@ -468,16 +317,12 @@ func (r *Reader) Accumulator() (common.Hash, error) { // InitialTD returns initial total difficulty before the difficulty of the // first block of the Era1 is applied. func (r *Reader) InitialTD() (*big.Int, error) { - _, err := r.seek(0, io.SeekStart) - if err != nil { - return nil, err - } h, err := r.ReadHeader(r.Start()) if err != nil { return nil, err } // Above seek also sets reader so next TD entry will be for this block. - entry, err := e2store.NewReader(r.r).Find(TypeTotalDifficulty) + entry, err := r.e.Find(TypeTotalDifficulty) if err != nil { return nil, err } @@ -495,9 +340,11 @@ func (r *Reader) Count() uint64 { return r.metadata.count } -// seek is a shorthand method for calling seek on the inner reader. -func (r *Reader) seek(offset int64, whence int) (int64, error) { - return r.r.Seek(offset, whence) +// clearBuffer zeroes out the buffer. +func (r *Reader) clearBuffer() { + for i := 0; i < len(r.buf); i++ { + r.buf[i] = 0 + } } // metadata wraps the metadata in the block index. @@ -506,23 +353,18 @@ type metadata struct { } // readMetadata reads the metadata stored in an Era1 file's block index. -func readMetadata(r io.ReadSeeker) (m metadata, err error) { - // Seek to count value. It's the last 8 bytes of the file. 
- if _, err = r.Seek(-8, io.SeekEnd); err != nil { - return - } - // Read count. - if err = binary.Read(r, binary.LittleEndian, &m.count); err != nil { +func readMetadata(r io.ReaderAt, length int64) (m metadata, err error) { + b := make([]byte, 16) + // Read count. It's the last 8 bytes of the file. + if _, err = r.ReadAt(b[:8], length-8); err != nil { return } - // Seek to start value. It's at the offset -sizeof(m.count) - + m.count = binary.LittleEndian.Uint64(b) + // Read start. It's at the offset -sizeof(m.count) - // count*sizeof(indexEntry) - sizeof(m.start) - if _, err = r.Seek(-16-int64(m.count)*8, io.SeekEnd); err != nil { - return - } - // Read start. - if err = binary.Read(r, binary.LittleEndian, &m.start); err != nil { + if _, err = r.ReadAt(b[8:], length-16-int64(m.count*8)); err != nil { return } + m.start = binary.LittleEndian.Uint64(b[8:]) return } From d58c9137a37baddcbc0489b5893ecc6e4c6d0e2f Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 22 May 2023 16:30:56 +0200 Subject: [PATCH 05/28] internal/era: elevate anonymous func to named --- internal/era/builder.go | 51 +++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/internal/era/builder.go b/internal/era/builder.go index 7172b2236087..be50355eeea3 100644 --- a/internal/era/builder.go +++ b/internal/era/builder.go @@ -130,38 +130,17 @@ func (b *Builder) AddRLP(header, body, receipts []byte, number uint64, hash comm b.hashes = append(b.hashes, hash) b.tds = append(b.tds, td) - // Small helper to take care snappy encoding and writing e2store entry. - snappyWrite := func(typ uint16, in []byte) error { - var ( - buf = b.buf - s = b.snappy - ) - buf.Reset() - s.Reset(buf) - if _, err := b.snappy.Write(in); err != nil { - return fmt.Errorf("error snappy encoding: %w", err) - } - if err := s.Flush(); err != nil { - return fmt.Errorf("error flushing snappy encoding: %w", err) - } - n, err := b.w.Write(typ, b.buf.Bytes()) - b.written += n - if err != nil { - return fmt.Errorf("error writing e2store entry: %w", err) - } - return nil - } - // Write block data. - if err := snappyWrite(TypeCompressedHeader, header); err != nil { + if err := b.snappyWrite(TypeCompressedHeader, header); err != nil { return err } - if err := snappyWrite(TypeCompressedBody, body); err != nil { + if err := b.snappyWrite(TypeCompressedBody, body); err != nil { return err } - if err := snappyWrite(TypeCompressedReceipts, receipts); err != nil { + if err := b.snappyWrite(TypeCompressedReceipts, receipts); err != nil { return err } + // Also write total difficulty, but don't snappy encode. btd := bigToBytes32(td) n, err := b.w.Write(TypeTotalDifficulty, btd[:]) @@ -220,6 +199,28 @@ func (b *Builder) Finalize() (common.Hash, error) { return root, nil } +// snappyWrite is a small helper to take care snappy encoding and writing an e2store entry. +func (b *Builder) snappyWrite(typ uint16, in []byte) error { + var ( + buf = b.buf + s = b.snappy + ) + buf.Reset() + s.Reset(buf) + if _, err := b.snappy.Write(in); err != nil { + return fmt.Errorf("error snappy encoding: %w", err) + } + if err := s.Flush(); err != nil { + return fmt.Errorf("error flushing snappy encoding: %w", err) + } + n, err := b.w.Write(typ, b.buf.Bytes()) + b.written += n + if err != nil { + return fmt.Errorf("error writing e2store entry: %w", err) + } + return nil +} + // writeVersion writes a version entry to e2store. 
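// Per the Builder documentation above, the version entry carries no payload,
// so on disk it is just its 8-byte e2store header: the type bytes 0x65, 0x32
// ("e2") followed by six zero bytes.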
func writeVersion(w *e2store.Writer) error { _, err := w.Write(TypeVersion, nil) From 76e08e0cf53b971f23388e01c692aa8cb1513b40 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 22 May 2023 16:59:54 +0200 Subject: [PATCH 06/28] cmd/utils: don't store entire era file in-memory during import / export --- cmd/utils/cmd.go | 162 +++++++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 68 deletions(-) diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go index 3464b69a411e..c9f2dfd10531 100644 --- a/cmd/utils/cmd.go +++ b/cmd/utils/cmd.go @@ -19,7 +19,6 @@ package utils import ( "bufio" - "bytes" "compress/gzip" "crypto/sha256" "errors" @@ -263,51 +262,63 @@ func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, networ reported = time.Now() imported = 0 forker = core.NewForkChoice(chain, nil) + h = sha256.New() + buf []byte ) for i, filename := range entries { - // Read entire Era1 to memory. Max historical Era1 is around - // 600MB. This is a lot to load at once, but it speeds up the - // import substantially. - f, err := os.ReadFile(path.Join(dir, filename)) - if err != nil { - return fmt.Errorf("unable to open era: %w", err) - } - - if have, want := common.Hash(sha256.Sum256(f)).Hex(), checksums[i]; have != want { - return fmt.Errorf("checksum mismatch: have %s, want %s", have, want) - } + err := func() error { + f, err := os.Open(path.Join(dir, filename)) + if err != nil { + return fmt.Errorf("unable to open era: %w", err) + } + defer f.Close() - // Import all block data from Era1. - r, err := era.NewReader(bytes.NewReader(f)) - if err != nil { - return fmt.Errorf("error making era reader: %w", err) - } - for j := 0; ; j += 1 { - n := i*era.MaxEra1Size + j - block, receipts, err := r.Read() - if err == io.EOF { - break - } else if err != nil { - return fmt.Errorf("error reading block %d: %w", n, err) - } else if block.Number().BitLen() == 0 { - continue // skip genesis + // Validate checksum. + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("unable to recalculate checksum: %w", err) } - if status, err := chain.HeaderChain().InsertHeaderChain([]*types.Header{block.Header()}, start, forker); err != nil { - return fmt.Errorf("error inserting header %d: %w", n, err) - } else if status != core.CanonStatTy { - return fmt.Errorf("error inserting header %d, not canon: %v", n, status) + if have, want := common.BytesToHash(h.Sum(buf)).Hex(), checksums[i]; have != want { + return fmt.Errorf("checksum mismatch: have %s, want %s", have, want) } - if _, err := chain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{receipts}, 2^64-1); err != nil { - return fmt.Errorf("error inserting body %d: %w", n, err) + h.Reset() + buf = buf[:0] + + // Import all block data from Era1. 
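			// (The reader works directly against the open file via ReadAt,
			// so the Era1 archive no longer needs to be buffered in memory
			// before import.)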
+ r, err := era.NewReader(f) + if err != nil { + return fmt.Errorf("error making era reader: %w", err) } - imported += 1 + for j := 0; ; j += 1 { + n := i*era.MaxEra1Size + j + block, receipts, err := r.Read() + if err == io.EOF { + break + } else if err != nil { + return fmt.Errorf("error reading block %d: %w", n, err) + } else if block.Number().BitLen() == 0 { + continue // skip genesis + } + if status, err := chain.HeaderChain().InsertHeaderChain([]*types.Header{block.Header()}, start, forker); err != nil { + return fmt.Errorf("error inserting header %d: %w", n, err) + } else if status != core.CanonStatTy { + return fmt.Errorf("error inserting header %d, not canon: %v", n, status) + } + if _, err := chain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{receipts}, 2^64-1); err != nil { + return fmt.Errorf("error inserting body %d: %w", n, err) + } + imported += 1 - // Give the user some feedback that something is happening. - if time.Since(reported) >= 8*time.Second { - log.Info("Importing Era files", "head", n, "imported", imported, "elapsed", common.PrettyDuration(time.Since(start))) - imported = 0 - reported = time.Now() + // Give the user some feedback that something is happening. + if time.Since(reported) >= 8*time.Second { + log.Info("Importing Era files", "head", n, "imported", imported, "elapsed", common.PrettyDuration(time.Since(start))) + imported = 0 + reported = time.Now() + } } + return nil + }() + if err != nil { + return err } } @@ -401,47 +412,62 @@ func ExportHistory(bc *core.BlockChain, dir string, first, last, step uint64) er var ( start = time.Now() reported = time.Now() + h = sha256.New() + buf []byte checksums []string ) for i := first; i <= last; i += step { - var ( - buf = bytes.NewBuffer(nil) - w = era.NewBuilder(buf) - ) - for j := uint64(0); j < step && j <= last-i; j++ { - var ( - n = i + j - block = bc.GetBlockByNumber(n) - ) - if block == nil { - return fmt.Errorf("export failed on #%d: not found", n) + err := func() error { + filename := path.Join(dir, era.Filename(network, int(i/step), common.Hash{})) + f, err := os.Create(filename) + if err != nil { + return fmt.Errorf("could not create era file: %w", err) } - receipts := bc.GetReceiptsByHash(block.Hash()) - if receipts == nil { - return fmt.Errorf("export failed on #%d: receipts not found", n) + defer f.Close() + + w := era.NewBuilder(f) + for j := uint64(0); j < step && j <= last-i; j++ { + var ( + n = i + j + block = bc.GetBlockByNumber(n) + ) + if block == nil { + return fmt.Errorf("export failed on #%d: not found", n) + } + receipts := bc.GetReceiptsByHash(block.Hash()) + if receipts == nil { + return fmt.Errorf("export failed on #%d: receipts not found", n) + } + td := bc.GetTd(block.Hash(), block.NumberU64()) + if td == nil { + return fmt.Errorf("export failed on #%d: total difficulty not found", n) + } + if err := w.Add(block, receipts, td); err != nil { + return err + } } - td := bc.GetTd(block.Hash(), block.NumberU64()) - if td == nil { - return fmt.Errorf("export failed on #%d: total difficulty not found", n) + root, err := w.Finalize() + if err != nil { + return fmt.Errorf("export failed to finalize %d: %w", step/i, err) } - if err := w.Add(block, receipts, td); err != nil { + // Set correct filename with root. + os.Rename(filename, path.Join(dir, era.Filename(network, int(i/step), root))) + + // Compute checksum of entire Era1. 
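			// (Finalize leaves the file offset at the end of the archive, so
			// the file is rewound below before being streamed through the
			// sha256 hasher.)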
+ if _, err := f.Seek(0, io.SeekStart); err != nil { return err } - } - root, err := w.Finalize() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("unable to calculate checksum: %w", err) + } + checksums = append(checksums, common.BytesToHash(h.Sum(buf)).Hex()) + h.Reset() + buf = buf[:0] + return nil + }() if err != nil { - return fmt.Errorf("export failed to finalize %d: %w", step/i, err) - } - - // Compute checksum of entire Era1. - checksums = append(checksums, common.Hash(sha256.Sum256(buf.Bytes())).Hex()) - - // Write Era1 to disk. - filename := path.Join(dir, era.Filename(network, int(i/step), root)) - if err := os.WriteFile(filename, buf.Bytes(), os.ModePerm); err != nil { return err } - if time.Since(reported) >= 8*time.Second { log.Info("Exporting blocks", "exported", i, "elapsed", common.PrettyDuration(time.Since(start))) reported = time.Now() From d6cc6019215f33227e5e6c1e5cd3428c0333accf Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 13:21:46 +0200 Subject: [PATCH 07/28] internal/era: better abstraction between era and e2store --- internal/era/e2store/e2store.go | 30 ++++++++++++++++++++---------- internal/era/era.go | 4 ++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index 2e51b7297830..e5337ce7886c 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -23,7 +23,7 @@ import ( ) const ( - HeaderSize = 8 + headerSize = 8 valueSizeLimit = 1024 * 1024 * 50 ) @@ -50,7 +50,7 @@ func NewWriter(w io.Writer) *Writer { // record store the type (2 bytes), the length (4 bytes), and some reserved // data (2 bytes). The remaining bytes store b. func (w *Writer) Write(typ uint16, b []byte) (int, error) { - buf := make([]byte, HeaderSize+len(b)) + buf := make([]byte, headerSize+len(b)) binary.LittleEndian.PutUint16(buf, typ) binary.LittleEndian.PutUint32(buf[2:], uint32(len(b))) copy(buf[8:], b) @@ -91,16 +91,16 @@ func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { // Check length bounds. if length > valueSizeLimit { - return HeaderSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) + return headerSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) } if length == 0 { - return HeaderSize, nil + return headerSize, nil } // Read value. val := make([]byte, length) - if n, err := r.r.ReadAt(val, off+HeaderSize); err != nil { - n += HeaderSize + if n, err := r.r.ReadAt(val, off+headerSize); err != nil { + n += headerSize // An entry with a non-zero length should not return EOF when // reading the value. if err == io.EOF { @@ -109,12 +109,22 @@ func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { return n, err } entry.Value = val - return int(HeaderSize + length), nil + return int(headerSize + length), nil +} + +// Reads the header at off and returns the total length of the entry, including +// header. +func (r *Reader) LengthAt(off int64) (int64, error) { + _, length, err := r.ReadMetadataAt(off) + if err != nil { + return 0, err + } + return int64(length) + headerSize, nil } // ReadMetadataAt reads the header metadata at the given offset. 
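// As a worked example (taken from the encoding tests in this series), the
// entry hex "2a00020000000000beef" decodes as:
//
//	2a 00        type   = 42 (little-endian uint16)
//	02 00 00 00  length = 2  (little-endian uint32)
//	00 00        reserved, must be zero
//	be ef        value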
func (r *Reader) ReadMetadataAt(off int64) (typ uint16, length uint32, err error) { - b := make([]byte, HeaderSize) + b := make([]byte, headerSize) if n, err := r.r.ReadAt(b, off); err != nil { if err == io.EOF && n > 0 { return 0, 0, io.ErrUnexpectedEOF @@ -154,7 +164,7 @@ func (r *Reader) Find(want uint16) (*Entry, error) { } return &e, nil } - off += int64(HeaderSize + length) + off += int64(headerSize + length) } } @@ -181,6 +191,6 @@ func (r *Reader) FindAll(want uint16) ([]*Entry, error) { } entries = append(entries, e) } - off += int64(HeaderSize + length) + off += int64(headerSize + length) } } diff --git a/internal/era/era.go b/internal/era/era.go index 195d5261435c..b516b8403f79 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -171,11 +171,11 @@ func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { func skipN(r *Reader, off int64, n int) (int64, error) { for i := 0; i < n; i++ { - _, length, err := r.e.ReadMetadataAt(off) // header + length, err := r.e.LengthAt(off) if err != nil { return off, err } - off += e2store.HeaderSize + int64(length) + off += length } return off, nil } From 6c4747fe60d41c249d7b8af7e4a3effbb466cc39 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 13:25:24 +0200 Subject: [PATCH 08/28] cmd/era: properly close era files --- cmd/era/main.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index 6ab1020f2d47..a0475b645bfc 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -111,22 +111,20 @@ func block(ctx *cli.Context) error { if err != nil { return fmt.Errorf("invalid block number: %w", err) } - f, err := open(ctx, num/uint64(ctx.Int(eraSizeFlag.Name))) if err != nil { return fmt.Errorf("error opening era: %w", err) } + defer f.Close() r, err := era.NewReader(f) if err != nil { return fmt.Errorf("error making era reader: %w", err) } - // Read block with number. block, err := r.ReadBlock(num) if err != nil { return fmt.Errorf("error reading era: %w", err) } - // Convert block to JSON and print. val, err := ethapi.RPCMarshalBlock(block, ctx.Bool(txsFlag.Name), ctx.Bool(txsFlag.Name), params.MainnetChainConfig) if err != nil { @@ -150,6 +148,7 @@ func info(ctx *cli.Context) error { if err != nil { return err } + defer f.Close() r, err := era.NewReader(f) if err != nil { return fmt.Errorf("error creating era reader: %w", err) From 97b86f45ca932310a19bc368c2ba016a4743f461 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 13:29:37 +0200 Subject: [PATCH 09/28] cmd/era: don't let defers stack --- cmd/era/main.go | 55 ++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index a0475b645bfc..4b624a4745eb 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -220,34 +220,41 @@ func verify(ctx *cli.Context) error { // Verify each epoch matches the expected root. for i, want := range roots { - name := entries[i] - f, err := os.Open(path.Join(dir, name)) - if err != nil { - return fmt.Errorf("error opening era1 file %s: %w", name, err) - } - defer f.Close() + // Wrap in function so defers don't stack. 
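		// (Each iteration opens an era1 file; without the closure, the
		// deferred f.Close calls would only run when verify returns, keeping
		// every file in the directory open for the duration of the loop.)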
+ err := func() error { + name := entries[i] + f, err := os.Open(path.Join(dir, name)) + if err != nil { + return fmt.Errorf("error opening era1 file %s: %w", name, err) + } + defer f.Close() - r, err := era.NewReader(f) - if err != nil { - return fmt.Errorf("unable to make era reader: %w", err) - } + r, err := era.NewReader(f) + if err != nil { + return fmt.Errorf("unable to make era reader: %w", err) + } - // Read accumulator and check against expected. - if got, err := r.Accumulator(); err != nil { - return fmt.Errorf("error retrieving accumulator for %s: %w", name, err) - } else if got != want { - return fmt.Errorf("invalid root %s: got %s, want %s", name, got, want) - } + // Read accumulator and check against expected. + if got, err := r.Accumulator(); err != nil { + return fmt.Errorf("error retrieving accumulator for %s: %w", name, err) + } else if got != want { + return fmt.Errorf("invalid root %s: got %s, want %s", name, got, want) + } - // Recompute accumulator. - if err := checkAccumulator(r); err != nil { - return fmt.Errorf("error verify era1 file %s: %w", name, err) - } + // Recompute accumulator. + if err := checkAccumulator(r); err != nil { + return fmt.Errorf("error verify era1 file %s: %w", name, err) + } - // Give the user some feedback that something is happening. - if time.Since(reported) >= 8*time.Second { - fmt.Printf("Verifying Era1 files \t\t verified=%d,\t elapsed=%s\n", i, common.PrettyDuration(time.Since(start))) - reported = time.Now() + // Give the user some feedback that something is happening. + if time.Since(reported) >= 8*time.Second { + fmt.Printf("Verifying Era1 files \t\t verified=%d,\t elapsed=%s\n", i, common.PrettyDuration(time.Since(start))) + reported = time.Now() + } + return nil + }() + if err != nil { + return err } } From b65db9db79add884dfdc902570bc6ed1971b91d7 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 13:39:33 +0200 Subject: [PATCH 10/28] cmd/geth: add description for import-history --- cmd/geth/chaincmd.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 52bd1bb9bef7..d333c175599d 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -136,7 +136,10 @@ be gzipped.`, utils.DatabaseFlags, utils.NetworkFlags, ), - Description: "", + Description: ` +The import-history command will import blocks and their corresponding receipts +from Era archives. 
+`, } exportHistoryCommand = &cli.Command{ Action: exportHistory, From ca3d8bf71720b9c2d781f622afa8076006efa175 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 13:49:33 +0200 Subject: [PATCH 11/28] cmd/utils: better bytes buffer --- cmd/utils/cmd.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go index c9f2dfd10531..5a97122dfb70 100644 --- a/cmd/utils/cmd.go +++ b/cmd/utils/cmd.go @@ -19,6 +19,7 @@ package utils import ( "bufio" + "bytes" "compress/gzip" "crypto/sha256" "errors" @@ -263,7 +264,7 @@ func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, networ imported = 0 forker = core.NewForkChoice(chain, nil) h = sha256.New() - buf []byte + buf = bytes.NewBuffer(nil) ) for i, filename := range entries { err := func() error { @@ -277,11 +278,11 @@ func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, networ if _, err := io.Copy(h, f); err != nil { return fmt.Errorf("unable to recalculate checksum: %w", err) } - if have, want := common.BytesToHash(h.Sum(buf)).Hex(), checksums[i]; have != want { + if have, want := common.BytesToHash(h.Sum(buf.Bytes()[:])).Hex(), checksums[i]; have != want { return fmt.Errorf("checksum mismatch: have %s, want %s", have, want) } h.Reset() - buf = buf[:0] + buf.Reset() // Import all block data from Era1. r, err := era.NewReader(f) @@ -413,7 +414,7 @@ func ExportHistory(bc *core.BlockChain, dir string, first, last, step uint64) er start = time.Now() reported = time.Now() h = sha256.New() - buf []byte + buf = bytes.NewBuffer(nil) checksums []string ) for i := first; i <= last; i += step { @@ -460,9 +461,9 @@ func ExportHistory(bc *core.BlockChain, dir string, first, last, step uint64) er if _, err := io.Copy(h, f); err != nil { return fmt.Errorf("unable to calculate checksum: %w", err) } - checksums = append(checksums, common.BytesToHash(h.Sum(buf)).Hex()) + checksums = append(checksums, common.BytesToHash(h.Sum(buf.Bytes()[:])).Hex()) h.Reset() - buf = buf[:0] + buf.Reset() return nil }() if err != nil { From cccf47b4f4405f18dc9bfbd54fccaa7005fdc1d2 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 14:04:18 +0200 Subject: [PATCH 12/28] internal/era: error if accumulator has more records than max allowed --- internal/era/accumulator.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/era/accumulator.go b/internal/era/accumulator.go index 78fa451930b8..19e03973f1f5 100644 --- a/internal/era/accumulator.go +++ b/internal/era/accumulator.go @@ -30,6 +30,9 @@ func ComputeAccumulator(hashes []common.Hash, tds []*big.Int) (common.Hash, erro if len(hashes) != len(tds) { return common.Hash{}, fmt.Errorf("must have equal number hashes as td values") } + if len(hashes) > MaxEra1Size { + return common.Hash{}, fmt.Errorf("too many records: have %d, max %d", len(hashes), MaxEra1Size) + } hh := ssz.NewHasher() for i := range hashes { rec := headerRecord{hashes[i], tds[i]} From 7b9bda23c4f8424d4e60428668431f5d15f4110a Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Tue, 23 May 2023 14:11:59 +0200 Subject: [PATCH 13/28] internal/era: better doc comment --- internal/era/e2store/e2store.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index e5337ce7886c..e1a85f6bc43c 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -112,8 +112,8 @@ func (r *Reader) 
ReadAt(entry *Entry, off int64) (int, error) { return int(headerSize + length), nil } -// Reads the header at off and returns the total length of the entry, including -// header. +// LengthAt reads the header at off and returns the total length of the entry, +// including header. func (r *Reader) LengthAt(off int64) (int64, error) { _, length, err := r.ReadMetadataAt(off) if err != nil { From de45fcad5f84c9fc33a997861b6894bd780e6791 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 29 May 2023 16:03:04 +0200 Subject: [PATCH 14/28] internal/era/e2store: rm superfluous reader, rm superfluous testcases, add fuzzer --- internal/era/e2store/e2store_test.go | 95 ++++++++++++---------------- 1 file changed, 40 insertions(+), 55 deletions(-) diff --git a/internal/era/e2store/e2store_test.go b/internal/era/e2store/e2store_test.go index 3246685b484a..64f4320f91dc 100644 --- a/internal/era/e2store/e2store_test.go +++ b/internal/era/e2store/e2store_test.go @@ -51,8 +51,9 @@ func TestEncode(t *testing.T) { }, } { t.Run(tt.name, func(t *testing.T) { + t.Parallel() var ( - b = NewWriteSeeker() + b = bytes.NewBuffer(nil) w = NewWriter(b) ) for _, e := range tt.entries { @@ -60,15 +61,20 @@ func TestEncode(t *testing.T) { t.Fatalf("encoding error: %v", err) } } - if want, got := common.Hex2Bytes(tt.want), b.Bytes(); !bytes.Equal(want, got) { - t.Fatalf("encoding mismatch (want %s, got %s", common.Bytes2Hex(want), common.Bytes2Hex(got)) + if want, have := common.FromHex(tt.want), b.Bytes(); !bytes.Equal(want, have) { + t.Fatalf("encoding mismatch (want %x, have %x", want, have) } r := NewReader(bytes.NewReader(b.Bytes())) for _, want := range tt.entries { - if got, err := r.Read(); err != nil { + have, err := r.Read() + if err != nil { t.Fatalf("decoding error: %v", err) - } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { - t.Fatalf("decoded entry does not match (want %v, got %v)", want, got) + } + if have.Type != want.Type { + t.Fatalf("decoded entry does type mismatch (want %v, got %v)", want.Type, have.Type) + } + if !bytes.Equal(have.Value, want.Value) { + t.Fatalf("decoded entry does not match (want %#x, got %#x)", want.Value, have.Value) } } }) @@ -78,12 +84,10 @@ func TestEncode(t *testing.T) { func TestDecode(t *testing.T) { for i, tt := range []struct { have string - want []Entry err error }{ { // basic valid decoding have: "ffff000000000000", - want: []Entry{{0xffff, nil}}, }, { // basic valid decoding have: "ffff000000000001", @@ -106,59 +110,40 @@ func TestDecode(t *testing.T) { err: io.ErrUnexpectedEOF, }, } { - r := NewReader(bytes.NewReader(common.Hex2Bytes(tt.have))) + r := NewReader(bytes.NewReader(common.FromHex(tt.have))) if tt.err != nil { - if _, err := r.Read(); err != nil && tt.err != nil && err.Error() != tt.err.Error() { + _, err := r.Read() + if err == nil && tt.err != nil { + t.Fatalf("test %d, expected error, got none", i) + } + if err != nil && tt.err == nil { + t.Fatalf("test %d, expected no error, got %v", i, err) + } + if err != nil && tt.err != nil && err.Error() != tt.err.Error() { t.Fatalf("expected error %v, got %v", tt.err, err) } continue } - for _, want := range tt.want { - if got, err := r.Read(); err != nil { - t.Fatalf("test %d: decoding error: %v", i, err) - } else if got.Type != want.Type || !bytes.Equal(got.Value, want.Value) { - t.Fatalf("test %d: decoded entry does not match (want %v, got %v)", i, want, got) - } - } } } -// WriteSeeker is an in-memory io.Writer and io.Seeker implementation. 
-type WriteSeeker struct { - pos int64 - buf []byte -} - -func NewWriteSeeker() *WriteSeeker { - return &WriteSeeker{} -} - -func (w *WriteSeeker) Write(p []byte) (n int, err error) { - if len(w.buf) != int(w.pos) { - return 0, fmt.Errorf("writing after seek not supported") - } - w.buf = append(w.buf, p...) - w.pos += int64(len(p)) - return len(p), nil -} - -func (w *WriteSeeker) Seek(offset int64, whence int) (int64, error) { - switch whence { - case io.SeekStart: - w.pos = offset - case io.SeekCurrent: - w.pos = w.pos + offset - case io.SeekEnd: - w.pos = int64(len(w.buf)) + offset - default: - return 0, fmt.Errorf("unknown seek whence %d", whence) - } - if w.pos < 0 { - w.pos = 0 - } - return w.pos, nil -} - -func (w *WriteSeeker) Bytes() []byte { - return w.buf +func FuzzCodec(f *testing.F) { + f.Fuzz(func(t *testing.T, input []byte) { + r := NewReader(bytes.NewReader(input)) + entry, err := r.Read() + if err != nil { + return + } + var ( + b = bytes.NewBuffer(nil) + w = NewWriter(b) + ) + w.Write(entry.Type, entry.Value) + output := b.Bytes() + // Only care about the input that was actually consumed + input = input[:r.offset] + if !bytes.Equal(input, output) { + t.Fatalf("decode-encode mismatch, input %#x output %#x", input, output) + } + }) } From 235d413c2f0d0f47f49b1f44e678b621aa6826e3 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 29 May 2023 16:47:13 +0200 Subject: [PATCH 15/28] internal/era: avoid some repetition --- internal/era/era.go | 89 ++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 58 deletions(-) diff --git a/internal/era/era.go b/internal/era/era.go index b516b8403f79..9859fb76ef8b 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -113,7 +113,6 @@ func NewReader(r ReadAtSeeker) (*Reader, error) { return &Reader{ r: r, e: e2store.NewReader(r), - buf: [8]byte{}, next: m.start, length: length, metadata: m, @@ -148,27 +147,6 @@ func (r *Reader) Read() (*types.Block, types.Receipts, error) { return block, receipts, nil } -// ReadHeader reads the header number n RLP. -func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { - if n < r.metadata.start || r.metadata.start+r.metadata.count < n { - return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) - } - // Read the specified block's offset from the block index. - off, err := r.readOffset(n) - if err != nil { - return nil, fmt.Errorf("error reading block offset: %w", err) - } - // Read header. - var e e2store.Entry - if _, err := r.e.ReadAt(&e, off); err != nil { - return nil, err - } - if e.Type != TypeCompressedHeader { - return nil, fmt.Errorf("expected header entry, got %x", e.Type) - } - return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) -} - func skipN(r *Reader, off int64, n int) (int64, error) { for i := 0; i < n; i++ { length, err := r.e.LengthAt(off) @@ -180,8 +158,8 @@ func skipN(r *Reader, off int64, n int) (int64, error) { return off, nil } -// ReadBodyRLP reads the block body number n RLP. -func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { +// readBlob reads an entry of data. 
+func (r *Reader) readEntry(n uint64, skip int) (*e2store.Entry, error) { if n < r.metadata.start || r.metadata.start+r.metadata.count < n { return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) } @@ -190,13 +168,34 @@ func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { if err != nil { return nil, fmt.Errorf("error reading block offset: %w", err) } - // Skip over header entry to get to body. - if off, err = skipN(r, off, 1); err != nil { + // Skip to the requested entry. + if off, err = skipN(r, off, skip); err != nil { return nil, err } - // Read body. - var e e2store.Entry - if _, err := r.e.ReadAt(&e, off); err != nil { + // Read entry. + var entry e2store.Entry + if _, err := r.e.ReadAt(&entry, off); err != nil { + return nil, err + } + return &entry, nil +} + +// ReadHeader reads the header number n RLP. +func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { + e, err := r.readEntry(n, 0) + if err != nil { + return nil, err + } + if e.Type != TypeCompressedHeader { + return nil, fmt.Errorf("expected header entry, got %x", e.Type) + } + return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) +} + +// ReadBodyRLP reads the block body number n RLP. +func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { + e, err := r.readEntry(n, 1) + if err != nil { return nil, err } if e.Type != TypeCompressedBody { @@ -207,21 +206,8 @@ func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { // ReadReceiptsRLP reads the receipts RLP associated with number n. func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { - if n < r.metadata.start || r.metadata.start+r.metadata.count < n { - return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) - } - // Read the specified block's offset from the block index. - off, err := r.readOffset(n) + e, err := r.readEntry(n, 2) if err != nil { - return nil, fmt.Errorf("error reading block offset: %w", err) - } - // Skip over header entry to get to body. - if off, err = skipN(r, off, 2); err != nil { - return nil, err - } - // Read receipts. - var e e2store.Entry - if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } if e.Type != TypeCompressedReceipts { @@ -232,25 +218,12 @@ func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { // ReadTotalDifficulty reads the total difficulty of block number n. func (r *Reader) ReadTotalDifficulty(n uint64) (*big.Int, error) { - if n < r.metadata.start || r.metadata.start+r.metadata.count < n { - return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) - } - // Read the specified block's offset from the block index. - off, err := r.readOffset(n) + e, err := r.readEntry(n, 3) if err != nil { - return nil, fmt.Errorf("error reading block offset: %w", err) - } - // Skip over header entry to get to body. - if off, err = skipN(r, off, 3); err != nil { - return nil, err - } - // Read receipts. 
- var e e2store.Entry - if _, err := r.e.ReadAt(&e, off); err != nil { return nil, err } if e.Type != TypeTotalDifficulty { - return nil, fmt.Errorf("expected receipts entry, got %x", e.Type) + return nil, fmt.Errorf("expected TD entry, got %x", e.Type) } return new(big.Int).SetBytes(reverseOrder(e.Value)), nil } From 100075574075436a2b1219ac12c52698a71d6e5a Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 29 May 2023 16:59:38 +0200 Subject: [PATCH 16/28] internal/era: simplify clauses --- internal/era/era.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/era/era.go b/internal/era/era.go index 9859fb76ef8b..477f9eeb71c2 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -53,6 +53,7 @@ func Filename(network string, epoch int, root common.Hash) string { } // ReadDir reads all the era1 files in a directory for a given network. +// Format: --.era1 func ReadDir(dir, network string) ([]string, error) { entries, err := os.ReadDir(dir) if err != nil { @@ -63,22 +64,21 @@ func ReadDir(dir, network string) ([]string, error) { eras []string ) for _, entry := range entries { - if path.Ext(entry.Name()) == ".era1" { - n := strings.Split(entry.Name(), "-") - if len(n) != 3 { - // invalid era1 filename, skip - continue - } - if n[0] == network { - if epoch, err := strconv.ParseUint(n[1], 10, 64); err != nil { - return nil, fmt.Errorf("malformed era1 filename: %s", entry.Name()) - } else if epoch != next { - return nil, fmt.Errorf("missing epoch %d", next) - } - next += 1 - eras = append(eras, entry.Name()) - } + if path.Ext(entry.Name()) != ".era1" { + continue } + parts := strings.Split(entry.Name(), "-") + if len(parts) != 3 || parts[0] != network { + // invalid era1 filename, skip + continue + } + if epoch, err := strconv.ParseUint(parts[1], 10, 64); err != nil { + return nil, fmt.Errorf("malformed era1 filename: %s", entry.Name()) + } else if epoch != next { + return nil, fmt.Errorf("missing epoch %d", next) + } + next += 1 + eras = append(eras, entry.Name()) } return eras, nil } From 008ed822241a703f9c2bc618ca7ad34641aaf3c3 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 29 May 2023 21:13:39 +0200 Subject: [PATCH 17/28] internal/era: unexport things --- internal/era/era.go | 41 +++++++++++++++++----------------------- internal/era/era_test.go | 8 ++++---- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/internal/era/era.go b/internal/era/era.go index 477f9eeb71c2..bbb499a45cbb 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -147,17 +147,6 @@ func (r *Reader) Read() (*types.Block, types.Receipts, error) { return block, receipts, nil } -func skipN(r *Reader, off int64, n int) (int64, error) { - for i := 0; i < n; i++ { - length, err := r.e.LengthAt(off) - if err != nil { - return off, err - } - off += length - } - return off, nil -} - // readBlob reads an entry of data. func (r *Reader) readEntry(n uint64, skip int) (*e2store.Entry, error) { if n < r.metadata.start || r.metadata.start+r.metadata.count < n { @@ -169,8 +158,12 @@ func (r *Reader) readEntry(n uint64, skip int) (*e2store.Entry, error) { return nil, fmt.Errorf("error reading block offset: %w", err) } // Skip to the requested entry. - if off, err = skipN(r, off, skip); err != nil { - return nil, err + for i := 0; i < skip; i++ { + if length, err := r.e.LengthAt(off); err != nil { + return nil, err + } else { + off += length + } } // Read entry. 
var entry e2store.Entry @@ -180,8 +173,8 @@ func (r *Reader) readEntry(n uint64, skip int) (*e2store.Entry, error) { return &entry, nil } -// ReadHeader reads the header number n RLP. -func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { +// readHeaderRLP reads the header number n RLP. +func (r *Reader) readHeaderRLP(n uint64) ([]byte, error) { e, err := r.readEntry(n, 0) if err != nil { return nil, err @@ -192,8 +185,8 @@ func (r *Reader) ReadHeaderRLP(n uint64) ([]byte, error) { return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) } -// ReadBodyRLP reads the block body number n RLP. -func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { +// readBodyRLP reads the block body number n RLP. +func (r *Reader) readBodyRLP(n uint64) ([]byte, error) { e, err := r.readEntry(n, 1) if err != nil { return nil, err @@ -204,8 +197,8 @@ func (r *Reader) ReadBodyRLP(n uint64) ([]byte, error) { return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) } -// ReadReceiptsRLP reads the receipts RLP associated with number n. -func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { +// readReceiptsRLP reads the receipts RLP associated with number n. +func (r *Reader) readReceiptsRLP(n uint64) ([]byte, error) { e, err := r.readEntry(n, 2) if err != nil { return nil, err @@ -216,8 +209,8 @@ func (r *Reader) ReadReceiptsRLP(n uint64) ([]byte, error) { return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) } -// ReadTotalDifficulty reads the total difficulty of block number n. -func (r *Reader) ReadTotalDifficulty(n uint64) (*big.Int, error) { +// readTotalDifficulty reads the total difficulty of block number n. +func (r *Reader) readTotalDifficulty(n uint64) (*big.Int, error) { e, err := r.readEntry(n, 3) if err != nil { return nil, err @@ -230,7 +223,7 @@ func (r *Reader) ReadTotalDifficulty(n uint64) (*big.Int, error) { // ReadHeader reads the header number n. func (r *Reader) ReadHeader(n uint64) (*types.Header, error) { - h, err := r.ReadHeaderRLP(n) + h, err := r.readHeaderRLP(n) if err != nil { return nil, err } @@ -247,7 +240,7 @@ func (r *Reader) ReadBlock(n uint64) (*types.Block, error) { if err != nil { return nil, err } - b, err := r.ReadBodyRLP(n) + b, err := r.readBodyRLP(n) if err != nil { return nil, err } @@ -266,7 +259,7 @@ func (r *Reader) ReadBlockAndReceipts(n uint64) (*types.Block, types.Receipts, e return nil, nil, err } // Read receipts. - rr, err := r.ReadReceiptsRLP(n) + rr, err := r.readReceiptsRLP(n) if err != nil { return nil, nil, err } diff --git a/internal/era/era_test.go b/internal/era/era_test.go index 1c646fa47f33..a4fd2c3b063a 100644 --- a/internal/era/era_test.go +++ b/internal/era/era_test.go @@ -77,7 +77,7 @@ func TestEra1Builder(t *testing.T) { } for i := uint64(0); i < uint64(len(chain.headers)); i++ { // Check headers. - header, err := r.ReadHeaderRLP(i) + header, err := r.readHeaderRLP(i) if err != nil { t.Fatalf("error reading from era1: %v", err) } @@ -86,7 +86,7 @@ func TestEra1Builder(t *testing.T) { } // Check bodies. - body, err := r.ReadBodyRLP(i) + body, err := r.readBodyRLP(i) if err != nil { t.Fatalf("error reading from era1: %v", err) } @@ -95,7 +95,7 @@ func TestEra1Builder(t *testing.T) { } // Check receipts. - receipts, err := r.ReadReceiptsRLP(i) + receipts, err := r.readReceiptsRLP(i) if err != nil { t.Fatalf("error reading from era1: %v", err) } @@ -104,7 +104,7 @@ func TestEra1Builder(t *testing.T) { } // Check total difficulty. 
- td, err := r.ReadTotalDifficulty(i) + td, err := r.readTotalDifficulty(i) if err != nil { t.Fatalf("error reading from era1: %v", err) } From 3fa17e775f80515863429ca66520f289d373a6cc Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 2 Jun 2023 17:03:02 +0200 Subject: [PATCH 18/28] internal/era,cmd/utils,cmd/era: change to iterator interface for reading era entries --- cmd/era/main.go | 72 ++++----- cmd/utils/cmd.go | 21 ++- cmd/utils/history_test.go | 46 +++--- internal/era/era.go | 300 ++++++++++++++++---------------------- internal/era/era_test.go | 36 +++-- internal/era/iterator.go | 154 +++++++++++++++++++ 6 files changed, 376 insertions(+), 253 deletions(-) create mode 100644 internal/era/iterator.go diff --git a/cmd/era/main.go b/cmd/era/main.go index 4b624a4745eb..972377af7174 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -19,7 +19,6 @@ package main import ( "encoding/json" "fmt" - "io" "math/big" "os" "path" @@ -111,19 +110,15 @@ func block(ctx *cli.Context) error { if err != nil { return fmt.Errorf("invalid block number: %w", err) } - f, err := open(ctx, num/uint64(ctx.Int(eraSizeFlag.Name))) + e, err := open(ctx, num/uint64(ctx.Int(eraSizeFlag.Name))) if err != nil { return fmt.Errorf("error opening era: %w", err) } - defer f.Close() - r, err := era.NewReader(f) - if err != nil { - return fmt.Errorf("error making era reader: %w", err) - } + defer e.Close() // Read block with number. - block, err := r.ReadBlock(num) + block, err := e.GetBlockByNumber(num) if err != nil { - return fmt.Errorf("error reading era: %w", err) + return fmt.Errorf("error reading block %d: %w", num, err) } // Convert block to JSON and print. val, err := ethapi.RPCMarshalBlock(block, ctx.Bool(txsFlag.Name), ctx.Bool(txsFlag.Name), params.MainnetChainConfig) @@ -144,20 +139,16 @@ func info(ctx *cli.Context) error { if err != nil { return fmt.Errorf("invalid epoch number: %w", err) } - f, err := open(ctx, epoch) + e, err := open(ctx, epoch) if err != nil { return err } - defer f.Close() - r, err := era.NewReader(f) - if err != nil { - return fmt.Errorf("error creating era reader: %w", err) - } - acc, err := r.Accumulator() + defer e.Close() + acc, err := e.Accumulator() if err != nil { return fmt.Errorf("error reading accumulator: %w", err) } - td, err := r.InitialTD() + td, err := e.InitialTD() if err != nil { return fmt.Errorf("error reading total difficulty: %w", err) } @@ -167,7 +158,7 @@ func info(ctx *cli.Context) error { StartBlock uint64 `json:"startBlock"` Count uint64 `json:"count"` }{ - acc, td, r.Start(), r.Count(), + acc, td, e.Start(), e.Count(), } b, _ := json.MarshalIndent(info, "", " ") fmt.Println(string(b)) @@ -175,7 +166,7 @@ func info(ctx *cli.Context) error { } // open opens an era1 file at a certain epoch. -func open(ctx *cli.Context, epoch uint64) (*os.File, error) { +func open(ctx *cli.Context, epoch uint64) (*era.Era, error) { var ( dir = ctx.String(dirFlag.Name) network = ctx.String(networkFlag.Name) @@ -187,7 +178,7 @@ func open(ctx *cli.Context, epoch uint64) (*os.File, error) { if epoch >= uint64(len(entries)) { return nil, fmt.Errorf("epoch out-of-bounds: last %d, want %d", len(entries)-1, epoch) } - return os.Open(path.Join(dir, entries[epoch])) + return era.Open(path.Join(dir, entries[epoch])) } // verify checks each era1 file in a directory to ensure it is well-formed and @@ -223,29 +214,21 @@ func verify(ctx *cli.Context) error { // Wrap in function so defers don't stack. 
err := func() error { name := entries[i] - f, err := os.Open(path.Join(dir, name)) + e, err := era.Open(path.Join(dir, name)) if err != nil { return fmt.Errorf("error opening era1 file %s: %w", name, err) } - defer f.Close() - - r, err := era.NewReader(f) - if err != nil { - return fmt.Errorf("unable to make era reader: %w", err) - } - + defer e.Close() // Read accumulator and check against expected. - if got, err := r.Accumulator(); err != nil { + if got, err := e.Accumulator(); err != nil { return fmt.Errorf("error retrieving accumulator for %s: %w", name, err) } else if got != want { return fmt.Errorf("invalid root %s: got %s, want %s", name, got, want) } - // Recompute accumulator. - if err := checkAccumulator(r); err != nil { + if err := checkAccumulator(e); err != nil { return fmt.Errorf("error verify era1 file %s: %w", name, err) } - // Give the user some feedback that something is happening. if time.Since(reported) >= 8*time.Second { fmt.Printf("Verifying Era1 files \t\t verified=%d,\t elapsed=%s\n", i, common.PrettyDuration(time.Since(start))) @@ -262,21 +245,25 @@ func verify(ctx *cli.Context) error { } // checkAccumulator verifies the accumulator matches the data in the Era. -func checkAccumulator(r *era.Reader) error { +func checkAccumulator(e *era.Era) error { var ( err error - start = r.Start() + start = e.Start() want common.Hash td *big.Int tds = make([]*big.Int, 0) hashes = make([]common.Hash, 0) ) - if want, err = r.Accumulator(); err != nil { + if want, err = e.Accumulator(); err != nil { return fmt.Errorf("error reading accumulator: %w", err) } - if td, err = r.InitialTD(); err != nil { + if td, err = e.InitialTD(); err != nil { return fmt.Errorf("error reading total difficulty: %w", err) } + it, err := era.NewIterator(e) + if err != nil { + return fmt.Errorf("error making era iterator: %w", err) + } // Starting at epoch 0, iterate through all available era1 files and // check the following: // * the block index is constructed correctly @@ -284,13 +271,14 @@ func checkAccumulator(r *era.Reader) error { // * the accumulator is correct by recomputing it locally, // which verifies the blocks are all correct (via hash) // * the receipts root matches the value in the block - for j := 0; ; j++ { - // read() walks the block index, so we're able to + for j := 0; it.Next(); j++ { + // next() walks the block index, so we're able to // implicitly verify it. - block, receipts, err := r.Read() - if err == io.EOF { - break - } else if err != nil { + if it.Error() != nil { + return fmt.Errorf("error reading block %d: %w", start+uint64(j), err) + } + block, receipts, err := it.BlockAndReceipts() + if it.Error() != nil { return fmt.Errorf("error reading block %d: %w", start+uint64(j), err) } tr := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)) diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go index 5a97122dfb70..7f72105fd37e 100644 --- a/cmd/utils/cmd.go +++ b/cmd/utils/cmd.go @@ -285,20 +285,27 @@ func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, networ buf.Reset() // Import all block data from Era1. 
- r, err := era.NewReader(f) + e, err := era.From(f) + if err != nil { + return fmt.Errorf("error opening era: %w", err) + } + it, err := era.NewIterator(e) if err != nil { return fmt.Errorf("error making era reader: %w", err) } - for j := 0; ; j += 1 { + for j := 0; it.Next(); j++ { n := i*era.MaxEra1Size + j - block, receipts, err := r.Read() - if err == io.EOF { - break - } else if err != nil { + block, err := it.Block() + if err != nil { return fmt.Errorf("error reading block %d: %w", n, err) - } else if block.Number().BitLen() == 0 { + } + if block.Number().BitLen() == 0 { continue // skip genesis } + receipts, err := it.Receipts() + if err != nil { + return fmt.Errorf("error reading receipts %d: %w", n, err) + } if status, err := chain.HeaderChain().InsertHeaderChain([]*types.Header{block.Header()}, start, forker); err != nil { return fmt.Errorf("error inserting header %d: %w", n, err) } else if status != core.CanonStatTy { diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go index e24a68bbd6b3..544405464c09 100644 --- a/cmd/utils/history_test.go +++ b/cmd/utils/history_test.go @@ -107,42 +107,54 @@ func TestHistoryImportAndExport(t *testing.T) { // Verify each Era. entries, _ := era.ReadDir(dir, "mainnet") for i, filename := range entries { - f, err := os.ReadFile(path.Join(dir, filename)) + f, err := os.Open(path.Join(dir, filename)) if err != nil { t.Fatalf("error opening era file: %v", err) } - if want, got := common.HexToHash(checksums[i]), common.Hash(sha256.Sum256(f)); want != got { + + var ( + h = sha256.New() + buf = bytes.NewBuffer(nil) + ) + if _, err := io.Copy(h, f); err != nil { + t.Fatalf("unable to recalculate checksum: %v", err) + } + if got, want := common.BytesToHash(h.Sum(buf.Bytes()[:])).Hex(), checksums[i]; got != want { t.Fatalf("checksum %d does not match: got %s, want %s", i, got, want) } - r, err := era.NewReader(bytes.NewReader(f)) + + e, err := era.From(f) + if err != nil { + t.Fatalf("error opening era: %v", err) + } + it, err := era.NewIterator(e) if err != nil { t.Fatalf("error making era reader: %v", err) } - for j := 0; ; j += 1 { - block, receipts, err := r.Read() - if err == io.EOF { - break - } else if err != nil { - t.Fatalf("error reading era file %d: %v", i, err) + for j := 0; it.Next(); j++ { + n := i*int(step) + j + if it.Error() != nil { + t.Fatalf("error reading block entry %d: %v", n, err) + } + block, receipts, err := it.BlockAndReceipts() + if err != nil { + t.Fatalf("error reading block entry %d: %v", n, err) } - var ( - n = i*int(step) + j - want = chain.GetBlockByNumber(uint64(n)) - ) + want := chain.GetBlockByNumber(uint64(n)) if want, got := uint64(n), block.NumberU64(); want != got { t.Fatalf("blocks out of order: want %d, got %d", want, got) } if want.Hash() != block.Hash() { - t.Fatalf("block hash mismatch %d: want %s, got %s", i+j, want.Hash().Hex(), block.Hash().Hex()) + t.Fatalf("block hash mismatch %d: want %s, got %s", n, want.Hash().Hex(), block.Hash().Hex()) } if got := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)); got != want.TxHash() { - t.Fatalf("tx hash %d mismatch: want %s, got %s", i+j, want.TxHash(), got) + t.Fatalf("tx hash %d mismatch: want %s, got %s", n, want.TxHash(), got) } if got := types.CalcUncleHash(block.Uncles()); got != want.UncleHash() { - t.Fatalf("uncle hash %d mismatch: want %s, got %s", i+j, want.UncleHash(), got) + t.Fatalf("uncle hash %d mismatch: want %s, got %s", n, want.UncleHash(), got) } if got := types.DeriveSha(receipts, trie.NewStackTrie(nil)); got != 
want.ReceiptHash() { - t.Fatalf("receipt root %d mismatch: want %s, got %s", i+j, want.ReceiptHash(), got) + t.Fatalf("receipt root %d mismatch: want %s, got %s", n, want.ReceiptHash(), got) } } } diff --git a/internal/era/era.go b/internal/era/era.go index bbb499a45cbb..3affe0e0e776 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -26,6 +26,7 @@ import ( "path" "strconv" "strings" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" @@ -83,252 +84,203 @@ func ReadDir(dir, network string) ([]string, error) { return eras, nil } -// Reader reads an Era1 archive. -// See Builder documentation for a detailed explanation of the Era1 format. -type Reader struct { - r io.ReaderAt - e *e2store.Reader - - buf [8]byte // buffer reading entry offsets - next uint64 // next block to read - length int64 // total length of r - metadata metadata // start, count info -} - -type ReadAtSeeker interface { +type ReadAtSeekCloser interface { io.ReaderAt io.Seeker + io.Closer } -// NewReader returns a new Reader instance. -func NewReader(r ReadAtSeeker) (*Reader, error) { - length, err := r.Seek(0, io.SeekEnd) - if err != nil { - return nil, err - } - m, err := readMetadata(r, length) +// Era reads and Era1 file. +type Era struct { + f ReadAtSeekCloser // backing era1 file + s *e2store.Reader // e2store reader over f + m metadata // start, count, length info + mu *sync.Mutex // lock for buf + buf [8]byte // buffer reading entry offsets +} + +// From returns an Era backed by f. +func From(f ReadAtSeekCloser) (*Era, error) { + m, err := readMetadata(f) if err != nil { return nil, err } - return &Reader{ - r: r, - e: e2store.NewReader(r), - next: m.start, - length: length, - metadata: m, + return &Era{ + f: f, + s: e2store.NewReader(f), + m: m, + mu: new(sync.Mutex), }, nil } -// readOffset reads a specific block's offset from the block index. The value n -// is the absolute block number desired. -func (r *Reader) readOffset(n uint64) (int64, error) { - var ( - firstIndex = -8 - int64(r.metadata.count)*8 // size of count - index entries - indexOffset = int64(n-r.metadata.start) * 8 // desired index * size of indexes - offOffset = r.length + firstIndex + indexOffset // offset of block offset - ) - r.clearBuffer() - if _, err := r.r.ReadAt(r.buf[:], offOffset); err != nil { - return 0, err +// Open returns an Era backed by the given filename. +func Open(filename string) (*Era, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err } - // Since the block offset is relative from its location + size of index - // value (8), we need to add it to it's offset to get the block's - // absolute offset. - return offOffset + 8 + int64(binary.LittleEndian.Uint64(r.buf[:])), nil + return From(f) } -// Read reads one (block, receipts) tuple from an Era1 archive. -func (r *Reader) Read() (*types.Block, types.Receipts, error) { - block, receipts, err := r.ReadBlockAndReceipts(r.next) - if err != nil { - return nil, nil, err - } - r.next += 1 - return block, receipts, nil +func (e *Era) Close() error { + return e.f.Close() } -// readBlob reads an entry of data. 
-func (r *Reader) readEntry(n uint64, skip int) (*e2store.Entry, error) { - if n < r.metadata.start || r.metadata.start+r.metadata.count < n { - return nil, fmt.Errorf("request out-of-bounds: want %d, start: %d, count: %d", n, r.metadata.start, r.metadata.count) +func (e *Era) GetBlockByNumber(num uint64) (*types.Block, error) { + if e.m.start > num || e.m.start+e.m.count <= num { + return nil, fmt.Errorf("out-of-bounds") } - // Read the specified block's offset from the block index. - off, err := r.readOffset(n) + off, err := e.readOffset(num) if err != nil { - return nil, fmt.Errorf("error reading block offset: %w", err) - } - // Skip to the requested entry. - for i := 0; i < skip; i++ { - if length, err := r.e.LengthAt(off); err != nil { - return nil, err - } else { - off += length - } - } - // Read entry. - var entry e2store.Entry - if _, err := r.e.ReadAt(&entry, off); err != nil { return nil, err } - return &entry, nil -} - -// readHeaderRLP reads the header number n RLP. -func (r *Reader) readHeaderRLP(n uint64) ([]byte, error) { - e, err := r.readEntry(n, 0) + r, n, err := newSnappyReader(e.s, off) if err != nil { return nil, err } - if e.Type != TypeCompressedHeader { - return nil, fmt.Errorf("expected header entry, got %x", e.Type) - } - return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) -} - -// readBodyRLP reads the block body number n RLP. -func (r *Reader) readBodyRLP(n uint64) ([]byte, error) { - e, err := r.readEntry(n, 1) - if err != nil { + var header types.Header + if err := rlp.Decode(r, &header); err != nil { return nil, err } - if e.Type != TypeCompressedBody { - return nil, fmt.Errorf("expected body entry, got %x", e.Type) + off += int64(n) + var body types.Body + if err := rlp.Decode(r, &body); err != nil { + return nil, err } - return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) + return types.NewBlockWithHeader(&header).WithBody(body.Transactions, body.Uncles), nil } -// readReceiptsRLP reads the receipts RLP associated with number n. -func (r *Reader) readReceiptsRLP(n uint64) ([]byte, error) { - e, err := r.readEntry(n, 2) +// Accumulator reads the accumulator entry in the Era1 file. +func (e *Era) Accumulator() (common.Hash, error) { + entry, err := e.s.Find(TypeAccumulator) if err != nil { - return nil, err - } - if e.Type != TypeCompressedReceipts { - return nil, fmt.Errorf("expected receipts entry, got %x", e.Type) + return common.Hash{}, err } - return io.ReadAll(snappy.NewReader(bytes.NewReader(e.Value))) + return common.BytesToHash(entry.Value), nil } -// readTotalDifficulty reads the total difficulty of block number n. -func (r *Reader) readTotalDifficulty(n uint64) (*big.Int, error) { - e, err := r.readEntry(n, 3) - if err != nil { +// InitialTD returns initial total difficulty before the difficulty of the +// first block of the Era1 is applied. +func (e *Era) InitialTD() (*big.Int, error) { + var ( + r io.Reader + header types.Header + rawTd []byte + n int + off int64 + err error + ) + + // Read first header. + if off, err = e.readOffset(e.m.start); err != nil { return nil, err } - if e.Type != TypeTotalDifficulty { - return nil, fmt.Errorf("expected TD entry, got %x", e.Type) - } - return new(big.Int).SetBytes(reverseOrder(e.Value)), nil -} - -// ReadHeader reads the header number n. 
-func (r *Reader) ReadHeader(n uint64) (*types.Header, error) { - h, err := r.readHeaderRLP(n) - if err != nil { + if r, n, err = newSnappyReader(e.s, off); err != nil { return nil, err } - var header types.Header - if err := rlp.DecodeBytes(h, &header); err != nil { + if err := rlp.Decode(r, header); err != nil { return nil, err } - return &header, nil -} + off += int64(n) -// ReadBlock reads the block number n. -func (r *Reader) ReadBlock(n uint64) (*types.Block, error) { - header, err := r.ReadHeader(n) - if err != nil { - return nil, err + // Skip over next two records. + for i := 0; i < 2; i++ { + length, err := e.s.LengthAt(off) + if err != nil { + return nil, err + } + off += length } - b, err := r.readBodyRLP(n) - if err != nil { + + // Read total difficulty after first block. + if r, n, err = newReader(e.s, off); err != nil { return nil, err } - var body types.Body - if err := rlp.DecodeBytes(b, &body); err != nil { + if err := rlp.Decode(r, rawTd); err != nil { return nil, err } - return types.NewBlockWithHeader(header).WithBody(body.Transactions, body.Uncles), nil + td := new(big.Int).SetBytes(reverseOrder(rawTd)) + return td.Sub(td, header.Difficulty), nil } -// ReadBlockAndReceipts reads the block number n and associated receipts. -func (r *Reader) ReadBlockAndReceipts(n uint64) (*types.Block, types.Receipts, error) { - // Read block. - block, err := r.ReadBlock(n) - if err != nil { - return nil, nil, err - } - // Read receipts. - rr, err := r.readReceiptsRLP(n) - if err != nil { - return nil, nil, err - } - // Decode receipts. - var receipts types.Receipts - if err := rlp.DecodeBytes(rr, &receipts); err != nil { - return nil, nil, err - } - return block, receipts, err +// Start returns the listed start block. +func (e *Era) Start() uint64 { + return e.m.start } -// Accumulator reads the accumulator entry in the Era1 file. -func (r *Reader) Accumulator() (common.Hash, error) { - entry, err := r.e.Find(TypeAccumulator) - if err != nil { - return common.Hash{}, err - } - return common.BytesToHash(entry.Value), nil +// Count returns the total number of blocks in the Era1. +func (e *Era) Count() uint64 { + return e.m.count } -// InitialTD returns initial total difficulty before the difficulty of the -// first block of the Era1 is applied. -func (r *Reader) InitialTD() (*big.Int, error) { - h, err := r.ReadHeader(r.Start()) - if err != nil { - return nil, err - } - // Above seek also sets reader so next TD entry will be for this block. - entry, err := r.e.Find(TypeTotalDifficulty) - if err != nil { - return nil, err +// readOffset reads a specific block's offset from the block index. The value n +// is the absolute block number desired. +func (e *Era) readOffset(n uint64) (int64, error) { + var ( + firstIndex = -8 - int64(e.m.count)*8 // size of count - index entries + indexOffset = int64(n-e.m.start) * 8 // desired index * size of indexes + offOffset = e.m.length + firstIndex + indexOffset // offset of block offset + ) + e.mu.Lock() + defer e.mu.Unlock() + clearBuffer(e.buf[:]) + if _, err := e.f.ReadAt(e.buf[:], offOffset); err != nil { + return 0, err } - td := new(big.Int).SetBytes(reverseOrder(entry.Value)) - return td.Sub(td, h.Difficulty), nil + // Since the block offset is relative from its location + size of index + // value (8), we need to add it to it's offset to get the block's + // absolute offset. + return offOffset + 8 + int64(binary.LittleEndian.Uint64(e.buf[:])), nil } -// Start returns the listed start block. 
-func (r *Reader) Start() uint64 { - return r.metadata.start +// newReader returns an io.Reader for the e2store entry value at off. +func newReader(e *e2store.Reader, off int64) (io.Reader, int, error) { + var ( + entry e2store.Entry + n int + err error + ) + if n, err = e.ReadAt(&entry, off); err != nil { + return nil, n, err + } + return bytes.NewReader(entry.Value), n, nil } -// Count returns the total number of blocks in the Era1. -func (r *Reader) Count() uint64 { - return r.metadata.count +// newReader returns a snappy.Reader for the e2store entry value at off. +func newSnappyReader(e *e2store.Reader, off int64) (io.Reader, int, error) { + r, n, err := newReader(e, off) + return snappy.NewReader(r), n, err } // clearBuffer zeroes out the buffer. -func (r *Reader) clearBuffer() { - for i := 0; i < len(r.buf); i++ { - r.buf[i] = 0 +func clearBuffer(buf []byte) { + for i := 0; i < len(buf); i++ { + buf[i] = 0 } } // metadata wraps the metadata in the block index. type metadata struct { - start, count uint64 + start uint64 + count uint64 + length int64 } // readMetadata reads the metadata stored in an Era1 file's block index. -func readMetadata(r io.ReaderAt, length int64) (m metadata, err error) { +func readMetadata(f ReadAtSeekCloser) (m metadata, err error) { + // Determine length of reader. + if m.length, err = f.Seek(0, io.SeekEnd); err != nil { + return + } b := make([]byte, 16) // Read count. It's the last 8 bytes of the file. - if _, err = r.ReadAt(b[:8], length-8); err != nil { + if _, err = f.ReadAt(b[:8], m.length-8); err != nil { return } m.count = binary.LittleEndian.Uint64(b) // Read start. It's at the offset -sizeof(m.count) - // count*sizeof(indexEntry) - sizeof(m.start) - if _, err = r.ReadAt(b[8:], length-16-int64(m.count*8)); err != nil { + if _, err = f.ReadAt(b[8:], m.length-16-int64(m.count*8)); err != nil { return } m.start = binary.LittleEndian.Uint64(b[8:]) diff --git a/internal/era/era_test.go b/internal/era/era_test.go index a4fd2c3b063a..ee5d9e82a099 100644 --- a/internal/era/era_test.go +++ b/internal/era/era_test.go @@ -18,6 +18,7 @@ package era import ( "bytes" + "io" "math/big" "os" "testing" @@ -71,43 +72,52 @@ func TestEra1Builder(t *testing.T) { } // Verify Era1 contents. - r, err := NewReader(f) + e, err := Open(f.Name()) if err != nil { - t.Fatalf("failed to make reader: %s", err) + t.Fatalf("failed to open era: %v", err) + } + it, err := NewRawIterator(e) + if err != nil { + t.Fatalf("failed to make iterator: %s", err) } for i := uint64(0); i < uint64(len(chain.headers)); i++ { + if !it.Next() { + t.Fatalf("expected more entries") + } + if it.Error() != nil { + t.Fatalf("unexpected error %v", it.Error()) + } // Check headers. - header, err := r.readHeaderRLP(i) + header, err := io.ReadAll(it.Header) if err != nil { - t.Fatalf("error reading from era1: %v", err) + t.Fatalf("error reading header: %v", err) } if !bytes.Equal(header, chain.headers[i]) { t.Fatalf("mismatched header: want %s, got %s", chain.headers[i], header) } - // Check bodies. - body, err := r.readBodyRLP(i) + body, err := io.ReadAll(it.Body) if err != nil { - t.Fatalf("error reading from era1: %v", err) + t.Fatalf("error reading body: %v", err) } if !bytes.Equal(body, chain.bodies[i]) { t.Fatalf("mismatched body: want %s, got %s", chain.bodies[i], body) } - // Check receipts. 
- receipts, err := r.readReceiptsRLP(i) + receipts, err := io.ReadAll(it.Receipts) if err != nil { - t.Fatalf("error reading from era1: %v", err) + t.Fatalf("error reading receipts: %v", err) } if !bytes.Equal(receipts, chain.receipts[i]) { - t.Fatalf("mismatched body: want %s, got %s", chain.receipts[i], receipts) + t.Fatalf("mismatched receipts: want %s, got %s", chain.receipts[i], receipts) } // Check total difficulty. - td, err := r.readTotalDifficulty(i) + rawTd, err := io.ReadAll(it.TotalDifficulty) if err != nil { - t.Fatalf("error reading from era1: %v", err) + t.Fatalf("error reading td: %v", err) } + td := new(big.Int).SetBytes(reverseOrder(rawTd)) if td.Cmp(chain.tds[i]) != 0 { t.Fatalf("mismatched tds: want %s, got %s", chain.tds[i], td) } diff --git a/internal/era/iterator.go b/internal/era/iterator.go new file mode 100644 index 000000000000..2f353e6e8548 --- /dev/null +++ b/internal/era/iterator.go @@ -0,0 +1,154 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +package era + +import ( + "io" + "math/big" + + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rlp" +) + +// Iterator wraps RawIterator and returns decoded Era1 entries. +type Iterator struct { + inner *RawIterator +} + +// NewIterator returns a new Iterator instance. +func NewIterator(e *Era) (*Iterator, error) { + inner, err := NewRawIterator(e) + if err != nil { + return nil, err + } + return &Iterator{inner}, nil +} + +// Next moves the iterator to the next block entry. +func (it *Iterator) Next() bool { + return it.inner.Next() +} + +// Error returns the error status of the iterator. +func (it *Iterator) Error() error { + return it.inner.Error() +} + +// Block returns the block for the iterator's current position. +func (it *Iterator) Block() (*types.Block, error) { + var ( + header types.Header + body types.Body + ) + if err := rlp.Decode(it.inner.Header, &header); err != nil { + return nil, err + } + if err := rlp.Decode(it.inner.Body, &body); err != nil { + return nil, err + } + return types.NewBlockWithHeader(&header).WithBody(body.Transactions, body.Uncles), nil +} + +// Receipts returns the receipts for the iterator's current position. +func (it *Iterator) Receipts() (types.Receipts, error) { + var receipts types.Receipts + err := rlp.Decode(it.inner.Receipts, &receipts) + return receipts, err +} + +// BlockAndReceipts returns the block and receipts for the iterator's current +// position. +func (it *Iterator) BlockAndReceipts() (*types.Block, types.Receipts, error) { + b, err := it.Block() + if err != nil { + return nil, nil, err + } + r, err := it.Receipts() + if err != nil { + return nil, nil, err + } + return b, r, nil +} + +// TotalDifficulty returns the total difficulty for the iterator's current +// position. 
+func (it *Iterator) TotalDifficulty() (*big.Int, error) { + var td []byte + if err := rlp.Decode(it.inner.TotalDifficulty, td); err != nil { + return nil, err + } + return new(big.Int).SetBytes(reverseOrder(td)), nil +} + +// RawIterator reads an RLP-encode Era1 entries. +type RawIterator struct { + e *Era // backing Era1 + curOffset int64 // offset of current block + next uint64 // next block to read + err error // last error + + Header io.Reader + Body io.Reader + Receipts io.Reader + TotalDifficulty io.Reader +} + +// NewRawIterator returns a new Iterator instance. +func NewRawIterator(e *Era) (*RawIterator, error) { + return &RawIterator{ + e: e, + next: e.m.start, + }, nil +} + +// Next moves the iterator to the next block entry. +func (it *RawIterator) Next() bool { + if it.e.m.start+it.e.m.count <= it.next { + return false + } + off, err := it.e.readOffset(it.next) + if err == io.EOF { + it.err = err + return false + } + var n int + if it.Header, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + return true + } + off += int64(n) + if it.Body, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + return true + } + off += int64(n) + if it.Receipts, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + return true + } + off += int64(n) + if it.TotalDifficulty, n, it.err = newReader(it.e.s, off); it.err != nil { + return true + } + it.next += 1 + return true +} + +// Error returns the error status of the iterator. +func (it *RawIterator) Error() error { + if it.err == io.EOF { + return nil + } + return it.err +} From 02b3f1983e8d2c379402491fe84f2da591a3b770 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Fri, 2 Jun 2023 17:08:07 +0200 Subject: [PATCH 19/28] cmd/utils: better defer handling in history test --- cmd/utils/history_test.go | 88 ++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go index 544405464c09..0f597625270a 100644 --- a/cmd/utils/history_test.go +++ b/cmd/utils/history_test.go @@ -107,56 +107,58 @@ func TestHistoryImportAndExport(t *testing.T) { // Verify each Era. 
entries, _ := era.ReadDir(dir, "mainnet") for i, filename := range entries { - f, err := os.Open(path.Join(dir, filename)) - if err != nil { - t.Fatalf("error opening era file: %v", err) - } - - var ( - h = sha256.New() - buf = bytes.NewBuffer(nil) - ) - if _, err := io.Copy(h, f); err != nil { - t.Fatalf("unable to recalculate checksum: %v", err) - } - if got, want := common.BytesToHash(h.Sum(buf.Bytes()[:])).Hex(), checksums[i]; got != want { - t.Fatalf("checksum %d does not match: got %s, want %s", i, got, want) - } - - e, err := era.From(f) - if err != nil { - t.Fatalf("error opening era: %v", err) - } - it, err := era.NewIterator(e) - if err != nil { - t.Fatalf("error making era reader: %v", err) - } - for j := 0; it.Next(); j++ { - n := i*int(step) + j - if it.Error() != nil { - t.Fatalf("error reading block entry %d: %v", n, err) - } - block, receipts, err := it.BlockAndReceipts() + func() { + f, err := os.Open(path.Join(dir, filename)) if err != nil { - t.Fatalf("error reading block entry %d: %v", n, err) + t.Fatalf("error opening era file: %v", err) } - want := chain.GetBlockByNumber(uint64(n)) - if want, got := uint64(n), block.NumberU64(); want != got { - t.Fatalf("blocks out of order: want %d, got %d", want, got) + var ( + h = sha256.New() + buf = bytes.NewBuffer(nil) + ) + if _, err := io.Copy(h, f); err != nil { + t.Fatalf("unable to recalculate checksum: %v", err) } - if want.Hash() != block.Hash() { - t.Fatalf("block hash mismatch %d: want %s, got %s", n, want.Hash().Hex(), block.Hash().Hex()) + if got, want := common.BytesToHash(h.Sum(buf.Bytes()[:])).Hex(), checksums[i]; got != want { + t.Fatalf("checksum %d does not match: got %s, want %s", i, got, want) } - if got := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)); got != want.TxHash() { - t.Fatalf("tx hash %d mismatch: want %s, got %s", n, want.TxHash(), got) + e, err := era.From(f) + if err != nil { + t.Fatalf("error opening era: %v", err) } - if got := types.CalcUncleHash(block.Uncles()); got != want.UncleHash() { - t.Fatalf("uncle hash %d mismatch: want %s, got %s", n, want.UncleHash(), got) + defer e.Close() + it, err := era.NewIterator(e) + if err != nil { + t.Fatalf("error making era reader: %v", err) } - if got := types.DeriveSha(receipts, trie.NewStackTrie(nil)); got != want.ReceiptHash() { - t.Fatalf("receipt root %d mismatch: want %s, got %s", n, want.ReceiptHash(), got) + for j := 0; it.Next(); j++ { + n := i*int(step) + j + if it.Error() != nil { + t.Fatalf("error reading block entry %d: %v", n, err) + } + block, receipts, err := it.BlockAndReceipts() + if err != nil { + t.Fatalf("error reading block entry %d: %v", n, err) + } + want := chain.GetBlockByNumber(uint64(n)) + if want, got := uint64(n), block.NumberU64(); want != got { + t.Fatalf("blocks out of order: want %d, got %d", want, got) + } + if want.Hash() != block.Hash() { + t.Fatalf("block hash mismatch %d: want %s, got %s", n, want.Hash().Hex(), block.Hash().Hex()) + } + if got := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)); got != want.TxHash() { + t.Fatalf("tx hash %d mismatch: want %s, got %s", n, want.TxHash(), got) + } + if got := types.CalcUncleHash(block.Uncles()); got != want.UncleHash() { + t.Fatalf("uncle hash %d mismatch: want %s, got %s", n, want.UncleHash(), got) + } + if got := types.DeriveSha(receipts, trie.NewStackTrie(nil)); got != want.ReceiptHash() { + t.Fatalf("receipt root %d mismatch: want %s, got %s", n, want.ReceiptHash(), got) + } } - } + return + }() } // Now import Era. 
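Editorial note on the pattern used in this patch and in patch 09: when a loop opens one file per iteration, wrapping the loop body in an anonymous function scopes each deferred Close to that iteration instead of stacking every Close until the enclosing function returns. A minimal standalone sketch of the idiom follows; the processAll helper and the file name are illustrative only and not part of the patch.

package main

import (
	"fmt"
	"os"
)

// processAll opens and inspects each file in turn; the per-iteration closure
// guarantees the file is closed before the next one is opened.
func processAll(names []string) error {
	for _, name := range names {
		// Wrap in function so defers don't stack.
		err := func() error {
			f, err := os.Open(name)
			if err != nil {
				return fmt.Errorf("open %s: %w", name, err)
			}
			defer f.Close() // runs when this closure returns, i.e. once per iteration
			// ... verify the contents of f here ...
			return nil
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	if err := processAll([]string{"example.era1"}); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}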
From b98b6d652431a8dad0f6ae507ace936ac792e38b Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 5 Jun 2023 09:38:18 +0200 Subject: [PATCH 20/28] internal/era,cmd: add number method to era iterator to get the current block number --- cmd/era/main.go | 7 +++---- cmd/utils/cmd.go | 15 +++++++-------- internal/era/iterator.go | 10 ++++++++++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index 972377af7174..48687617ca57 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -248,7 +248,6 @@ func verify(ctx *cli.Context) error { func checkAccumulator(e *era.Era) error { var ( err error - start = e.Start() want common.Hash td *big.Int tds = make([]*big.Int, 0) @@ -271,15 +270,15 @@ func checkAccumulator(e *era.Era) error { // * the accumulator is correct by recomputing it locally, // which verifies the blocks are all correct (via hash) // * the receipts root matches the value in the block - for j := 0; it.Next(); j++ { + for it.Next() { // next() walks the block index, so we're able to // implicitly verify it. if it.Error() != nil { - return fmt.Errorf("error reading block %d: %w", start+uint64(j), err) + return fmt.Errorf("error reading block %d: %w", it.Number(), err) } block, receipts, err := it.BlockAndReceipts() if it.Error() != nil { - return fmt.Errorf("error reading block %d: %w", start+uint64(j), err) + return fmt.Errorf("error reading block %d: %w", it.Number(), err) } tr := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)) if tr != block.TxHash() { diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go index 7f72105fd37e..4b5716466556 100644 --- a/cmd/utils/cmd.go +++ b/cmd/utils/cmd.go @@ -293,32 +293,31 @@ func ImportHistory(chain *core.BlockChain, db ethdb.Database, dir string, networ if err != nil { return fmt.Errorf("error making era reader: %w", err) } - for j := 0; it.Next(); j++ { - n := i*era.MaxEra1Size + j + for it.Next() { block, err := it.Block() if err != nil { - return fmt.Errorf("error reading block %d: %w", n, err) + return fmt.Errorf("error reading block %d: %w", it.Number(), err) } if block.Number().BitLen() == 0 { continue // skip genesis } receipts, err := it.Receipts() if err != nil { - return fmt.Errorf("error reading receipts %d: %w", n, err) + return fmt.Errorf("error reading receipts %d: %w", it.Number(), err) } if status, err := chain.HeaderChain().InsertHeaderChain([]*types.Header{block.Header()}, start, forker); err != nil { - return fmt.Errorf("error inserting header %d: %w", n, err) + return fmt.Errorf("error inserting header %d: %w", it.Number(), err) } else if status != core.CanonStatTy { - return fmt.Errorf("error inserting header %d, not canon: %v", n, status) + return fmt.Errorf("error inserting header %d, not canon: %v", it.Number(), status) } if _, err := chain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{receipts}, 2^64-1); err != nil { - return fmt.Errorf("error inserting body %d: %w", n, err) + return fmt.Errorf("error inserting body %d: %w", it.Number(), err) } imported += 1 // Give the user some feedback that something is happening. 
if time.Since(reported) >= 8*time.Second { - log.Info("Importing Era files", "head", n, "imported", imported, "elapsed", common.PrettyDuration(time.Since(start))) + log.Info("Importing Era files", "head", it.Number(), "imported", imported, "elapsed", common.PrettyDuration(time.Since(start))) imported = 0 reported = time.Now() } diff --git a/internal/era/iterator.go b/internal/era/iterator.go index 2f353e6e8548..3cc57a85e123 100644 --- a/internal/era/iterator.go +++ b/internal/era/iterator.go @@ -43,6 +43,11 @@ func (it *Iterator) Next() bool { return it.inner.Next() } +// Number returns the current number block the iterator will return. +func (it *Iterator) Number() uint64 { + return it.inner.next - 1 +} + // Error returns the error status of the iterator. func (it *Iterator) Error() error { return it.inner.Error() @@ -145,6 +150,11 @@ func (it *RawIterator) Next() bool { return true } +// Number returns the current number block the iterator will return. +func (it *RawIterator) Number() uint64 { + return it.next - 1 +} + // Error returns the error status of the iterator. func (it *RawIterator) Error() error { if it.err == io.EOF { From af6837f249848014e1d8fa2c0bb10e1da66c2ab4 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 5 Jun 2023 09:48:55 +0200 Subject: [PATCH 21/28] internal/era/e2store: avoid double allocation during write --- internal/era/e2store/e2store.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index e1a85f6bc43c..da8a3c7e9986 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -50,11 +50,17 @@ func NewWriter(w io.Writer) *Writer { // record store the type (2 bytes), the length (4 bytes), and some reserved // data (2 bytes). The remaining bytes store b. func (w *Writer) Write(typ uint16, b []byte) (int, error) { - buf := make([]byte, headerSize+len(b)) + buf := make([]byte, headerSize) binary.LittleEndian.PutUint16(buf, typ) binary.LittleEndian.PutUint32(buf[2:], uint32(len(b))) - copy(buf[8:], b) - return w.w.Write(buf) + + // Write header. + if n, err := w.w.Write(buf); err != nil { + return n, err + } + // Write value, return combined write size. + n, err := w.w.Write(b) + return n + headerSize, err } // A Reader reads entries from an e2store-encoded file. 
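For reference, the record layout Writer.Write emits above is: a 2-byte little-endian type, a 4-byte little-endian value length, two reserved zero bytes, then the value itself; splitting the header and value into two writes avoids allocating and copying a combined buffer for large values, at the cost of one extra call into the underlying writer. A self-contained sketch of the header layout, not part of the patch (encodeHeader is a hypothetical helper): type 0x2a with the 2-byte value 0xbeef encodes to 2a00020000000000beef, which matches the test vector used later in this series.

    package main

    import (
        "encoding/binary"
        "encoding/hex"
        "fmt"
    )

    const headerSize = 8 // 2-byte type + 4-byte length + 2 reserved bytes

    // encodeHeader builds the 8-byte e2store header that precedes a value of
    // the given length.
    func encodeHeader(typ uint16, valueLen int) []byte {
        buf := make([]byte, headerSize)
        binary.LittleEndian.PutUint16(buf, typ)
        binary.LittleEndian.PutUint32(buf[2:], uint32(valueLen))
        return buf // the last two bytes stay zero (reserved)
    }

    func main() {
        value := []byte{0xbe, 0xef}
        out := append(encodeHeader(0x2a, len(value)), value...)
        fmt.Println(hex.EncodeToString(out)) // 2a00020000000000beef
    }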
From 64513d7d3d521b554c0f997ceeddb79d9cdeffa9 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 5 Jun 2023 15:29:03 +0200 Subject: [PATCH 22/28] internal/era,cmd/utils: fix lint issues --- cmd/utils/history_test.go | 1 - internal/era/e2store/e2store_test.go | 3 ++- internal/era/era.go | 6 +++++- internal/era/iterator.go | 9 ++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go index 0f597625270a..69256aaf012a 100644 --- a/cmd/utils/history_test.go +++ b/cmd/utils/history_test.go @@ -157,7 +157,6 @@ func TestHistoryImportAndExport(t *testing.T) { t.Fatalf("receipt root %d mismatch: want %s, got %s", n, want.ReceiptHash(), got) } } - return }() } diff --git a/internal/era/e2store/e2store_test.go b/internal/era/e2store/e2store_test.go index 64f4320f91dc..6e7b7fd37c1b 100644 --- a/internal/era/e2store/e2store_test.go +++ b/internal/era/e2store/e2store_test.go @@ -26,7 +26,7 @@ import ( ) func TestEncode(t *testing.T) { - for _, tt := range []struct { + for _, test := range []struct { entries []Entry want string name string @@ -50,6 +50,7 @@ func TestEncode(t *testing.T) { want: "2a00020000000000beef0900040000000000abcdabcd", }, } { + tt := test t.Run(tt.name, func(t *testing.T) { t.Parallel() var ( diff --git a/internal/era/era.go b/internal/era/era.go index 3affe0e0e776..a369f947b8c6 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -143,6 +143,10 @@ func (e *Era) GetBlockByNumber(num uint64) (*types.Block, error) { return nil, err } off += int64(n) + r, _, err = newSnappyReader(e.s, off) + if err != nil { + return nil, err + } var body types.Body if err := rlp.Decode(r, &body); err != nil { return nil, err @@ -193,7 +197,7 @@ func (e *Era) InitialTD() (*big.Int, error) { } // Read total difficulty after first block. - if r, n, err = newReader(e.s, off); err != nil { + if r, _, err = newReader(e.s, off); err != nil { return nil, err } if err := rlp.Decode(r, rawTd); err != nil { diff --git a/internal/era/iterator.go b/internal/era/iterator.go index 3cc57a85e123..a9ac9f3d11ac 100644 --- a/internal/era/iterator.go +++ b/internal/era/iterator.go @@ -101,10 +101,9 @@ func (it *Iterator) TotalDifficulty() (*big.Int, error) { // RawIterator reads an RLP-encode Era1 entries. 
type RawIterator struct { - e *Era // backing Era1 - curOffset int64 // offset of current block - next uint64 // next block to read - err error // last error + e *Era // backing Era1 + next uint64 // next block to read + err error // last error Header io.Reader Body io.Reader @@ -143,7 +142,7 @@ func (it *RawIterator) Next() bool { return true } off += int64(n) - if it.TotalDifficulty, n, it.err = newReader(it.e.s, off); it.err != nil { + if it.TotalDifficulty, _, it.err = newReader(it.e.s, off); it.err != nil { return true } it.next += 1 From b8fc70e8f452fcfdfdcfaa75688bd39502ac8419 Mon Sep 17 00:00:00 2001 From: "lightclient@protonmail.com" Date: Mon, 5 Jun 2023 20:20:25 +0200 Subject: [PATCH 23/28] internal/era: add ReaderAt func so entry value can be read lazily Co-authored-by: lightclient Co-authored-by: Martin Holst Swende --- internal/era/e2store/e2store.go | 18 ++++++++++++++++ internal/era/era.go | 37 ++++++++++++--------------------- internal/era/iterator.go | 16 +++++++------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/internal/era/e2store/e2store.go b/internal/era/e2store/e2store.go index da8a3c7e9986..d85b3e44e97d 100644 --- a/internal/era/e2store/e2store.go +++ b/internal/era/e2store/e2store.go @@ -118,6 +118,24 @@ func (r *Reader) ReadAt(entry *Entry, off int64) (int, error) { return int(headerSize + length), nil } +// ReaderAt returns an io.Reader delivering value data for the entry at +// the specified offset. If the entry type does not match the expected type, an +// error is returned. +func (r *Reader) ReaderAt(expectedType uint16, off int64) (io.Reader, int, error) { + // problem = need to return length+headerSize not just value length via section reader + typ, length, err := r.ReadMetadataAt(off) + if err != nil { + return nil, headerSize, err + } + if typ != expectedType { + return nil, headerSize, fmt.Errorf("wrong type, want %d have %d", expectedType, typ) + } + if length > valueSizeLimit { + return nil, headerSize, fmt.Errorf("item larger than item size limit %d: have %d", valueSizeLimit, length) + } + return io.NewSectionReader(r.r, off+headerSize, int64(length)), headerSize + int(length), nil +} + // LengthAt reads the header at off and returns the total length of the entry, // including header. 
func (r *Reader) LengthAt(off int64) (int64, error) { diff --git a/internal/era/era.go b/internal/era/era.go index a369f947b8c6..bb9b20246448 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -17,7 +17,6 @@ package era import ( - "bytes" "encoding/binary" "fmt" "io" @@ -134,7 +133,7 @@ func (e *Era) GetBlockByNumber(num uint64) (*types.Block, error) { if err != nil { return nil, err } - r, n, err := newSnappyReader(e.s, off) + r, n, err := newSnappyReader(e.s, TypeCompressedHeader, off) if err != nil { return nil, err } @@ -142,8 +141,8 @@ func (e *Era) GetBlockByNumber(num uint64) (*types.Block, error) { if err := rlp.Decode(r, &header); err != nil { return nil, err } - off += int64(n) - r, _, err = newSnappyReader(e.s, off) + off += n + r, _, err = newSnappyReader(e.s, TypeCompressedBody, off) if err != nil { return nil, err } @@ -170,7 +169,7 @@ func (e *Era) InitialTD() (*big.Int, error) { r io.Reader header types.Header rawTd []byte - n int + n int64 off int64 err error ) @@ -179,13 +178,13 @@ func (e *Era) InitialTD() (*big.Int, error) { if off, err = e.readOffset(e.m.start); err != nil { return nil, err } - if r, n, err = newSnappyReader(e.s, off); err != nil { + if r, n, err = newSnappyReader(e.s, TypeCompressedHeader, off); err != nil { return nil, err } if err := rlp.Decode(r, header); err != nil { return nil, err } - off += int64(n) + off += n // Skip over next two records. for i := 0; i < 2; i++ { @@ -197,7 +196,7 @@ func (e *Era) InitialTD() (*big.Int, error) { } // Read total difficulty after first block. - if r, _, err = newReader(e.s, off); err != nil { + if r, _, err = e.s.ReaderAt(TypeTotalDifficulty, off); err != nil { return nil, err } if err := rlp.Decode(r, rawTd); err != nil { @@ -237,23 +236,13 @@ func (e *Era) readOffset(n uint64) (int64, error) { return offOffset + 8 + int64(binary.LittleEndian.Uint64(e.buf[:])), nil } -// newReader returns an io.Reader for the e2store entry value at off. -func newReader(e *e2store.Reader, off int64) (io.Reader, int, error) { - var ( - entry e2store.Entry - n int - err error - ) - if n, err = e.ReadAt(&entry, off); err != nil { - return nil, n, err - } - return bytes.NewReader(entry.Value), n, nil -} - // newReader returns a snappy.Reader for the e2store entry value at off. -func newSnappyReader(e *e2store.Reader, off int64) (io.Reader, int, error) { - r, n, err := newReader(e, off) - return snappy.NewReader(r), n, err +func newSnappyReader(e *e2store.Reader, expectedType uint16, off int64) (io.Reader, int64, error) { + r, n, err := e.ReaderAt(expectedType, off) + if err != nil { + return nil, 0, err + } + return snappy.NewReader(r), int64(n), err } // clearBuffer zeroes out the buffer. 
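The new ReaderAt above returns an io.SectionReader over the entry's value, so callers can stream large entries instead of buffering them, and it rejects entries whose type does not match. A usage sketch, not part of the patch, assuming the internal/era packages are importable (i.e. code living inside go-ethereum) and that the TypeCompressedHeader constant named in the patch is exported from the era package:

    package example

    import (
        "github.com/ethereum/go-ethereum/core/types"
        "github.com/ethereum/go-ethereum/internal/era"
        "github.com/ethereum/go-ethereum/internal/era/e2store"
        "github.com/ethereum/go-ethereum/rlp"
        "github.com/golang/snappy"
    )

    // readHeaderAt lazily decodes the snappy-compressed, RLP-encoded block
    // header stored at off without copying the whole entry into memory.
    func readHeaderAt(s *e2store.Reader, off int64) (*types.Header, int64, error) {
        r, n, err := s.ReaderAt(era.TypeCompressedHeader, off)
        if err != nil {
            return nil, 0, err
        }
        var header types.Header
        if err := rlp.Decode(snappy.NewReader(r), &header); err != nil {
            return nil, 0, err
        }
        // n already includes the 8-byte e2store header, so off+n is the
        // offset of the next entry.
        return &header, int64(n), nil
    }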
diff --git a/internal/era/iterator.go b/internal/era/iterator.go index a9ac9f3d11ac..b798af90db91 100644 --- a/internal/era/iterator.go +++ b/internal/era/iterator.go @@ -129,20 +129,20 @@ func (it *RawIterator) Next() bool { it.err = err return false } - var n int - if it.Header, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + var n int64 + if it.Header, n, it.err = newSnappyReader(it.e.s, TypeCompressedHeader, off); it.err != nil { return true } - off += int64(n) - if it.Body, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + off += n + if it.Body, n, it.err = newSnappyReader(it.e.s, TypeCompressedBody, off); it.err != nil { return true } - off += int64(n) - if it.Receipts, n, it.err = newSnappyReader(it.e.s, off); it.err != nil { + off += n + if it.Receipts, n, it.err = newSnappyReader(it.e.s, TypeCompressedReceipts, off); it.err != nil { return true } - off += int64(n) - if it.TotalDifficulty, _, it.err = newReader(it.e.s, off); it.err != nil { + off += n + if it.TotalDifficulty, _, it.err = it.e.s.ReaderAt(TypeTotalDifficulty, off); it.err != nil { return true } it.next += 1 From 67defc417c134c5b365dd4d394a791cfc829dcba Mon Sep 17 00:00:00 2001 From: lightclient Date: Tue, 6 Jun 2023 20:43:32 +0200 Subject: [PATCH 24/28] internal/era: improve iterator interface --- internal/era/iterator.go | 48 ++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/internal/era/iterator.go b/internal/era/iterator.go index b798af90db91..291d55a27d1e 100644 --- a/internal/era/iterator.go +++ b/internal/era/iterator.go @@ -17,6 +17,7 @@ package era import ( + "fmt" "io" "math/big" @@ -29,7 +30,8 @@ type Iterator struct { inner *RawIterator } -// NewIterator returns a new Iterator instance. +// NewRawIterator returns a new Iterator instance. Next must be immediately +// called on new iterators to load the first item. func NewIterator(e *Era) (*Iterator, error) { inner, err := NewRawIterator(e) if err != nil { @@ -38,7 +40,9 @@ func NewIterator(e *Era) (*Iterator, error) { return &Iterator{inner}, nil } -// Next moves the iterator to the next block entry. +// Next moves the iterator to the next block entry. It returns false when all +// items have been read or an error has halted its progress. Block, Receipts, +// and BlockAndReceipts should no longer be called after false is returned. func (it *Iterator) Next() bool { return it.inner.Next() } @@ -48,13 +52,17 @@ func (it *Iterator) Number() uint64 { return it.inner.next - 1 } -// Error returns the error status of the iterator. +// Error returns the error status of the iterator. It should be called before +// reading from any of the iterator's values. func (it *Iterator) Error() error { return it.inner.Error() } // Block returns the block for the iterator's current position. func (it *Iterator) Block() (*types.Block, error) { + if it.inner.Header == nil || it.inner.Body == nil { + return nil, fmt.Errorf("header and body must be non-nil") + } var ( header types.Header body types.Body @@ -70,6 +78,9 @@ func (it *Iterator) Block() (*types.Block, error) { // Receipts returns the receipts for the iterator's current position. func (it *Iterator) Receipts() (types.Receipts, error) { + if it.inner.Receipts == nil { + return nil, fmt.Errorf("receipts must be non-nil") + } var receipts types.Receipts err := rlp.Decode(it.inner.Receipts, &receipts) return receipts, err @@ -111,7 +122,8 @@ type RawIterator struct { TotalDifficulty io.Reader } -// NewRawIterator returns a new Iterator instance. 
+// NewRawIterator returns a new RawIterator instance. Next must be immediately +// called on new iterators to load the first item. func NewRawIterator(e *Era) (*RawIterator, error) { return &RawIterator{ e: e, @@ -119,30 +131,43 @@ func NewRawIterator(e *Era) (*RawIterator, error) { }, nil } -// Next moves the iterator to the next block entry. +// Next moves the iterator to the next block entry. It returns false when all +// items have been read or an error has halted its progress. Header, Body, +// Receipts, TotalDifficulty will be set to nil in the case returning false or +// finding an error and should therefore no longer be read from. func (it *RawIterator) Next() bool { + // Clear old errors. + it.err = nil if it.e.m.start+it.e.m.count <= it.next { + it.clear() return false } off, err := it.e.readOffset(it.next) - if err == io.EOF { + if err != nil { + // Error here means block index is corrupted, so don't + // continue. + it.clear() it.err = err return false } var n int64 if it.Header, n, it.err = newSnappyReader(it.e.s, TypeCompressedHeader, off); it.err != nil { + it.clear() return true } off += n if it.Body, n, it.err = newSnappyReader(it.e.s, TypeCompressedBody, off); it.err != nil { + it.clear() return true } off += n if it.Receipts, n, it.err = newSnappyReader(it.e.s, TypeCompressedReceipts, off); it.err != nil { + it.clear() return true } off += n if it.TotalDifficulty, _, it.err = it.e.s.ReaderAt(TypeTotalDifficulty, off); it.err != nil { + it.clear() return true } it.next += 1 @@ -154,10 +179,19 @@ func (it *RawIterator) Number() uint64 { return it.next - 1 } -// Error returns the error status of the iterator. +// Error returns the error status of the iterator. It should be called before +// reading from any of the iterator's values. func (it *RawIterator) Error() error { if it.err == io.EOF { return nil } return it.err } + +// clear sets all the outputs to nil. +func (it *RawIterator) clear() { + it.Header = nil + it.Body = nil + it.Receipts = nil + it.TotalDifficulty = nil +} From e9cb2074d3eba83c1b22cbddd0639239081b7101 Mon Sep 17 00:00:00 2001 From: lightclient Date: Thu, 8 Jun 2023 10:11:21 +0200 Subject: [PATCH 25/28] internal/era: fix rlp decode of header and correctly read total difficulty --- internal/era/era.go | 5 +++-- internal/era/iterator.go | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/era/era.go b/internal/era/era.go index bb9b20246448..38bebfced018 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -181,7 +181,7 @@ func (e *Era) InitialTD() (*big.Int, error) { if r, n, err = newSnappyReader(e.s, TypeCompressedHeader, off); err != nil { return nil, err } - if err := rlp.Decode(r, header); err != nil { + if err := rlp.Decode(r, &header); err != nil { return nil, err } off += n @@ -199,7 +199,8 @@ func (e *Era) InitialTD() (*big.Int, error) { if r, _, err = e.s.ReaderAt(TypeTotalDifficulty, off); err != nil { return nil, err } - if err := rlp.Decode(r, rawTd); err != nil { + rawTd, err = io.ReadAll(r) + if err != nil { return nil, err } td := new(big.Int).SetBytes(reverseOrder(rawTd)) diff --git a/internal/era/iterator.go b/internal/era/iterator.go index 291d55a27d1e..e74a8154b1a6 100644 --- a/internal/era/iterator.go +++ b/internal/era/iterator.go @@ -103,8 +103,8 @@ func (it *Iterator) BlockAndReceipts() (*types.Block, types.Receipts, error) { // TotalDifficulty returns the total difficulty for the iterator's current // position. 
func (it *Iterator) TotalDifficulty() (*big.Int, error) { - var td []byte - if err := rlp.Decode(it.inner.TotalDifficulty, td); err != nil { + td, err := io.ReadAll(it.inner.TotalDifficulty) + if err != nil { return nil, err } return new(big.Int).SetBytes(reverseOrder(td)), nil From 2a4cbda11509ff2a3d05ccd50ff3bb77f82634c2 Mon Sep 17 00:00:00 2001 From: lightclient Date: Mon, 29 Jan 2024 08:26:50 -0700 Subject: [PATCH 26/28] cmd/era: fix rebase errors --- cmd/era/main.go | 5 +---- cmd/utils/history_test.go | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index 48687617ca57..c8cb43d82793 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -121,10 +121,7 @@ func block(ctx *cli.Context) error { return fmt.Errorf("error reading block %d: %w", num, err) } // Convert block to JSON and print. - val, err := ethapi.RPCMarshalBlock(block, ctx.Bool(txsFlag.Name), ctx.Bool(txsFlag.Name), params.MainnetChainConfig) - if err != nil { - return fmt.Errorf("error marshaling json: %w", err) - } + val := ethapi.RPCMarshalBlock(block, ctx.Bool(txsFlag.Name), ctx.Bool(txsFlag.Name), params.MainnetChainConfig) b, err := json.MarshalIndent(val, "", " ") if err != nil { return fmt.Errorf("error marshaling json: %w", err) diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go index 69256aaf012a..d4500be53de7 100644 --- a/cmd/utils/history_test.go +++ b/cmd/utils/history_test.go @@ -170,7 +170,7 @@ func TestHistoryImportAndExport(t *testing.T) { db2.Close() }) - genesis.MustCommit(db2) + genesis.MustCommit(db2, trie.NewDatabase(db, trie.HashDefaults)) imported, err := core.NewBlockChain(db2, nil, genesis, nil, ethash.NewFaker(), vm.Config{}, nil, nil) if err != nil { t.Fatalf("unable to initialize chain: %v", err) From 0fa894792538f9de51b3a94a2067844d96e9115a Mon Sep 17 00:00:00 2001 From: lightclient Date: Tue, 30 Jan 2024 14:33:37 -0700 Subject: [PATCH 27/28] cmd/era: clearer comments --- cmd/era/main.go | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index c8cb43d82793..7bc1e4088d8e 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -79,7 +79,7 @@ var ( verifyCommand = &cli.Command{ Name: "verify", ArgsUsage: "", - Usage: "verifies each era against expected accumulator root", + Usage: "verifies each era1 against expected accumulator root", Action: verify, } ) @@ -112,7 +112,7 @@ func block(ctx *cli.Context) error { } e, err := open(ctx, num/uint64(ctx.Int(eraSizeFlag.Name))) if err != nil { - return fmt.Errorf("error opening era: %w", err) + return fmt.Errorf("error opening era1: %w", err) } defer e.Close() // Read block with number. 
@@ -260,16 +260,18 @@ func checkAccumulator(e *era.Era) error { if err != nil { return fmt.Errorf("error making era iterator: %w", err) } - // Starting at epoch 0, iterate through all available era1 files and - // check the following: - // * the block index is constructed correctly - // * the starting total difficulty value is correct - // * the accumulator is correct by recomputing it locally, - // which verifies the blocks are all correct (via hash) - // * the receipts root matches the value in the block + // To fully verify an era the following attributes must be checked: + // 1) the block index is constructed correctly + // 2) the tx root matches the value in the block + // 3) the receipts root matches the value in the block + // 4) the starting total difficulty value is correct + // 5) the accumulator is correct by recomputing it locally, which verifies + // the blocks are all correct (via hash) + // + // The attributes 1), 2), and 3) are checked for each block. 4) and 5) require + // accumulation accross the entire set and are verified at the end. for it.Next() { - // next() walks the block index, so we're able to - // implicitly verify it. + // 1) next() walks the block index, so we're able to implicitly verify it. if it.Error() != nil { return fmt.Errorf("error reading block %d: %w", it.Number(), err) } @@ -277,12 +279,12 @@ func checkAccumulator(e *era.Era) error { if it.Error() != nil { return fmt.Errorf("error reading block %d: %w", it.Number(), err) } + // 2) recompute tx root and verify against header. tr := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)) if tr != block.TxHash() { return fmt.Errorf("tx root in block %d mismatch: want %s, got %s", block.NumberU64(), block.TxHash(), tr) } - // Calculate receipt root from receipt list and check - // value against block. + // 3) recompute receipt root and check value against block. rr := types.DeriveSha(receipts, trie.NewStackTrie(nil)) if rr != block.ReceiptHash() { return fmt.Errorf("receipt root in block %d mismatch: want %s, got %s", block.NumberU64(), block.ReceiptHash(), rr) @@ -291,6 +293,7 @@ func checkAccumulator(e *era.Era) error { td.Add(td, block.Difficulty()) tds = append(tds, new(big.Int).Set(td)) } + // 4+5) Verify accumulator and total difficulty. got, err := era.ComputeAccumulator(hashes, tds) if err != nil { return fmt.Errorf("error computing accumulator: %w", err) From beb6505f3ef17de69f561bf520be35014c21607c Mon Sep 17 00:00:00 2001 From: lightclient Date: Thu, 1 Feb 2024 15:15:37 -0700 Subject: [PATCH 28/28] cmd,internal: fix comment typos --- cmd/era/main.go | 2 +- internal/era/e2store/e2store_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/era/main.go b/cmd/era/main.go index 7bc1e4088d8e..e27d8ccec605 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -269,7 +269,7 @@ func checkAccumulator(e *era.Era) error { // the blocks are all correct (via hash) // // The attributes 1), 2), and 3) are checked for each block. 4) and 5) require - // accumulation accross the entire set and are verified at the end. + // accumulation across the entire set and are verified at the end. for it.Next() { // 1) next() walks the block index, so we're able to implicitly verify it. 
if it.Error() != nil { diff --git a/internal/era/e2store/e2store_test.go b/internal/era/e2store/e2store_test.go index 6e7b7fd37c1b..febcffe4cf2c 100644 --- a/internal/era/e2store/e2store_test.go +++ b/internal/era/e2store/e2store_test.go @@ -90,7 +90,7 @@ func TestDecode(t *testing.T) { { // basic valid decoding have: "ffff000000000000", }, - { // basic valid decoding + { // basic invalid decoding have: "ffff000000000001", err: fmt.Errorf("reserved bytes are non-zero"), },
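Put together, the numbered checklist added to checkAccumulator in the "clearer comments" patch above corresponds to a loop along the lines of the sketch below. This is a condensed illustration rather than the tool's exact code: the iterator methods and era.ComputeAccumulator come from this series, ComputeAccumulator is assumed to return a common.Hash, and the expected accumulator root plus the total difficulty before the era's first block are assumed to be supplied by the caller (the real command reads them from the era1 file itself).

    package example

    import (
        "fmt"
        "math/big"

        "github.com/ethereum/go-ethereum/common"
        "github.com/ethereum/go-ethereum/core/types"
        "github.com/ethereum/go-ethereum/internal/era"
        "github.com/ethereum/go-ethereum/trie"
    )

    // verifyEra1 re-derives the tx root and receipt root of every block in e,
    // recomputes the accumulator from block hashes and running total
    // difficulty, and compares it against want.
    func verifyEra1(e *era.Era, want common.Hash, td *big.Int) error {
        it, err := era.NewIterator(e)
        if err != nil {
            return fmt.Errorf("error making era iterator: %w", err)
        }
        var (
            hashes []common.Hash
            tds    []*big.Int
        )
        for it.Next() {
            // 1) next() walks the block index, implicitly verifying it.
            if err := it.Error(); err != nil {
                return fmt.Errorf("error reading block %d: %w", it.Number(), err)
            }
            block, receipts, err := it.BlockAndReceipts()
            if err != nil {
                return fmt.Errorf("error reading block %d: %w", it.Number(), err)
            }
            // 2) and 3): recompute tx and receipt roots against the header.
            if got := types.DeriveSha(block.Transactions(), trie.NewStackTrie(nil)); got != block.TxHash() {
                return fmt.Errorf("tx root mismatch in block %d", block.NumberU64())
            }
            if got := types.DeriveSha(receipts, trie.NewStackTrie(nil)); got != block.ReceiptHash() {
                return fmt.Errorf("receipt root mismatch in block %d", block.NumberU64())
            }
            // Collect inputs for 4) and 5): block hash and running total difficulty.
            hashes = append(hashes, block.Hash())
            td = new(big.Int).Add(td, block.Difficulty())
            tds = append(tds, td)
        }
        got, err := era.ComputeAccumulator(hashes, tds)
        if err != nil {
            return fmt.Errorf("error computing accumulator: %w", err)
        }
        if got != want {
            return fmt.Errorf("accumulator mismatch: want %s, got %s", want, got)
        }
        return nil
    }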