Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

main, wire, blockchain, indexers, ffldb: Add pruning #1971

Merged
merged 14 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions blockchain/chain.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ type BlockChain struct {
// fields in this struct below this point.
chainLock sync.RWMutex

// pruneTarget is the size in bytes the database targets for when the node
// is pruned.
pruneTarget uint64

// These fields are related to the memory block index. They both have
// their own locks, however they are often also protected by the chain
// lock to help prevent logic races when blocks are being processed.
Expand Down Expand Up @@ -600,6 +604,26 @@ func (b *BlockChain) connectBlock(node *blockNode, block *btcutil.Block,

// Atomically insert info into the database.
err = b.db.Update(func(dbTx database.Tx) error {
// If the pruneTarget isn't 0, we should attempt to delete older blocks
// from the database.
if b.pruneTarget != 0 {
// When the total block size is under the prune target, pruning blocks
// is a no-op and the returned deleted hashes are nil.
deletedHashes, err := dbTx.PruneBlocks(b.pruneTarget)
if err != nil {
return err
}

// Only attempt to delete if we have any deleted blocks.
if len(deletedHashes) != 0 {
// Delete the spend journals of the pruned blocks.
err = dbPruneSpendJournalEntry(dbTx, deletedHashes)
if err != nil {
return err
}
}
}

// Update best block state.
err := dbPutBestState(dbTx, state, node.workSum)
if err != nil {
Expand Down Expand Up @@ -1702,6 +1726,11 @@ type Config struct {
// This field can be nil if the caller is not interested in using a
// signature cache.
HashCache *txscript.HashCache

// Prune specifies the target database usage (in bytes) the database
// will aim for with block files. Prune at 0 specifies that no
// blocks will be deleted.
Prune uint64
}

// New returns a BlockChain instance using the provided configuration details.
Expand Down Expand Up @@ -1757,6 +1786,7 @@ func New(config *Config) (*BlockChain, error) {
prevOrphans: make(map[chainhash.Hash][]*orphanBlock),
warningCaches: newThresholdCaches(vbNumBits),
deploymentCaches: newThresholdCaches(chaincfg.DefinedDeployments),
pruneTarget: config.Prune,
}

// Ensure all the deployments are synchronized with our clock if
Expand Down
15 changes: 15 additions & 0 deletions blockchain/chainio.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,21 @@ func dbRemoveSpendJournalEntry(dbTx database.Tx, blockHash *chainhash.Hash) erro
return spendBucket.Delete(blockHash[:])
}

// dbPruneSpendJournalEntry uses an existing database transaction to remove all
// the spend journal entries for the pruned blocks.
// dbPruneSpendJournalEntry uses an existing database transaction to remove all
// the spend journal entries for the pruned blocks.
func dbPruneSpendJournalEntry(dbTx database.Tx, blockHashes []chainhash.Hash) error {
	spendBucket := dbTx.Metadata().Bucket(spendJournalBucketName)

	// Remove each pruned block's spend journal entry, bailing out on the
	// first failure so the enclosing transaction can roll back.
	for i := range blockHashes {
		if err := spendBucket.Delete(blockHashes[i][:]); err != nil {
			return err
		}
	}

	return nil
}

// -----------------------------------------------------------------------------
// The unspent transaction output (utxo) set consists of an entry for each
// unspent output using a format that is optimized to reduce space using domain
Expand Down
12 changes: 12 additions & 0 deletions blockchain/indexers/addrindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -991,3 +991,15 @@ func NewAddrIndex(db database.DB, chainParams *chaincfg.Params) *AddrIndex {
func DropAddrIndex(db database.DB, interrupt <-chan struct{}) error {
return dropIndex(db, addrIndexKey, addrIndexName, interrupt)
}

// AddrIndexInitialized returns true if the address index has been created previously.
// AddrIndexInitialized returns true if the address index has been created
// previously.
func AddrIndexInitialized(db database.DB) bool {
	var exists bool
	// The error from View is deliberately discarded: the closure below
	// never returns a non-nil error, and if the view itself fails the
	// index is conservatively reported as uninitialized.
	_ = db.View(func(dbTx database.Tx) error {
		bucket := dbTx.Metadata().Bucket(addrIndexKey)
		exists = bucket != nil
		return nil
	})

	return exists
}
12 changes: 12 additions & 0 deletions blockchain/indexers/cfindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,3 +355,15 @@ func NewCfIndex(db database.DB, chainParams *chaincfg.Params) *CfIndex {
func DropCfIndex(db database.DB, interrupt <-chan struct{}) error {
return dropIndex(db, cfIndexParentBucketKey, cfIndexName, interrupt)
}

// CfIndexInitialized returns true if the cfindex has been created previously.
// CfIndexInitialized returns true if the cfindex has been created previously.
func CfIndexInitialized(db database.DB) bool {
	var exists bool
	// The error from View is deliberately discarded: the closure below
	// never returns a non-nil error, and if the view itself fails the
	// index is conservatively reported as uninitialized.
	_ = db.View(func(dbTx database.Tx) error {
		bucket := dbTx.Metadata().Bucket(cfIndexParentBucketKey)
		exists = bucket != nil
		return nil
	})

	return exists
}
12 changes: 12 additions & 0 deletions blockchain/indexers/txindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,15 @@ func DropTxIndex(db database.DB, interrupt <-chan struct{}) error {

return dropIndex(db, txIndexKey, txIndexName, interrupt)
}

// TxIndexInitialized returns true if the tx index has been created previously.
// TxIndexInitialized returns true if the tx index has been created previously.
func TxIndexInitialized(db database.DB) bool {
	var exists bool
	// The error from View is deliberately discarded: the closure below
	// never returns a non-nil error, and if the view itself fails the
	// index is conservatively reported as uninitialized.
	_ = db.View(func(dbTx database.Tx) error {
		bucket := dbTx.Metadata().Bucket(txIndexKey)
		exists = bucket != nil
		return nil
	})

	return exists
}
82 changes: 82 additions & 0 deletions btcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,88 @@ func btcdMain(serverChan chan<- *server) error {
return nil
}

// Check if the database had previously been pruned. If it had been, it's
// not possible to newly generate the tx index and addr index.
var beenPruned bool
db.View(func(dbTx database.Tx) error {
beenPruned, err = dbTx.BeenPruned()
return err
})
if err != nil {
btcdLog.Errorf("%v", err)
return err
}
if beenPruned && cfg.Prune == 0 {
err = fmt.Errorf("--prune cannot be disabled as the node has been "+
"previously pruned. You must delete the files in the datadir: \"%s\" "+
"and sync from the beginning to disable pruning", cfg.DataDir)
btcdLog.Errorf("%v", err)
return err
}
if beenPruned && cfg.TxIndex {
err = fmt.Errorf("--txindex cannot be enabled as the node has been "+
"previously pruned. You must delete the files in the datadir: \"%s\" "+
"and sync from the beginning to enable the desired index", cfg.DataDir)
btcdLog.Errorf("%v", err)
return err
}
if beenPruned && cfg.AddrIndex {
err = fmt.Errorf("--addrindex cannot be enabled as the node has been "+
"previously pruned. You must delete the files in the datadir: \"%s\" "+
"and sync from the beginning to enable the desired index", cfg.DataDir)
btcdLog.Errorf("%v", err)
return err
}
// If we've previously been pruned and the cfindex isn't present, it means that the
// user wants to enable the cfindex after the node has already synced up and been
// pruned.
if beenPruned && !indexers.CfIndexInitialized(db) && !cfg.NoCFilters {
err = fmt.Errorf("compact filters cannot be enabled as the node has been "+
"previously pruned. You must delete the files in the datadir: \"%s\" "+
"and sync from the beginning to enable the desired index. You may "+
"use the --nocfilters flag to start the node up without the compact "+
"filters", cfg.DataDir)
btcdLog.Errorf("%v", err)
return err
}
// If the user wants to disable the cfindex and is pruned or has enabled pruning, force
// the user to either drop the cfindex manually or restart the node without the --nocfilters
// flag.
if (beenPruned || cfg.Prune != 0) && indexers.CfIndexInitialized(db) && cfg.NoCFilters {
err = fmt.Errorf("--nocfilters flag was given but the compact filters have " +
"previously been enabled on this node and the index data currently " +
"exists in the database. The node has also been previously pruned and " +
"the database would be left in an inconsistent state if the compact " +
"filters don't get indexed now. To disable compact filters, please drop the " +
"index completely with the --dropcfindex flag and restart the node. " +
"To keep the compact filters, restart the node without the --nocfilters " +
"flag")
btcdLog.Errorf("%v", err)
return err
}

// Enforce removal of txindex and addrindex if user requested pruning.
// This is to require explicit action from the user before removing
// indexes that won't be useful when block files are pruned.
//
// NOTE: The order is important here because dropping the tx index also
// drops the address index since it relies on it. We explicitly make the
// user drop both indexes if --addrindex was enabled previously.
if cfg.Prune != 0 && indexers.AddrIndexInitialized(db) {
err = fmt.Errorf("--prune flag may not be given when the address index " +
"has been initialized. Please drop the address index with the " +
"--dropaddrindex flag before enabling pruning")
btcdLog.Errorf("%v", err)
return err
}
if cfg.Prune != 0 && indexers.TxIndexInitialized(db) {
err = fmt.Errorf("--prune flag may not be given when the transaction index " +
"has been initialized. Please drop the transaction index with the " +
"--droptxindex flag before enabling pruning")
btcdLog.Errorf("%v", err)
return err
}

// The config file is already created if it did not exist and the log
// file has already been opened by now so we only need to allow
// creating rpc cert and key files if they don't exist.
Expand Down
26 changes: 26 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const (
sampleConfigFilename = "sample-btcd.conf"
defaultTxIndex = false
defaultAddrIndex = false
pruneMinSize = 1536
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see there's another min size here. Shouldn't we sync this with what's being checked in database/ffldb/db.go?

Copy link
Collaborator Author

@kcalvinalvin kcalvinalvin May 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the 1536 number came from the fact that we have these constants.

blocksNeeded = 288
ConsensusMaxBlockSize = 4,000,000
onDiskBlkFileSizeInBytes = 536,870,912

We need to keep 288 blocks to adhere to NODE_NETWORK_LIMITED BIP. Each block can be a maximum of 4MB. Each block file (XXXXXX.fdb) is 512MiB.

(blocksNeeded*ConsensusMaxBlockSize)/onDiskBlkFileSizeInBytes = 2.145

We do the math and we need to keep 2.14 on disk .fdb files to adhere to the NODE_NETWORK_LIMITED rule. Can't keep .14 files so we round up to 3. 512MiB * 3 is 1536MiB which is our pruneMinSize.

As mentioned here, the check in database/ffldb/db.go is a different check so it's ok.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading this correctly it also seems like the smallest resolution we can target is 512MiB increments which means that maybe our settings should use GiB as the unit of measurement for pruning's sake. Right now it specifies it with a uint32 of bytes, which practically means that there are effectively 6 configurations that can come out of this: 1.5, 2.0, 2.5, 3.0, 3.5, and 4.0 GiB?

I'm interpreting this because you said we can't keep partial block files so that means that we effectively only have the discrete intervals of 512MiB to work with.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each blk file is 512MiB and while we can delete parts of the file, it's not efficient to do so and won't have much user benefit.

It's the same as Core where you give a target and core deletes .dat files to meet that target. It'll be in those increments you state but the user is free to give any amount (just like in Core).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I don't mind the intervals at all. It's hardly an issue now that the overall prune cache size isn't maxed at ~4G

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay, the two different limits now make sense to me, thanks for the explanation.

)

var (
Expand Down Expand Up @@ -146,6 +147,7 @@ type config struct {
Proxy string `long:"proxy" description:"Connect via SOCKS5 proxy (eg. 127.0.0.1:9050)"`
ProxyPass string `long:"proxypass" default-mask:"-" description:"Password for proxy server"`
ProxyUser string `long:"proxyuser" description:"Username for proxy server"`
Prune uint64 `long:"prune" description:"Prune already validated blocks from the database. Must specify a target size in MiB (minimum value of 1536, default value of 0 will disable pruning)"`
RegressionTest bool `long:"regtest" description:"Use the regression test network"`
RejectNonStd bool `long:"rejectnonstd" description:"Reject non-standard transactions regardless of the default settings for the active network."`
RejectReplacement bool `long:"rejectreplacement" description:"Reject transactions that attempt to replace existing transactions within the mempool through the Replace-By-Fee (RBF) signaling policy."`
Expand Down Expand Up @@ -1137,6 +1139,30 @@ func loadConfig() (*config, []string, error) {
}
}

if cfg.Prune != 0 && cfg.Prune < pruneMinSize {
err := fmt.Errorf("%s: the minimum value for --prune is %d. Got %d",
funcName, pruneMinSize, cfg.Prune)
fmt.Fprintln(os.Stderr, err)
fmt.Fprintln(os.Stderr, usageMessage)
return nil, nil, err
}
Comment on lines +1142 to +1148
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to terminate here or should we just log a Warning and start up with the min value?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it startups up, then the user may end up believing that their min value is correct. It's hard to catch the logs since there's a bunch at startup so I think it's better to fail.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems reasonable


if cfg.Prune != 0 && cfg.TxIndex {
err := fmt.Errorf("%s: the --prune and --txindex options may "+
"not be activated at the same time", funcName)
fmt.Fprintln(os.Stderr, err)
fmt.Fprintln(os.Stderr, usageMessage)
return nil, nil, err
}
Comment on lines +1150 to +1156
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are forcing this set of options to be blocked at the gate, shouldn't we return an error from PruneBlock on the txindex instead of returning nil as a noop?


if cfg.Prune != 0 && cfg.AddrIndex {
err := fmt.Errorf("%s: the --prune and --addrindex options may "+
"not be activated at the same time", funcName)
fmt.Fprintln(os.Stderr, err)
fmt.Fprintln(os.Stderr, usageMessage)
return nil, nil, err
}
Comment on lines +1158 to +1164
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are forcing this set of options to be blocked at the gate, shouldn't we return an error from PruneBlock on the addrindex instead of returning nil as a noop?


// Warn about missing config file only after all other configuration is
// done. This prevents the warning on help messages and invalid
// options. Note this should go directly before the return.
Expand Down
72 changes: 50 additions & 22 deletions database/ffldb/blockio.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"

"github.com/btcsuite/btcd/chaincfg/chainhash"
Expand All @@ -23,6 +26,10 @@ import (
)

const (
// blockFileExtension is the extension that's used to store the block
// files on the disk.
blockFileExtension = ".fdb"

// The Bitcoin protocol encodes block height as int32, so max number of
// blocks is 2^31. Max block size per the protocol is 32MiB per block.
// So the theoretical max at the time this comment was written is 64PiB
Expand All @@ -32,7 +39,7 @@ const (
// 512MiB each for a total of ~476.84PiB (roughly 7.4 times the current
// theoretical max), so there is room for the max block size to grow in
// the future.
blockFilenameTemplate = "%09d.fdb"
blockFilenameTemplate = "%09d" + blockFileExtension

// maxOpenFiles is the max number of open files to maintain in the
// open blocks cache. Note that this does not include the current
Expand Down Expand Up @@ -713,36 +720,57 @@ func (s *blockStore) handleRollback(oldBlockFileNum, oldBlockOffset uint32) {
}

// scanBlockFiles searches the database directory for all flat block files to
// find the end of the most recent file. This position is considered the
// current write cursor which is also stored in the metadata. Thus, it is used
// to detect unexpected shutdowns in the middle of writes so the block files
// can be reconciled.
func scanBlockFiles(dbPath string) (int, uint32) {
lastFile := -1
fileLen := uint32(0)
for i := 0; ; i++ {
filePath := blockFilePath(dbPath, uint32(i))
st, err := os.Stat(filePath)
if err != nil {
break
}
lastFile = i
// find the first file, last file, and the end of the most recent file. The
// position at the last file is considered the current write cursor which is
// also stored in the metadata. Thus, it is used to detect unexpected shutdowns
// in the middle of writes so the block files can be reconciled.
func scanBlockFiles(dbPath string) (int, int, uint32, error) {
firstFile, lastFile, lastFileLen, err := int(-1), int(-1), uint32(0), error(nil)

files, err := filepath.Glob(filepath.Join(dbPath, "*"+blockFileExtension))
if err != nil {
return 0, 0, 0, err
}
sort.Strings(files)

fileLen = uint32(st.Size())
// Return early if there's no block files.
if len(files) == 0 {
return firstFile, lastFile, lastFileLen, nil
}

log.Tracef("Scan found latest block file #%d with length %d", lastFile,
fileLen)
return lastFile, fileLen
// Grab the first and last file's number.
firstFile, err = strconv.Atoi(strings.TrimSuffix(filepath.Base(files[0]), blockFileExtension))
if err != nil {
return 0, 0, 0, fmt.Errorf("scanBlockFiles error: %v", err)
}
lastFile, err = strconv.Atoi(strings.TrimSuffix(filepath.Base(files[len(files)-1]), blockFileExtension))
if err != nil {
return 0, 0, 0, fmt.Errorf("scanBlockFiles error: %v", err)
}

// Get the last file's length.
filePath := blockFilePath(dbPath, uint32(lastFile))
st, err := os.Stat(filePath)
if err != nil {
return 0, 0, 0, err
}
lastFileLen = uint32(st.Size())

log.Tracef("Scan found latest block file #%d with length %d", lastFile, lastFileLen)

return firstFile, lastFile, lastFileLen, err
}

// newBlockStore returns a new block store with the current block file number
// and offset set and all fields initialized.
func newBlockStore(basePath string, network wire.BitcoinNet) *blockStore {
func newBlockStore(basePath string, network wire.BitcoinNet) (*blockStore, error) {
// Look for the end of the latest block file to determine what the
// write cursor position is from the viewpoint of the block files on
// disk.
fileNum, fileOff := scanBlockFiles(basePath)
_, fileNum, fileOff, err := scanBlockFiles(basePath)
if err != nil {
return nil, err
}
if fileNum == -1 {
fileNum = 0
fileOff = 0
Expand All @@ -765,5 +793,5 @@ func newBlockStore(basePath string, network wire.BitcoinNet) *blockStore {
store.openFileFunc = store.openFile
store.openWriteFileFunc = store.openWriteFile
store.deleteFileFunc = store.deleteFile
return store
return store, nil
}
Loading
Loading