From 83605e476cbe8a2782af721c20c88eba19a2c744 Mon Sep 17 00:00:00 2001 From: Calvin Kim Date: Tue, 22 Aug 2023 07:22:06 +0900 Subject: [PATCH] btcutil: reuse serialized tx during TxHash btcutil.Block caches the serialized raw bytes of the block during ibd. These serialized block bytes include the serialized txs. The current tx hash generation will re-serialize the de-serialized tx to create the raw bytes and only then hash them. This commit changes the code so that the re-serialization never happens, saving tons of cpu and memory overhead. --- btcutil/block.go | 23 +++++++++++++++- btcutil/tx.go | 68 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/btcutil/block.go b/btcutil/block.go index 7d38abc4a0..52c909192b 100644 --- a/btcutil/block.go +++ b/btcutil/block.go @@ -154,12 +154,26 @@ func (b *Block) Transactions() []*Tx { b.transactions = make([]*Tx, len(b.msgBlock.Transactions)) } + // Offset of each tx. 80 accounts for the block header size. + offset := 80 + wire.VarIntSerializeSize(uint64(len(b.msgBlock.Transactions))) + // Generate and cache the wrapped transactions for all that haven't // already been done. for i, tx := range b.transactions { if tx == nil { newTx := NewTx(b.msgBlock.Transactions[i]) newTx.SetIndex(i) + + size := b.msgBlock.Transactions[i].SerializeSize() + + // The block may not always have the serializedBlock. + if len(b.serializedBlock) > 0 { + // This allows for the reuse of the already serialized tx. + newTx.setBytes(b.serializedBlock[offset : offset+size]) + + // Increment offset for this block. + offset += size + } b.transactions[i] = newTx } } @@ -234,6 +248,9 @@ func NewBlockFromBytes(serializedBlock []byte) (*Block, error) { return nil, err } b.serializedBlock = serializedBlock + // This initializes []btcutil.Tx to have the serialized raw transactions cached. + // Helps speed up things like generating the txhash. 
+ b.Transactions() return b, nil } @@ -257,9 +274,13 @@ func NewBlockFromReader(r io.Reader) (*Block, error) { // NewBlockFromBlockAndBytes returns a new instance of a bitcoin block given // an underlying wire.MsgBlock and the serialized bytes for it. See Block. func NewBlockFromBlockAndBytes(msgBlock *wire.MsgBlock, serializedBlock []byte) *Block { - return &Block{ + b := &Block{ msgBlock: msgBlock, serializedBlock: serializedBlock, blockHeight: BlockHeightUnknown, } + // This initializes []btcutil.Tx to have the serialized raw transactions cached. + // Helps speed up things like generating the txhash. + b.Transactions() + return b } diff --git a/btcutil/tx.go b/btcutil/tx.go index 5633fef90e..abc055acbe 100644 --- a/btcutil/tx.go +++ b/btcutil/tx.go @@ -27,6 +27,7 @@ type Tx struct { txHashWitness *chainhash.Hash // Cached transaction witness hash txHasWitness *bool // If the transaction has witness data txIndex int // Position within a block or TxIndexUnknown + rawBytes []byte // Raw bytes for the tx in the raw block. } // MsgTx returns the underlying wire.MsgTx for the transaction. @@ -37,22 +38,68 @@ func (t *Tx) MsgTx() *wire.MsgTx { // Hash returns the hash of the transaction. This is equivalent to // calling TxHash on the underlying wire.MsgTx, however it caches the -// result so subsequent calls are more efficient. +// result so subsequent calls are more efficient. If the Tx has the +// raw bytes of the tx cached, it will use that and skip serialization. func (t *Tx) Hash() *chainhash.Hash { // Return the cached hash if it has already been generated. if t.txHash != nil { return t.txHash } - // Cache the hash and return it. - hash := t.msgTx.TxHash() + // If the rawBytes aren't available, call msgtx.TxHash. + if t.rawBytes == nil { + hash := t.msgTx.TxHash() + t.txHash = &hash + return &hash + } + + // If we have the raw bytes, then don't call msgTx.TxHash as that has the + // overhead of serialization. 
+ var hash chainhash.Hash + if t.HasWitness() { + // If the raw bytes contain the witness, we must strip it out before + // calculating the hash. + baseSize := t.msgTx.SerializeSizeStripped() + nonWitnessBytes := make([]byte, 0, baseSize) + + // Append the version bytes. + offset := 4 + nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[:offset]...) + + // Append the input and output bytes. -8 to account for the + // version bytes and the locktime bytes. + // + // Skip the 2 bytes for the witness encoding. + offset += 2 + nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[offset:offset+baseSize-8]...) + + // Append the last 4 bytes which are the locktime bytes. + nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[len(t.rawBytes)-4:]...) + + // NOTE(review): this comment used to say doublehashh is called here instead of + // doublehashraw to avoid 1 alloc, but the code calls DoubleHashRaw; confirm which is intended. + hash = chainhash.DoubleHashRaw(func(w io.Writer) error { + _, err := w.Write(nonWitnessBytes) + return err + }) + } else { + // If the raw bytes don't have the witness, we can use it directly. + // + // NOTE(review): this comment used to say doublehashh is called here instead of + // doublehashraw to avoid 1 alloc, but the code calls DoubleHashRaw; confirm which is intended. + hash = chainhash.DoubleHashRaw(func(w io.Writer) error { + _, err := w.Write(t.rawBytes) + return err + }) + } t.txHash = &hash return &hash } // WitnessHash returns the witness hash (wtxid) of the transaction. This is // equivalent to calling WitnessHash on the underlying wire.MsgTx, however it -// caches the result so subsequent calls are more efficient. +// caches the result so subsequent calls are more efficient. If the Tx has the +// raw bytes of the tx cached, it will use that and skip serialization. func (t *Tx) WitnessHash() *chainhash.Hash { // Return the cached hash if it has already been generated. 
if t.txHashWitness != nil { @@ -60,7 +107,13 @@ func (t *Tx) WitnessHash() *chainhash.Hash { } // Cache the hash and return it. - hash := t.msgTx.WitnessHash() + var hash chainhash.Hash + if len(t.rawBytes) > 0 { + hash = chainhash.DoubleHashH(t.rawBytes) + } else { + hash = t.msgTx.WitnessHash() + } + t.txHashWitness = &hash return &hash } @@ -99,6 +152,11 @@ func NewTx(msgTx *wire.MsgTx) *Tx { } } +// setBytes sets the raw bytes of the tx. +func (t *Tx) setBytes(bytes []byte) { + t.rawBytes = bytes +} + // NewTxFromBytes returns a new instance of a bitcoin transaction given the // serialized bytes. See Tx. func NewTxFromBytes(serializedTx []byte) (*Tx, error) {