Skip to content

Commit

Permalink
Set readTs in write batch to latest done readTs so that we can discard older versions of keys during compactions.
Browse files Browse the repository at this point in the history

Every Transaction stores the latest value of `readTs` it is aware of. When the transaction is discarded (which happens even when we commit), the global value of `readMark` is updated.
https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/txn.go#L501-L503
Previously, the `readTs` of the transaction inside the write batch struct was set to 0. So the global value of `readMark` would also be set to 0 (unless someone ran a transaction after using the write batch).
Due to the 0 value of the global `readMark`, the compaction algorithm would skip all the values which were inserted in the write batch call.
See https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/levels.go#L480-L484
and
https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/txn.go#L138-L145
The `o.readMark.DoneUntil()` call would always return `0` and so the compaction wouldn't compact the newer values.

With this commit, the compaction algorithm works as expected with key-values inserted via Transaction API or via the Write Batch API.

See #767
  • Loading branch information
Ibrahim Jarif authored and jarifibrahim committed May 2, 2019
1 parent ea4934b commit 6b796b3
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 2 deletions.
12 changes: 10 additions & 2 deletions batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,14 @@ type WriteBatch struct {
// creating and committing transactions. Due to the nature of SSI guarantees provided by Badger,
// blind writes can never encounter transaction conflicts (ErrConflict).
func (db *DB) NewWriteBatch() *WriteBatch {
	txn := db.newTransaction(true, true)
	// If we let readTs stay at zero, compactions would not allow older key versions to be
	// deleted, because the read timestamps of pending txns would be zero. Therefore, we set it
	// to the maximum read timestamp that's done. This allows compactions to discard key
	// versions below this read timestamp, while also not blocking on pending txns to finish
	// before starting this one.
	txn.readTs = db.orc.readMark.DoneUntil()
	return &WriteBatch{db: db, txn: txn}
}

// Cancel function must be called if there's a chance that Flush might not get
Expand Down Expand Up @@ -128,7 +135,8 @@ func (wb *WriteBatch) commit() error {
wb.wg.Add(1)
wb.txn.CommitWith(wb.callback)
wb.txn = wb.db.newTransaction(true, true)
wb.txn.readTs = 0 // We're not reading anything.
// See comment about readTs in NewWriteBatch.
wb.txn.readTs = wb.db.orc.readMark.DoneUntil()
return wb.err
}

Expand Down
58 changes: 58 additions & 0 deletions batch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package badger

import (
"fmt"
"io/ioutil"
"os"
"testing"
"time"

Expand Down Expand Up @@ -67,3 +69,59 @@ func TestWriteBatch(t *testing.T) {
require.NoError(t, err)
})
}

// TestWriteBatchCompaction verifies that key versions inserted via the WriteBatch API can be
// discarded by compaction once superseded (here, by deletions). Before this fix, the write
// batch left its txn readTs at 0, which pinned the global readMark at 0 and prevented
// compactions from discarding any versions written through a write batch.
func TestWriteBatchCompaction(t *testing.T) {
	dir, err := ioutil.TempDir(".", "badger-test")
	require.NoError(t, err)
	defer os.RemoveAll(dir)

	opts := DefaultOptions
	opts.ValueDir = dir
	opts.Dir = dir

	db, err := Open(opts)
	require.NoError(t, err)

	wb := db.NewWriteBatch()
	entries := 10000
	for i := 0; i < entries; i++ {
		require.NoError(t, wb.Set([]byte(fmt.Sprintf("foo%d", i)), []byte("bar"), 0))
	}
	require.NoError(t, wb.Flush())

	wb = db.NewWriteBatch()
	// Delete 50% of the entries.
	for i := 0; i < entries/2; i++ {
		require.NoError(t, wb.Delete([]byte(fmt.Sprintf("foo%d", i))))
	}
	require.NoError(t, wb.Flush())

	// It is necessary that we call db.Update(..) before compaction so that the db.orc.readMark
	// value is incremented. The transactions in the WriteBatch calls do not increment the
	// db.orc.readMark value and hence compaction wouldn't discard any entries added by the
	// write batch if we did not increment the db.orc.readMark value.
	require.NoError(t, db.Update(func(txn *Txn) error {
		// Propagate the Set error instead of silently dropping it.
		return txn.Set([]byte("key1"), []byte("val1"))
	}))

	// Close the DB to force compaction.
	require.NoError(t, db.Close())

	db, err = Open(opts)
	require.NoError(t, err)
	defer db.Close()

	iopt := DefaultIteratorOptions
	iopt.AllVersions = true
	txn := db.NewTransaction(false)
	defer txn.Discard()
	it := txn.NewIterator(iopt)
	defer it.Close()
	countAfterCompaction := 0
	for it.Rewind(); it.Valid(); it.Next() {
		countAfterCompaction++
	}
	// 10000 sets + 5000 deletes = 15000 versions were written in total. If compaction
	// discarded nothing, AllVersions would still see all of them; we require that at least
	// some versions were dropped.
	require.Less(t, countAfterCompaction, entries+entries/2)
}

0 comments on commit 6b796b3

Please sign in to comment.