From 036d3e72b8911441610a7c619b3d9c74092dd6f7 Mon Sep 17 00:00:00 2001
From: Ibrahim Jarif <ibrahim@dgraph.io>
Date: Tue, 23 Apr 2019 21:29:04 +0530
Subject: [PATCH] Set readTs in write batch to latest done readTs so we can
 discard older versions of keys during compactions.

Every Transaction stores the latest value of `readTs` it is aware of. When the transaction is discarded (which happens even when we commit), the global value of `readMark` is updated.
https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/txn.go#L501-L503
Previously, the `readTs` of transaction inside the write batch struct was set to 0. So the global value of `readMark` would also be set to 0 (unless someone ran a transaction after using write batch).
Due to the 0 value of the global `readMark`, the compaction algorithm would skip all the values which were inserted in the write batch call.
See https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/levels.go#L480-L484
and
https://github.com/dgraph-io/badger/blob/1fcc96ecdb66d221df85cddec186b6ac7b6dab4b/txn.go#L138-L145
The `o.readMark.DoneUntil()` call would always return `0` and so the compaction wouldn't compact the newer values.

With this commit, the compaction algorithm works as expected with key-values inserted via Transaction API or via the Write Batch API.

See https://github.com/dgraph-io/badger/issues/767
---
 batch.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/batch.go b/batch.go
index 2c26d4b07..2187f9c6d 100644
--- a/batch.go
+++ b/batch.go
@@ -36,7 +36,14 @@ type WriteBatch struct {
 // creating and committing transactions. Due to the nature of SSI guaratees provided by Badger,
 // blind writes can never encounter transaction conflicts (ErrConflict).
 func (db *DB) NewWriteBatch() *WriteBatch {
-	return &WriteBatch{db: db, txn: db.newTransaction(true, true)}
+	txn := db.newTransaction(true, true)
+	// If we let it stay at zero, compactions would not allow older key versions to be deleted,
+	// because the read timestamps of pending txns, would be zero. Therefore, we set it to the
+	// maximum read timestamp that's done. This allows compactions to discard key versions below
+	// this read timestamp, while also not blocking on pending txns to finish before starting this
+	// one.
+	txn.readTs = db.orc.readMark.DoneUntil()
+	return &WriteBatch{db: db, txn: txn}
 }
 
 // Cancel function must be called if there's a chance that Flush might not get
@@ -128,7 +135,8 @@ func (wb *WriteBatch) commit() error {
 	wb.wg.Add(1)
 	wb.txn.CommitWith(wb.callback)
 	wb.txn = wb.db.newTransaction(true, true)
-	wb.txn.readTs = 0 // We're not reading anything.
+	// See comment about readTs in NewWriteBatch.
+	wb.txn.readTs = wb.db.orc.readMark.DoneUntil()
 	return wb.err
 }