Skip to content

Commit

Permalink
storage: use RangeKeyChanged() in MVCCIncrementalIterator
Browse files Browse the repository at this point in the history
This patch uses `RangeKeyChanged()` to detect changes to range keys in
`MVCCIncrementalIterator`. There are no functional changes.

Some quick benchmarks, using catchup scans:

```
name                                                                 old time/op  new time/op  delta
CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=0-24      538ms ± 1%   535ms ± 1%     ~     (p=0.211 n=10+9)
CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=1-24      690ms ± 0%   590ms ± 1%  -14.56%  (p=0.000 n=9+10)
CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=100-24    743ms ± 1%   646ms ± 1%  -13.13%  (p=0.000 n=9+9)
CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=0-24     794ms ± 1%   794ms ± 0%     ~     (p=0.579 n=10+10)
CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=1-24     966ms ± 0%   911ms ± 1%   -5.72%  (p=0.000 n=10+10)
CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=100-24   974ms ± 0%   920ms ± 0%   -5.51%  (p=0.000 n=10+10)
```

Release justification: bug fixes and low-risk updates to new functionality.

Release note: None
  • Loading branch information
erikgrinaker committed Aug 17, 2022
1 parent a86462e commit 6f22b47
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 67 deletions.
137 changes: 70 additions & 67 deletions pkg/storage/mvcc_incremental_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,14 @@ type MVCCIncrementalIterator struct {
// or range key from the underlying iterator. If true, this implies that the
// underlying iterator returns true as well. This can be used to hide point or
// range keys where one key kind satisfies the time predicate but the other
// one doesn't.
// one doesn't. Ignored following IgnoringTime() calls.
hasPoint, hasRange bool

// rangeKeysStart contains the last seen range key start bound. It is used
// to detect changes to range keys.
//
// TODO(erikgrinaker): This pattern keeps coming up, and involves one
// comparison for every covered point key. Consider exposing this from Pebble,
// who has presumably already done these comparisons, so we can avoid them.
rangeKeysStart roachpb.Key
// rangeKeys contains the filtered range keys at the current location.
rangeKeys MVCCRangeKeyStack

// rangeKeysIgnoringTime contains the complete range keys at the current location.
rangeKeysIgnoringTime MVCCRangeKeyStack

// ignoringTime is true if the iterator is currently ignoring time bounds,
// i.e. following a call to NextIgnoringTime().
Expand Down Expand Up @@ -246,8 +244,7 @@ func (i *MVCCIncrementalIterator) SeekGE(startKey MVCCKey) {
}
}
i.iter.SeekGE(startKey)
i.rangeKeysStart = nil
i.advance()
i.advance(true /* seek */)
}

// Close implements SimpleMVCCIterator.
Expand All @@ -261,7 +258,7 @@ func (i *MVCCIncrementalIterator) Close() {
// Next implements SimpleMVCCIterator.
func (i *MVCCIncrementalIterator) Next() {
i.iter.Next()
i.advance()
i.advance(false /* seek */)
}

// updateValid updates i.valid and i.err based on the underlying iterator, and
Expand All @@ -275,7 +272,7 @@ func (i *MVCCIncrementalIterator) updateValid() bool {
// NextKey implements SimpleMVCCIterator.
func (i *MVCCIncrementalIterator) NextKey() {
i.iter.NextKey()
i.advance()
i.advance(false /* seek */)
}

// maybeSkipKeys checks if any keys can be skipped by using a time-bound
Expand Down Expand Up @@ -414,22 +411,49 @@ func (i *MVCCIncrementalIterator) updateMeta() error {
return nil
}

// updateRangeKeys updates the iterator with the current range keys, filtered by
// time span, and returns whether the position has point and/or range keys.
func (i *MVCCIncrementalIterator) updateRangeKeys() (bool, bool) {
hasPoint, hasRange := i.iter.HasPointAndRange()
if hasRange {
// Clone full set of range keys into i.rangeKeysIgnoringTime.
rangeKeys := i.iter.RangeKeys()
rangeKeys.CloneInto(&i.rangeKeysIgnoringTime)

// Keep trimmed subset in i.rangeKeys.
i.rangeKeys = i.rangeKeysIgnoringTime
i.rangeKeys.Trim(i.startTime.Next(), i.endTime)
if i.rangeKeys.IsEmpty() {
i.rangeKeys.Clear()
hasRange = false
}
} else {
i.rangeKeys.Clear()
i.rangeKeysIgnoringTime.Clear()
}
return hasPoint, hasRange
}

// advance advances the main iterator until it is referencing a key within
// (start_time, end_time].
// (start_time, end_time]. If seek is true, the caller is a SeekGE operations,
// in which case we should emit the current range key position even if
// RangeKeyChanged() doesn't trigger.
//
// It populates i.err with an error if it encountered an inline value or an
// intent with a timestamp within the incremental iterator's bounds when the
// intent policy is MVCCIncrementalIterIntentPolicyError.
func (i *MVCCIncrementalIterator) advance() {
func (i *MVCCIncrementalIterator) advance(seek bool) {
i.ignoringTime = false
for {
if !i.updateValid() {
return
}
rangeKeyChanged := i.iter.RangeKeyChanged()
skipped := i.maybeSkipKeys()
if !i.valid {
return
}
rangeKeyChanged = rangeKeyChanged || i.iter.RangeKeyChanged()
if skipped {
// The skip may landed in the middle of a bare range key, in which case we
// should move on to the next key.
Expand All @@ -439,44 +463,32 @@ func (i *MVCCIncrementalIterator) advance() {
if !i.updateValid() {
return
}
rangeKeyChanged = rangeKeyChanged || i.iter.RangeKeyChanged()
}
}
}

// NB: Don't update i.hasRange directly -- we only change it when
// i.rangeKeysStart changes, which allows us to retain i.hasRange=false if
// we've already determined that the current range keys are outside of the
// time bounds.
hasPoint, hasRange := i.iter.HasPointAndRange()
i.hasPoint = hasPoint

// Process range keys.
//
// TODO(erikgrinaker): This needs to be optimized. For example, range keys
// only change on unversioned keys (except after a SeekGE), which can save a
// bunch of comparisons here. HasPointAndRange() has also been seen to have
// a non-negligible cost even without any range keys.
var newRangeKey bool
if hasRange {
if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) {
i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...)
i.hasRange = i.iter.RangeKeys().HasBetween(i.startTime.Next(), i.endTime)
newRangeKey = i.hasRange
if seek || rangeKeyChanged {
i.hasPoint, i.hasRange = i.updateRangeKeys()
newRangeKey = i.hasRange

// If we're on a visible, bare range key then we're done. If the range key
// isn't visible either, then we keep going.
if !i.hasPoint {
if !i.hasRange {
i.iter.Next()
continue
}
i.meta.Reset()
return
}
// Else: keep i.hasRange from last i.rangeKeysStart change.
} else {
i.hasRange = false
}

// If we're on a visible, bare range key then we're done. If the range key
// isn't visible either, then we keep going.
if !i.hasPoint {
if !i.hasRange {
i.iter.Next()
continue
}
i.meta.Reset()
return
} else {
// If the range key didn't change, and this wasn't a seek, then we must be
// on a point key since the iterator won't surface anything else.
i.hasPoint = true
}

// Process point keys.
Expand Down Expand Up @@ -541,30 +553,26 @@ func (i *MVCCIncrementalIterator) UnsafeKey() MVCCKey {

// HasPointAndRange implements SimpleMVCCIterator.
func (i *MVCCIncrementalIterator) HasPointAndRange() (bool, bool) {
if i.ignoringTime {
return i.iter.HasPointAndRange()
}
return i.hasPoint, i.hasRange
}

// RangeBounds implements SimpleMVCCIterator.
func (i *MVCCIncrementalIterator) RangeBounds() roachpb.Span {
if !i.hasRange {
return roachpb.Span{}
if i.ignoringTime {
return i.rangeKeysIgnoringTime.Bounds
}
return i.iter.RangeBounds()
return i.rangeKeys.Bounds
}

// RangeKeys implements SimpleMVCCIterator.
func (i *MVCCIncrementalIterator) RangeKeys() MVCCRangeKeyStack {
if !i.hasRange {
return MVCCRangeKeyStack{}
}
// TODO(erikgrinaker): It may be worthwhile to clone and memoize this result
// for the same range key. However, callers may avoid calling RangeKeys()
// unnecessarily, and we may optimize parent iterators, so let's measure.
rangeKeys := i.iter.RangeKeys()
if !i.ignoringTime {
rangeKeys.Trim(i.startTime.Next(), i.endTime)
if i.ignoringTime {
return i.rangeKeysIgnoringTime
}
return rangeKeys
return i.rangeKeys
}

// RangeKeyChanged implements SimpleMVCCIterator.
Expand All @@ -589,17 +597,12 @@ func (i *MVCCIncrementalIterator) updateIgnoreTime() {
return
}

i.hasPoint, i.hasRange = i.iter.HasPointAndRange()
if i.hasRange {
// Make sure we update rangeKeysStart appropriately so that switching back
// to regular iteration won't emit bare range keys twice.
if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) {
i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...)
if i.iter.RangeKeyChanged() {
if i.hasPoint, i.hasRange = i.updateRangeKeys(); !i.hasPoint {
return
}
}

if !i.hasPoint {
return
} else {
i.hasPoint = true
}

if err := i.updateMeta(); err != nil {
Expand Down
76 changes: 76 additions & 0 deletions pkg/storage/testdata/mvcc_histories/range_key_iter_incremental
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,47 @@ iter_next: "b"/4.000000000,0=/<empty> {b-c}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: {c-d}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "d"/4.000000000,0=/BYTES/d4

# Switch between ignoring and respecting time with NextIgnoringTime across span.
run ok
iter_new_incremental types=pointsAndRanges k=a end=z startTs=2 endTs=4 intents=emit
iter_seek_ge k=a
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
iter_next_ignoring_time
iter_next
----
iter_seek_ge: "a"/4.000000000,0=/<empty>
iter_next: {b-c}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: "b"/4.000000000,0=/<empty> {b-c}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: {c-d}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: "d"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true {d-f}/[5.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "d"/4.000000000,0=/BYTES/d4
iter_next_ignoring_time: "e"/3.000000000,0=/BYTES/e3 {d-f}/[5.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: {f-g}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: "f"/6.000000000,0=/BYTES/f6 {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "f"/4.000000000,0=/BYTES/f4 {f-g}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: "f"/2.000000000,0=/BYTES/f2 {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: {g-h}/[3.000000000,0=/<empty>]
iter_next_ignoring_time: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "h"/4.000000000,0=/<empty>
iter_next_ignoring_time: "h"/3.000000000,0=/BYTES/h3 {h-k}/[1.000000000,0=/<empty>]
iter_next: {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
iter_next_ignoring_time: "m"/8.000000000,0=/BYTES/m8 {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
iter_next: .

# NextIgnoringTime with only range keys.
run ok
iter_new_incremental types=rangesOnly k=a end=z startTs=4 endTs=5 intents=emit
Expand All @@ -1160,6 +1201,7 @@ iter_next_key_ignoring_time
iter_next_key_ignoring_time
iter_next_key_ignoring_time
iter_next_key_ignoring_time
iter_next_key_ignoring_time
----
iter_seek_ge: "a"/4.000000000,0=/<empty>
iter_next_key_ignoring_time: {b-c}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
Expand All @@ -1174,6 +1216,40 @@ iter_next_key_ignoring_time: "k"/5.000000000,0=/BYTES/k5
iter_next_key_ignoring_time: "l"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
iter_next_key_ignoring_time: "m"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
iter_next_key_ignoring_time: "o"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
iter_next_key_ignoring_time: .

# Switch between ignoring and respecting time with NextKeyIgnoringTime across span.
run ok
iter_new_incremental types=pointsAndRanges k=a end=z startTs=2 endTs=4 intents=emit
iter_seek_ge k=a
iter_next
iter_next_key_ignoring_time
iter_next
iter_next_key_ignoring_time
iter_next
iter_next_key_ignoring_time
iter_next
iter_next_key_ignoring_time
iter_next
iter_next_key_ignoring_time
iter_next
iter_next_key_ignoring_time
iter_next
----
iter_seek_ge: "a"/4.000000000,0=/<empty>
iter_next: {b-c}/[3.000000000,0=/<empty>]
iter_next_key_ignoring_time: {c-d}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "d"/4.000000000,0=/BYTES/d4
iter_next_key_ignoring_time: "e"/3.000000000,0=/BYTES/e3 {d-f}/[5.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: {f-g}/[3.000000000,0=/<empty>]
iter_next_key_ignoring_time: {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_next: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty>]
iter_next_key_ignoring_time: {h-k}/[1.000000000,0=/<empty>]
iter_next: "h"/4.000000000,0=/<empty>
iter_next_key_ignoring_time: "j"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true {h-k}/[1.000000000,0=/<empty>]
iter_next: {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
iter_next_key_ignoring_time: "o"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
iter_next: .

# NextKeyIgnoringTime with only range keys.
run ok
Expand Down

0 comments on commit 6f22b47

Please sign in to comment.