From 6f22b47c490ee5a7804e7e7fb71cc0d0379da054 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 16 Aug 2022 17:19:51 +0000 Subject: [PATCH] storage: use `RangeKeyChanged()` in `MVCCIncrementalIterator` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch uses `RangeKeyChanged()` to detect changes to range keys in `MVCCIncrementalIterator`. There are no functional changes. Some quick benchmarks, using catchup scans: ``` name old time/op new time/op delta CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=0-24 538ms ± 1% 535ms ± 1% ~ (p=0.211 n=10+9) CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=1-24 690ms ± 0% 590ms ± 1% -14.56% (p=0.000 n=9+10) CatchUpScan/mixed-case/withDiff=true/perc=0.00/numRangeKeys=100-24 743ms ± 1% 646ms ± 1% -13.13% (p=0.000 n=9+9) CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=0-24 794ms ± 1% 794ms ± 0% ~ (p=0.579 n=10+10) CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=1-24 966ms ± 0% 911ms ± 1% -5.72% (p=0.000 n=10+10) CatchUpScan/mixed-case/withDiff=false/perc=0.00/numRangeKeys=100-24 974ms ± 0% 920ms ± 0% -5.51% (p=0.000 n=10+10) ``` Release justification: bug fixes and low-risk updates to new functionality. Release note: None --- pkg/storage/mvcc_incremental_iterator.go | 137 +++++++++--------- .../mvcc_histories/range_key_iter_incremental | 76 ++++++++++ 2 files changed, 146 insertions(+), 67 deletions(-) diff --git a/pkg/storage/mvcc_incremental_iterator.go b/pkg/storage/mvcc_incremental_iterator.go index 4ac67b426d66..c27821ce509d 100644 --- a/pkg/storage/mvcc_incremental_iterator.go +++ b/pkg/storage/mvcc_incremental_iterator.go @@ -93,16 +93,14 @@ type MVCCIncrementalIterator struct { // or range key from the underlying iterator. If true, this implies that the // underlying iterator returns true as well. This can be used to hide point or // range keys where one key kind satisfies the time predicate but the other - // one doesn't. + // one doesn't. Ignored following IgnoringTime() calls. hasPoint, hasRange bool - // rangeKeysStart contains the last seen range key start bound. It is used - // to detect changes to range keys. - // - // TODO(erikgrinaker): This pattern keeps coming up, and involves one - // comparison for every covered point key. Consider exposing this from Pebble, - // who has presumably already done these comparisons, so we can avoid them. - rangeKeysStart roachpb.Key + // rangeKeys contains the filtered range keys at the current location. + rangeKeys MVCCRangeKeyStack + + // rangeKeysIgnoringTime contains the complete range keys at the current location. + rangeKeysIgnoringTime MVCCRangeKeyStack // ignoringTime is true if the iterator is currently ignoring time bounds, // i.e. following a call to NextIgnoringTime(). @@ -246,8 +244,7 @@ func (i *MVCCIncrementalIterator) SeekGE(startKey MVCCKey) { } } i.iter.SeekGE(startKey) - i.rangeKeysStart = nil - i.advance() + i.advance(true /* seek */) } // Close implements SimpleMVCCIterator. @@ -261,7 +258,7 @@ func (i *MVCCIncrementalIterator) Close() { // Next implements SimpleMVCCIterator. func (i *MVCCIncrementalIterator) Next() { i.iter.Next() - i.advance() + i.advance(false /* seek */) } // updateValid updates i.valid and i.err based on the underlying iterator, and @@ -275,7 +272,7 @@ func (i *MVCCIncrementalIterator) updateValid() bool { // NextKey implements SimpleMVCCIterator. func (i *MVCCIncrementalIterator) NextKey() { i.iter.NextKey() - i.advance() + i.advance(false /* seek */) } // maybeSkipKeys checks if any keys can be skipped by using a time-bound @@ -414,22 +411,49 @@ func (i *MVCCIncrementalIterator) updateMeta() error { return nil } +// updateRangeKeys updates the iterator with the current range keys, filtered by +// time span, and returns whether the position has point and/or range keys. +func (i *MVCCIncrementalIterator) updateRangeKeys() (bool, bool) { + hasPoint, hasRange := i.iter.HasPointAndRange() + if hasRange { + // Clone full set of range keys into i.rangeKeysIgnoringTime. + rangeKeys := i.iter.RangeKeys() + rangeKeys.CloneInto(&i.rangeKeysIgnoringTime) + + // Keep trimmed subset in i.rangeKeys. + i.rangeKeys = i.rangeKeysIgnoringTime + i.rangeKeys.Trim(i.startTime.Next(), i.endTime) + if i.rangeKeys.IsEmpty() { + i.rangeKeys.Clear() + hasRange = false + } + } else { + i.rangeKeys.Clear() + i.rangeKeysIgnoringTime.Clear() + } + return hasPoint, hasRange +} + // advance advances the main iterator until it is referencing a key within -// (start_time, end_time]. +// (start_time, end_time]. If seek is true, the caller is a SeekGE operations, +// in which case we should emit the current range key position even if +// RangeKeyChanged() doesn't trigger. // // It populates i.err with an error if it encountered an inline value or an // intent with a timestamp within the incremental iterator's bounds when the // intent policy is MVCCIncrementalIterIntentPolicyError. -func (i *MVCCIncrementalIterator) advance() { +func (i *MVCCIncrementalIterator) advance(seek bool) { i.ignoringTime = false for { if !i.updateValid() { return } + rangeKeyChanged := i.iter.RangeKeyChanged() skipped := i.maybeSkipKeys() if !i.valid { return } + rangeKeyChanged = rangeKeyChanged || i.iter.RangeKeyChanged() if skipped { // The skip may landed in the middle of a bare range key, in which case we // should move on to the next key. @@ -439,44 +463,32 @@ func (i *MVCCIncrementalIterator) advance() { if !i.updateValid() { return } + rangeKeyChanged = rangeKeyChanged || i.iter.RangeKeyChanged() } } } - // NB: Don't update i.hasRange directly -- we only change it when - // i.rangeKeysStart changes, which allows us to retain i.hasRange=false if - // we've already determined that the current range keys are outside of the - // time bounds. - hasPoint, hasRange := i.iter.HasPointAndRange() - i.hasPoint = hasPoint - // Process range keys. - // - // TODO(erikgrinaker): This needs to be optimized. For example, range keys - // only change on unversioned keys (except after a SeekGE), which can save a - // bunch of comparisons here. HasPointAndRange() has also been seen to have - // a non-negligible cost even without any range keys. var newRangeKey bool - if hasRange { - if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) { - i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...) - i.hasRange = i.iter.RangeKeys().HasBetween(i.startTime.Next(), i.endTime) - newRangeKey = i.hasRange + if seek || rangeKeyChanged { + i.hasPoint, i.hasRange = i.updateRangeKeys() + newRangeKey = i.hasRange + + // If we're on a visible, bare range key then we're done. If the range key + // isn't visible either, then we keep going. + if !i.hasPoint { + if !i.hasRange { + i.iter.Next() + continue + } + i.meta.Reset() + return } - // Else: keep i.hasRange from last i.rangeKeysStart change. - } else { - i.hasRange = false - } - // If we're on a visible, bare range key then we're done. If the range key - // isn't visible either, then we keep going. - if !i.hasPoint { - if !i.hasRange { - i.iter.Next() - continue - } - i.meta.Reset() - return + } else { + // If the range key didn't change, and this wasn't a seek, then we must be + // on a point key since the iterator won't surface anything else. + i.hasPoint = true } // Process point keys. @@ -541,30 +553,26 @@ func (i *MVCCIncrementalIterator) UnsafeKey() MVCCKey { // HasPointAndRange implements SimpleMVCCIterator. func (i *MVCCIncrementalIterator) HasPointAndRange() (bool, bool) { + if i.ignoringTime { + return i.iter.HasPointAndRange() + } return i.hasPoint, i.hasRange } // RangeBounds implements SimpleMVCCIterator. func (i *MVCCIncrementalIterator) RangeBounds() roachpb.Span { - if !i.hasRange { - return roachpb.Span{} + if i.ignoringTime { + return i.rangeKeysIgnoringTime.Bounds } - return i.iter.RangeBounds() + return i.rangeKeys.Bounds } // RangeKeys implements SimpleMVCCIterator. func (i *MVCCIncrementalIterator) RangeKeys() MVCCRangeKeyStack { - if !i.hasRange { - return MVCCRangeKeyStack{} - } - // TODO(erikgrinaker): It may be worthwhile to clone and memoize this result - // for the same range key. However, callers may avoid calling RangeKeys() - // unnecessarily, and we may optimize parent iterators, so let's measure. - rangeKeys := i.iter.RangeKeys() - if !i.ignoringTime { - rangeKeys.Trim(i.startTime.Next(), i.endTime) + if i.ignoringTime { + return i.rangeKeysIgnoringTime } - return rangeKeys + return i.rangeKeys } // RangeKeyChanged implements SimpleMVCCIterator. @@ -589,17 +597,12 @@ func (i *MVCCIncrementalIterator) updateIgnoreTime() { return } - i.hasPoint, i.hasRange = i.iter.HasPointAndRange() - if i.hasRange { - // Make sure we update rangeKeysStart appropriately so that switching back - // to regular iteration won't emit bare range keys twice. - if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) { - i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...) + if i.iter.RangeKeyChanged() { + if i.hasPoint, i.hasRange = i.updateRangeKeys(); !i.hasPoint { + return } - } - - if !i.hasPoint { - return + } else { + i.hasPoint = true } if err := i.updateMeta(); err != nil { diff --git a/pkg/storage/testdata/mvcc_histories/range_key_iter_incremental b/pkg/storage/testdata/mvcc_histories/range_key_iter_incremental index 27d1030fe8fd..eb67adaa25b1 100644 --- a/pkg/storage/testdata/mvcc_histories/range_key_iter_incremental +++ b/pkg/storage/testdata/mvcc_histories/range_key_iter_incremental @@ -1135,6 +1135,47 @@ iter_next: "b"/4.000000000,0=/ {b-c}/[3.000000000,0=/] iter_next_ignoring_time: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] iter_next: "d"/4.000000000,0=/BYTES/d4 +# Switch between ignoring and respecting time with NextIgnoringTime across span. +run ok +iter_new_incremental types=pointsAndRanges k=a end=z startTs=2 endTs=4 intents=emit +iter_seek_ge k=a +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +iter_next_ignoring_time +iter_next +---- +iter_seek_ge: "a"/4.000000000,0=/ +iter_next: {b-c}/[3.000000000,0=/] +iter_next_ignoring_time: "b"/4.000000000,0=/ {b-c}/[3.000000000,0=/ 1.000000000,0=/] +iter_next: {c-d}/[3.000000000,0=/] +iter_next_ignoring_time: "d"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {d-f}/[5.000000000,0=/ 1.000000000,0=/] +iter_next: "d"/4.000000000,0=/BYTES/d4 +iter_next_ignoring_time: "e"/3.000000000,0=/BYTES/e3 {d-f}/[5.000000000,0=/ 1.000000000,0=/] +iter_next: {f-g}/[3.000000000,0=/] +iter_next_ignoring_time: "f"/6.000000000,0=/BYTES/f6 {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +iter_next: "f"/4.000000000,0=/BYTES/f4 {f-g}/[3.000000000,0=/] +iter_next_ignoring_time: "f"/2.000000000,0=/BYTES/f2 {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +iter_next: {g-h}/[3.000000000,0=/] +iter_next_ignoring_time: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/ 1.000000000,0=/] +iter_next: "h"/4.000000000,0=/ +iter_next_ignoring_time: "h"/3.000000000,0=/BYTES/h3 {h-k}/[1.000000000,0=/] +iter_next: {m-n}/[3.000000000,0={localTs=2.000000000,0}/] +iter_next_ignoring_time: "m"/8.000000000,0=/BYTES/m8 {m-n}/[3.000000000,0={localTs=2.000000000,0}/] +iter_next: . + # NextIgnoringTime with only range keys. run ok iter_new_incremental types=rangesOnly k=a end=z startTs=4 endTs=5 intents=emit @@ -1160,6 +1201,7 @@ iter_next_key_ignoring_time iter_next_key_ignoring_time iter_next_key_ignoring_time iter_next_key_ignoring_time +iter_next_key_ignoring_time ---- iter_seek_ge: "a"/4.000000000,0=/ iter_next_key_ignoring_time: {b-c}/[3.000000000,0=/ 1.000000000,0=/] @@ -1174,6 +1216,40 @@ iter_next_key_ignoring_time: "k"/5.000000000,0=/BYTES/k5 iter_next_key_ignoring_time: "l"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true iter_next_key_ignoring_time: "m"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {m-n}/[3.000000000,0={localTs=2.000000000,0}/] iter_next_key_ignoring_time: "o"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +iter_next_key_ignoring_time: . + +# Switch between ignoring and respecting time with NextKeyIgnoringTime across span. +run ok +iter_new_incremental types=pointsAndRanges k=a end=z startTs=2 endTs=4 intents=emit +iter_seek_ge k=a +iter_next +iter_next_key_ignoring_time +iter_next +iter_next_key_ignoring_time +iter_next +iter_next_key_ignoring_time +iter_next +iter_next_key_ignoring_time +iter_next +iter_next_key_ignoring_time +iter_next +iter_next_key_ignoring_time +iter_next +---- +iter_seek_ge: "a"/4.000000000,0=/ +iter_next: {b-c}/[3.000000000,0=/] +iter_next_key_ignoring_time: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +iter_next: "d"/4.000000000,0=/BYTES/d4 +iter_next_key_ignoring_time: "e"/3.000000000,0=/BYTES/e3 {d-f}/[5.000000000,0=/ 1.000000000,0=/] +iter_next: {f-g}/[3.000000000,0=/] +iter_next_key_ignoring_time: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +iter_next: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/] +iter_next_key_ignoring_time: {h-k}/[1.000000000,0=/] +iter_next: "h"/4.000000000,0=/ +iter_next_key_ignoring_time: "j"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {h-k}/[1.000000000,0=/] +iter_next: {m-n}/[3.000000000,0={localTs=2.000000000,0}/] +iter_next_key_ignoring_time: "o"/0,0=txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +iter_next: . # NextKeyIgnoringTime with only range keys. run ok