-
Notifications
You must be signed in to change notification settings - Fork 6.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
optimize file ingestion checks for range deletion overlap #3179
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -376,6 +376,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( | |
|
||
Status ExternalSstFileIngestionJob::IngestedFilesOverlapWithMemtables( | ||
SuperVersion* sv, bool* overlap) { | ||
*overlap = false; | ||
// Create an InternalIterator over all memtables | ||
Arena arena; | ||
ReadOptions ro; | ||
|
@@ -391,26 +392,33 @@ Status ExternalSstFileIngestionJob::IngestedFilesOverlapWithMemtables( | |
memtable_range_del_iters.push_back(active_range_del_iter); | ||
} | ||
sv->imm->AddRangeTombstoneIterators(ro, &memtable_range_del_iters); | ||
std::unique_ptr<InternalIterator> memtable_range_del_iter(NewMergingIterator( | ||
&cfd_->internal_comparator(), | ||
memtable_range_del_iters.empty() ? nullptr : &memtable_range_del_iters[0], | ||
static_cast<int>(memtable_range_del_iters.size()))); | ||
|
||
RangeDelAggregator range_del_agg(cfd_->internal_comparator(), | ||
{} /* snapshots */, | ||
false /* collapse_deletions */); | ||
Status status; | ||
*overlap = false; | ||
for (IngestedFileInfo& f : files_to_ingest_) { | ||
status = | ||
IngestedFileOverlapWithIteratorRange(&f, memtable_iter.get(), overlap); | ||
if (!status.ok() || *overlap == true) { | ||
break; | ||
} | ||
status = IngestedFileOverlapWithRangeDeletions( | ||
&f, memtable_range_del_iter.get(), overlap); | ||
if (!status.ok() || *overlap == true) { | ||
break; | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As no new locks are created here, why do you need a new scope? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It was to make |
||
std::unique_ptr<InternalIterator> memtable_range_del_iter( | ||
NewMergingIterator(&cfd_->internal_comparator(), | ||
memtable_range_del_iters.empty() | ||
? nullptr | ||
: &memtable_range_del_iters[0], | ||
static_cast<int>(memtable_range_del_iters.size()))); | ||
status = range_del_agg.AddTombstones(std::move(memtable_range_del_iter)); | ||
} | ||
if (status.ok()) { | ||
for (IngestedFileInfo& f : files_to_ingest_) { | ||
status = IngestedFileOverlapWithIteratorRange(&f, memtable_iter.get(), | ||
overlap); | ||
if (!status.ok() || *overlap == true) { | ||
break; | ||
} | ||
if (range_del_agg.IsRangeOverlapped(f.smallest_user_key, | ||
f.largest_user_key)) { | ||
*overlap = true; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
return status; | ||
} | ||
|
||
|
@@ -575,34 +583,6 @@ Status ExternalSstFileIngestionJob::IngestedFileOverlapWithIteratorRange( | |
return iter->status(); | ||
} | ||
|
||
Status ExternalSstFileIngestionJob::IngestedFileOverlapWithRangeDeletions( | ||
const IngestedFileInfo* file_to_ingest, InternalIterator* range_del_iter, | ||
bool* overlap) { | ||
auto* vstorage = cfd_->current()->storage_info(); | ||
auto* ucmp = vstorage->InternalComparator()->user_comparator(); | ||
|
||
*overlap = false; | ||
if (range_del_iter != nullptr) { | ||
for (range_del_iter->SeekToFirst(); range_del_iter->Valid(); | ||
range_del_iter->Next()) { | ||
ParsedInternalKey parsed_key; | ||
if (!ParseInternalKey(range_del_iter->key(), &parsed_key)) { | ||
return Status::Corruption("corrupted range deletion key: " + | ||
range_del_iter->key().ToString()); | ||
} | ||
RangeTombstone range_del(parsed_key, range_del_iter->value()); | ||
if (ucmp->Compare(range_del.start_key_, | ||
file_to_ingest->largest_user_key) <= 0 && | ||
ucmp->Compare(file_to_ingest->smallest_user_key, | ||
range_del.end_key_) <= 0) { | ||
*overlap = true; | ||
break; | ||
} | ||
} | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
bool ExternalSstFileIngestionJob::IngestedFileFitInLevel( | ||
const IngestedFileInfo* file_to_ingest, int level) { | ||
if (level == 0) { | ||
|
@@ -639,23 +619,26 @@ Status ExternalSstFileIngestionJob::IngestedFileOverlapWithLevel( | |
ro.total_order_seek = true; | ||
MergeIteratorBuilder merge_iter_builder(&cfd_->internal_comparator(), | ||
&arena); | ||
// Files are opened lazily when the iterator needs them, thus range deletions | ||
// are also added lazily to the aggregator. We need to check for range | ||
// deletion overlap only in the case where there's no point-key overlap. Then, | ||
// we've already opened the file with range containing the ingested file's | ||
// begin key, and iterated through all files until the one containing the | ||
// ingested file's end key. So any files maybe containing range deletions | ||
// overlapping the ingested file must have been opened and had their range | ||
// deletions added to the aggregator. | ||
RangeDelAggregator range_del_agg(cfd_->internal_comparator(), | ||
{} /* snapshots */, | ||
false /* collapse_deletions */); | ||
sv->current->AddIteratorsForLevel(ro, env_options_, &merge_iter_builder, lvl, | ||
nullptr /* range_del_agg */); | ||
&range_del_agg); | ||
ScopedArenaIterator level_iter(merge_iter_builder.Finish()); | ||
|
||
std::vector<InternalIterator*> level_range_del_iters; | ||
sv->current->AddRangeDelIteratorsForLevel(ro, env_options_, lvl, | ||
&level_range_del_iters); | ||
std::unique_ptr<InternalIterator> level_range_del_iter(NewMergingIterator( | ||
&cfd_->internal_comparator(), | ||
level_range_del_iters.empty() ? nullptr : &level_range_del_iters[0], | ||
static_cast<int>(level_range_del_iters.size()))); | ||
|
||
Status status = IngestedFileOverlapWithIteratorRange( | ||
file_to_ingest, level_iter.get(), overlap_with_level); | ||
if (status.ok() && *overlap_with_level == false) { | ||
status = IngestedFileOverlapWithRangeDeletions( | ||
file_to_ingest, level_range_del_iter.get(), overlap_with_level); | ||
if (status.ok() && *overlap_with_level == false && | ||
range_del_agg.IsRangeOverlapped(file_to_ingest->smallest_user_key, | ||
file_to_ingest->largest_user_key)) { | ||
*overlap_with_level = true; | ||
} | ||
return status; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -140,6 +140,29 @@ bool RangeDelAggregator::ShouldDeleteImpl( | |
return parsed.sequence < tombstone_map_iter->second.seq_; | ||
} | ||
|
||
bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, | ||
const Slice& end) { | ||
// so far only implemented for non-collapsed mode since file ingestion (only | ||
// client) doesn't use collapsing | ||
assert(!collapse_deletions_); | ||
if (rep_ == nullptr) { | ||
return false; | ||
} | ||
for (const auto& seqnum_and_tombstone_map : rep_->stripe_map_) { | ||
for (const auto& start_key_and_tombstone : | ||
seqnum_and_tombstone_map.second.raw_map) { | ||
const auto& tombstone = start_key_and_tombstone.second; | ||
if (icmp_.user_comparator()->Compare(start, tombstone.end_key_) < 0 && | ||
icmp_.user_comparator()->Compare(tombstone.start_key_, end) <= 0 && | ||
icmp_.user_comparator()->Compare(tombstone.start_key_, | ||
tombstone.end_key_) != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am assuming start_key_ can never be less than end_key, in which case, shouldn't this condition be There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah except maybe |
||
return true; | ||
} | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
bool RangeDelAggregator::ShouldAddTombstones( | ||
bool bottommost_level /* = false */) { | ||
// TODO(andrewkr): can we just open a file and throw it away if it ends up | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does the L0 file count increase by 2 here? I was assuming only one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When ingesting an external file, we first flush the memtable if the memtable's key range overlaps with the ingested file. That's done so the ingested file's data will be newer than any existing data. So the first one is the memtable being flushed, and the second one is the file being ingested.