From bef7a8b98dc55f03875678793da9680fa7573db8 Mon Sep 17 00:00:00 2001 From: kungurtsev Date: Fri, 5 Jul 2024 16:34:13 +0200 Subject: [PATCH 1/2] Iterative B-Tree histograms builder (#6047) --- ydb/core/tablet_flat/flat_stat_table.cpp | 3 - .../flat_stat_table_btree_index.cpp | 217 +++++++ .../tablet_flat/flat_stat_table_btree_index.h | 208 +------ .../flat_stat_table_btree_index_histogram.cpp | 533 ++++++++++++++++++ .../flat_stat_table_btree_index_histogram.h | 525 ----------------- .../tablet_flat/test/libs/table/test_mixer.h | 2 +- ydb/core/tablet_flat/ut/ut_stat.cpp | 131 ++++- ydb/core/tablet_flat/ya.make | 2 + ydb/core/tx/datashard/datashard__stats.cpp | 2 +- .../schemeshard__table_stats_histogram.cpp | 54 +- ydb/core/tx/schemeshard/schemeshard_utils.h | 2 +- ydb/core/tx/schemeshard/ut_base/ut_base.cpp | 159 +++++- .../ut_compaction/ut_compaction.cpp | 2 +- 13 files changed, 1076 insertions(+), 764 deletions(-) create mode 100644 ydb/core/tablet_flat/flat_stat_table_btree_index.cpp create mode 100644 ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp delete mode 100644 ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h diff --git a/ydb/core/tablet_flat/flat_stat_table.cpp b/ydb/core/tablet_flat/flat_stat_table.cpp index 08a850256e90..afacbe7f3747 100644 --- a/ydb/core/tablet_flat/flat_stat_table.cpp +++ b/ydb/core/tablet_flat/flat_stat_table.cpp @@ -17,9 +17,6 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u } } - // TODO: enable b-tree index after benchmarks - mixedIndex = true; - return mixedIndex ? 
BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler) : BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp new file mode 100644 index 000000000000..e25f160cfef3 --- /dev/null +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp @@ -0,0 +1,217 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_stat_table_btree_index.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; + +ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + return 0; + } + if (rowId >= meta.GetRowCount()) { + return meta.GetDataSize(); + } + + TPageId pageId = meta.GetPageId(); + ui64 prevDataSize = 0; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(rowId); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); + } + } + + return prevDataSize; +} + +ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { + Y_ABORT_UNLESS(groupId == TGroupId(0, true)); + + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + historicRowId = 0; + return 0; + } + if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { + historicRowId = meta.GetRowCount(); + return meta.GetDataSize(); + } + + TPageId pageId 
= meta.GetPageId(); + ui64 prevDataSize = 0; + historicRowId = 0; + + // Minimum key is (startRowId, max, max) + ui64 startStep = Max(); + ui64 startTxId = Max(); + TCell key1Cells[3] = { + TCell::Make(rowId), + TCell::Make(startStep), + TCell::Make(startTxId), + }; + TCells key1{ key1Cells, 3 }; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + const auto& prevChild = node.GetShortChild(pos - 1); + prevDataSize = prevChild.GetDataSize(); + historicRowId = prevChild.GetRowCount(); + } + } + + return prevDataSize; +} + +void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) noexcept { + ui32 page = frames->Lower(beginRowId, 0, Max()); + + while (auto &rel = frames->Relation(page)) { + if (rel.Row < endRowId) { + auto channel = part->GetPageChannel(lob, page); + stats.Add(rel.Size, channel); + ++page; + } else if (!rel.IsHead()) { + Y_ABORT("Got unaligned TFrames head record"); + } else { + break; + } + } +} + +bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { + bool ready = true; + + if (!part.Slices || part.Slices->empty()) { + return true; + } + + if (part->GroupsCount) { // main group + TGroupId groupId{}; + auto channel = part->GetGroupChannel(groupId); + + for (const auto& slice : *part.Slices) { + yieldHandler(); + + stats.RowCount += slice.EndRowId() - slice.BeginRowId(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && 
endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + + if (part->Small) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); + } + if (part->Large) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); + } + } + } + + for (ui32 groupIndex : xrange(1, part->GroupsCount)) { + TGroupId groupId{groupIndex}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + TVector> historicSlices; + + if (part->HistoricGroupsCount) { // main historic group + TGroupId groupId{0, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + TRowId beginRowId, endRowId; + bool readySlice = true; + ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); + ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); + ready &= readySlice; + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + if (readySlice && endRowId > beginRowId) { + historicSlices.emplace_back(beginRowId, endRowId); + } + } + } + + for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { + TGroupId groupId{groupIndex, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : historicSlices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); + 
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + return ready; +} + +} + +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { + stats.Clear(); + + bool ready = true; + for (const auto& part : subset.Flatten) { + stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); + ready &= AddDataSize(part, stats, env, yieldHandler); + } + + if (!ready) { + return false; + } + + ready &= BuildStatsHistogramsBTreeIndex(subset, stats, + stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount, + env, yieldHandler); + + return ready; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.h b/ydb/core/tablet_flat/flat_stat_table_btree_index.h index a82c8e70a41f..a01a92d8890b 100644 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index.h +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.h @@ -2,216 +2,12 @@ #include "flat_stat_table.h" #include "flat_table_subset.h" -#include "flat_stat_table_btree_index_histogram.h" namespace NKikimr::NTable { -namespace { +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler); -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler); -ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - 
return 0; - } - if (rowId >= meta.GetRowCount()) { - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(rowId); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); - } - } - - return prevDataSize; -} - -ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { - Y_ABORT_UNLESS(groupId == TGroupId(0, true)); - - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - historicRowId = 0; - return 0; - } - if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { - historicRowId = meta.GetRowCount(); - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - historicRowId = 0; - - // Minimum key is (startRowId, max, max) - ui64 startStep = Max(); - ui64 startTxId = Max(); - TCell key1Cells[3] = { - TCell::Make(rowId), - TCell::Make(startStep), - TCell::Make(startTxId), - }; - TCells key1{ key1Cells, 3 }; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - const auto& prevChild = node.GetShortChild(pos - 1); - prevDataSize = prevChild.GetDataSize(); - historicRowId = prevChild.GetRowCount(); - } - } - - return prevDataSize; -} - -void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) 
noexcept { - ui32 page = frames->Lower(beginRowId, 0, Max()); - - while (auto &rel = frames->Relation(page)) { - if (rel.Row < endRowId) { - auto channel = part->GetPageChannel(lob, page); - stats.Add(rel.Size, channel); - ++page; - } else if (!rel.IsHead()) { - Y_ABORT("Got unaligned TFrames head record"); - } else { - break; - } - } -} - -bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - if (!part.Slices || part.Slices->empty()) { - return true; - } - - if (part->GroupsCount) { // main group - TGroupId groupId{}; - auto channel = part->GetGroupChannel(groupId); - - for (const auto& slice : *part.Slices) { - yieldHandler(); - - stats.RowCount += slice.EndRowId() - slice.BeginRowId(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - - if (part->Small) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); - } - if (part->Large) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); - } - } - } - - for (ui32 groupIndex : xrange(1, part->GroupsCount)) { - TGroupId groupId{groupIndex}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - TVector> historicSlices; - - if (part->HistoricGroupsCount) { // main historic group - 
TGroupId groupId{0, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - TRowId beginRowId, endRowId; - bool readySlice = true; - ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); - ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); - ready &= readySlice; - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - if (readySlice && endRowId > beginRowId) { - historicSlices.emplace_back(beginRowId, endRowId); - } - } - } - - for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { - TGroupId groupId{groupIndex, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : historicSlices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - return ready; -} - -} - -inline bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - stats.Clear(); - - bool ready = true; - for (const auto& part : subset.Flatten) { - stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); - ready &= AddDataSize(part, stats, env, yieldHandler); - } - - if (!ready) { - return false; - } - - ready &= BuildStatsHistogramsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); - - return ready; -} } diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp new file mode 100644 index 000000000000..3a8dbfd483d5 --- /dev/null +++ 
b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp @@ -0,0 +1,533 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_page_btree_index_writer.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; +using TCellsIterable = TBtreeIndexNode::TCellsIterable; +using TCellsIter = TBtreeIndexNode::TCellsIter; + +const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); + +enum class ENodeState : ui8 { + Initial, + Opened, + Closed, + Ignored, +}; + +class TTableHistogramBuilderBtreeIndex { + struct TNodeState { + const TPart* Part; + TPageId PageId; + ui32 Level; + TRowId BeginRowId, EndRowId; + ui64 BeginDataSize, EndDataSize; + TCellsIterable BeginKey, EndKey; + ENodeState State = ENodeState::Initial; + + TNodeState(const TPart* part, TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TRowId beginDataSize, TRowId endDataSize, TCellsIterable beginKey, TCellsIterable endKey) + : Part(part) + , PageId(pageId) + , Level(level) + , BeginRowId(beginRowId) + , EndRowId(endRowId) + , BeginDataSize(beginDataSize) + , EndDataSize(endDataSize) + , BeginKey(beginKey) + , EndKey(endKey) + { + } + + TRowId GetRowCount() const noexcept { + return EndRowId - BeginRowId; + } + + ui64 GetDataSize() const noexcept { + return EndDataSize - BeginDataSize; + } + + // usually a node state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. Closed - after processing TEvent.IsBegin = false + // if an opened node is being loaded, its state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. 
Ignored - after have been loaded + // in a case when a node EndKey <= BeginKey a node state goes in order: + // (which is theoretically possible scenario because of slice bounds) + // 1. Initial + // 2. Closed - after processing TEvent.IsBegin = false + + bool Open(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Initial)) { + State = ENodeState::Opened; + openedRowCount += GetRowCount(); + openedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool Close(ui64& openedRowCount, ui64& closedRowCount, ui64& openedDataSize, ui64& closedDataSize) noexcept { + if (State == ENodeState::Opened) { + State = ENodeState::Closed; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + closedRowCount += rowCount; + closedDataSize += dataSize; + return true; + } else if (Y_UNLIKELY(State == ENodeState::Initial)) { + State = ENodeState::Closed; + closedRowCount += GetRowCount(); + closedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool IgnoreOpened(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Opened)) { + State = ENodeState::Ignored; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + return true; + } + return false; + } + }; + + struct TEvent { + TCellsIterable Key; + bool IsBegin; + TNodeState* Node; + }; + + struct TNodeEventKeyGreater { + const TKeyCellDefaults& KeyDefaults; + + bool operator ()(const TEvent& a, const TEvent& b) const noexcept { + return Compare(a, b) > 0; + } + + i8 Compare(const TEvent& a, const TEvent& b) const noexcept { + // events go in order: + // - Key = {}, IsBegin = true + // - ... 
+ // - Key = {'c'}, IsBegin = false + // - Key = {'c'}, IsBegin = true + // - ... + // - Key = {'d'}, IsBegin = false + // - Key = {'d'}, IsBegin = true + // - ... + // - Key = {}, IsBegin = false + + if (a.Key && b.Key) { // compare by keys + auto cmp = CompareKeys(a.Key, b.Key, KeyDefaults); + if (cmp != 0) { + return cmp; + } + // keys are the same, compare by begin flag, end events first: + return Compare(a.IsBegin ? 1 : -1, b.IsBegin ? 1 : -1); + } + + // category = -1 for Key = { }, IsBegin = true + // category = 0 for Key = {*}, IsBegin = * + // category = 1 for Key = { }, IsBegin = false + return Compare(GetCategory(a), GetCategory(b)); + } + + private: + static i8 GetCategory(const TEvent& a) noexcept { + if (a.Key) { + return 0; + } + return a.IsBegin ? -1 : 1; + } + + static i8 Compare(i8 a, i8 b) noexcept { + if (a < b) return -1; + if (a > b) return 1; + return 0; + } + }; + + struct TNodeRowCountLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetRowCount() < b->GetRowCount(); + } + }; + + struct TNodeDataSizeLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetDataSize() < b->GetDataSize(); + } + }; + +public: + TTableHistogramBuilderBtreeIndex(const TSubset& subset, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) + : Subset(subset) + , KeyDefaults(*Subset.Scheme->Keys) + , RowCountResolution(rowCountResolution) + , DataSizeResolution(dataSizeResolution) + , RowCountResolutionGap(RowCountResolution / 2) + , DataSizeResolutionGap(DataSizeResolution / 2) + , Env(env) + , YieldHandler(yieldHandler) + , NodeEventKeyGreater{KeyDefaults} + , FutureEvents(NodeEventKeyGreater) + { + } + + bool Build(TStats& stats) { + bool ready = true; + + for (auto index : xrange(Subset.Flatten.size())) { + auto& part = Subset.Flatten[index]; + auto& meta = part->IndexPages.GetBTree({}); + TCellsIterable beginKey = EmptyKey; + 
if (part.Slices && part.Slices->front().FirstKey.GetCells()) { + beginKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->front().FirstKey); + } + TCellsIterable endKey = EmptyKey; + if (part.Slices && part.Slices->back().LastKey.GetCells()) { + endKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->back().LastKey); + } + LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetDataSize(), beginKey, endKey); + ready &= SlicePart(*part.Slices, LoadedStateNodes.back()); + } + + if (!ready) { + return false; + } + + ready &= BuildIterate(stats); + + FutureEvents.clear(); + LoadedBTreeNodes.clear(); + LoadedStateNodes.clear(); + + return ready; + } + +private: + bool SlicePart(const TSlices& slices, TNodeState& node) { + YieldHandler(); + + // TODO: avoid binary search for each call (we may intersect slices with nodes in linear time actually) + auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); + + if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { + // skip the node + return true; + } + + if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { + // take the node + AddFutureEvents(node); + return true; + } + + // split the node + + if (node.Level == 0) { + // can't split, decide by node.EndRowId - 1 + // TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too + if (it->Has(node.EndRowId - 1)) { + AddFutureEvents(node); + } + return true; + } + + bool ready = true; + + const auto addNode = [&](TNodeState& child) { + ready &= SlicePart(slices, child); + }; + if (!TryLoadNode(node, addNode)) { + return false; + } + + return ready; + } + + bool BuildIterate(TStats& stats) { + // The idea is the following: + // - move a key pointer through all parts simultaneously + // keeping all nodes that contain current key pointer in opened heaps (sorted by size descending) + // all nodes 
that ended before current key pointer are considered as closed + // - keep an invariant that size of closed and opened nodes don't exceed next histogram bucket values + // otherwise, load opened nodes + // - because the histogram is approximate, each of its values is allowed to be in a range + // [next value - gap, next value + gap] + + // next histogram keys that are being looked for: + ui64 nextHistogramRowCount = RowCountResolution, nextHistogramDataSize = DataSizeResolution; + + // closed nodes stats: + ui64 closedRowCount = 0, closedDataSize = 0; + + // opened nodes stats and heaps: + ui64 openedRowCount = 0, openedDataSize = 0; + TPriorityQueue, TNodeRowCountLess> openedSortedByRowCount; + TPriorityQueue, TNodeDataSizeLess> openedSortedByDataSize; + + // will additionally save list of all nodes that start at current key pointer: + TVector currentKeyPointerOpens; + + while (FutureEvents && (nextHistogramRowCount != Max() || nextHistogramDataSize != Max())) { + YieldHandler(); + + auto currentKeyPointer = FutureEvents.top(); + currentKeyPointerOpens.clear(); + + auto processEvent = [&](const TEvent& event) { + Y_DEBUG_ABORT_UNLESS(NodeEventKeyGreater.Compare(event, currentKeyPointer) <= 0, "Can't process future events"); + if (event.IsBegin) { + if (event.Node->Open(openedRowCount, openedDataSize)) { + openedSortedByRowCount.push(event.Node); + openedSortedByDataSize.push(event.Node); + } + } else { + event.Node->Close(openedRowCount, closedRowCount, openedDataSize, closedDataSize); + } + }; + + // process all events with the same key and type as current key pointer: + do { + const TEvent& event = FutureEvents.top(); + processEvent(event); + if (event.IsBegin) { + currentKeyPointerOpens.push_back(event.Node); + } + FutureEvents.pop(); + } while (FutureEvents && NodeEventKeyGreater.Compare(FutureEvents.top(), currentKeyPointer) == 0); + + const auto addEvent = [&](TEvent event) { + // TODO: skip all closed nodes and don't process them here + // TODO: don't compare each node 
key and replace it with parentNode.Seek(currentKeyPointer) + auto cmp = NodeEventKeyGreater.Compare(event, currentKeyPointer); + if (cmp <= 0) { // event happened + processEvent(event); + if (cmp == 0) { + currentKeyPointerOpens.push_back(event.Node); + } + } else { // event didn't yet happen + FutureEvents.push(event); + } + }; + const auto addNode = [&](TNodeState& node) { + addEvent(TEvent{node.BeginKey, true, &node}); + addEvent(TEvent{node.EndKey, false, &node}); + }; + + // may safely skip current key pointer and go further only if at the next iteration + // sum of sizes of closed and opened nodes don't exceed next histogram bucket values (plus their gaps) + // otherwise, load opened nodes right now + // in that case, next level nodes will be converted to begin and end events + // and then either processed or been postponed to future events according to current key pointer position + while (nextHistogramRowCount != Max() && closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap && openedSortedByRowCount) { + auto node = openedSortedByRowCount.top(); + openedSortedByRowCount.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + while (nextHistogramDataSize != Max() && closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap && openedSortedByDataSize) { + auto node = openedSortedByDataSize.top(); + openedSortedByDataSize.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + + // add current key pointer to a histogram if we either: + // - failed to split opened nodes and may exceed a next histogram bucket 
value (plus its gaps) + // - have enough closed nodes (more than a next histogram bucket value (minus its gap)) + // current key pointer value is calculated as follows: + // - size of all closed nodes + // - minus size of all nodes that start at current key pointer + // - plus half of size of all other opened nodes (as their exact position is unknown) + // also check that current key pointer value is > than the last presented value in a histogram + if (currentKeyPointer.Key) { + if (nextHistogramRowCount != Max()) { + if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) { + ui64 currentKeyRowCountOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyRowCountOpens += node->GetRowCount(); + } + } + Y_ABORT_UNLESS(currentKeyRowCountOpens <= openedRowCount); + ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2; + if ((stats.RowCountHistogram.empty() ? 
0 : stats.RowCountHistogram.back().Value) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount) { + AddKey(stats.RowCountHistogram, currentKeyPointer.Key, currentKeyPointerRowCount); + nextHistogramRowCount = Max(currentKeyPointerRowCount + 1, nextHistogramRowCount + RowCountResolution); + if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount) { + nextHistogramRowCount = Max(); + } + } + } + } + if (nextHistogramDataSize != Max()) { + if (closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap || closedDataSize > nextHistogramDataSize - DataSizeResolutionGap) { + ui64 currentKeyDataSizeOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyDataSizeOpens += node->GetDataSize(); + } + } + Y_ABORT_UNLESS(currentKeyDataSizeOpens <= openedDataSize); + ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2; + if ((stats.DataSizeHistogram.empty() ? 
0 : stats.DataSizeHistogram.back().Value) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize.Size) { + AddKey(stats.DataSizeHistogram, currentKeyPointer.Key, currentKeyPointerDataSize); + nextHistogramDataSize = Max(currentKeyPointerDataSize + 1, nextHistogramDataSize + DataSizeResolution); + if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize.Size) { + nextHistogramDataSize = Max(); + } + } + } + } + } + } + + return true; + } + + void AddKey(THistogram& histogram, TCellsIterable& key, ui64 value) { + TVector keyCells; + + // add columns that are present in the part: + auto iter = key.Iter(); + for (TPos pos : xrange(iter.Count())) { + Y_UNUSED(pos); + keyCells.push_back(iter.Next()); + } + + // extend with default values if needed: + for (TPos index = keyCells.size(); index < KeyDefaults.Defs.size(); ++index) { + keyCells.push_back(KeyDefaults.Defs[index]); + } + + TString serializedKey = TSerializedCellVec::Serialize(keyCells); + + histogram.push_back({serializedKey, value}); + } + + bool TryLoadNode(const TNodeState& parent, const auto& addNode) { + Y_ABORT_UNLESS(parent.Level); + + auto page = Env->TryGetPage(parent.Part, parent.PageId, {}); + if (!page) { + return false; + } + + LoadedBTreeNodes.emplace_back(*page); + auto &bTreeNode = LoadedBTreeNodes.back(); + auto& groupInfo = parent.Part->Scheme->GetLayout({}); + + for (auto pos : xrange(bTreeNode.GetChildrenCount())) { + auto& child = bTreeNode.GetChild(pos); + + LoadedStateNodes.emplace_back(parent.Part, child.GetPageId(), parent.Level - 1, + pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), + pos ? bTreeNode.GetChild(pos - 1).GetTotalDataSize() : parent.BeginDataSize, child.GetTotalDataSize(), + pos ? bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, + pos < bTreeNode.GetKeysCount() ? 
bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey); + + addNode(LoadedStateNodes.back()); + } + + return true; + } + + void AddFutureEvents(TNodeState& node) { + FutureEvents.push(TEvent{node.BeginKey, true, &node}); + FutureEvents.push(TEvent{node.EndKey, false, &node}); + } + +private: + TCellsIterable MakeCellsIterableKey(const TPart* part, TSerializedCellVec serializedKey) { + // Note: this method is only called for root nodes and isn't worth optimizing + // so let's simply create a new fake b-tree index node with a given key + NPage::TBtreeIndexNodeWriter writer(part->Scheme, {}); + writer.AddChild({1, 1, 1, 0, 0}); + writer.AddKey(serializedKey.GetCells()); + writer.AddChild({2, 2, 2, 0, 0}); + TSharedData serializedNode = writer.Finish(); + LoadedBTreeNodes.emplace_back(serializedNode); + return LoadedBTreeNodes.back().GetKeyCellsIterable(0, part->Scheme->GetLayout({}).ColsKeyData); + } + + static int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_, const TKeyCellDefaults& keyDefaults) { + Y_ABORT_UNLESS(left_); + Y_ABORT_UNLESS(right_); + + auto left = left_.Iter(), right = right_.Iter(); + size_t end = Max(left.Count(), right.Count()); + Y_ABORT_UNLESS(end <= keyDefaults.Size(), "Key schema is smaller than compared keys"); + + for (size_t pos = 0; pos < end; ++pos) { + const auto& leftCell = pos < left.Count() ? left.Next() : keyDefaults.Defs[pos]; + const auto& rightCell = pos < right.Count() ? 
right.Next() : keyDefaults.Defs[pos]; + if (int cmp = CompareTypedCells(leftCell, rightCell, keyDefaults.Types[pos])) { + return cmp; + } + } + + return 0; + } + +private: + const TSubset& Subset; + const TKeyCellDefaults& KeyDefaults; + ui64 RowCountResolution, DataSizeResolution; + ui64 RowCountResolutionGap, DataSizeResolutionGap; + IPages* const Env; + TBuildStatsYieldHandler YieldHandler; + TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable references + TDeque LoadedStateNodes; // keep nodes to use their references + TNodeEventKeyGreater NodeEventKeyGreater; + TPriorityQueue, TNodeEventKeyGreater> FutureEvents; +}; + +} + +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) { + TTableHistogramBuilderBtreeIndex builder(subset, rowCountResolution, dataSizeResolution, env, yieldHandler); + + if (!builder.Build(stats)) { + return false; + } + + return true; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h deleted file mode 100644 index 78bb64b69ccb..000000000000 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h +++ /dev/null @@ -1,525 +0,0 @@ -#pragma once - -#include "flat_stat_table.h" -#include "flat_table_subset.h" - -namespace NKikimr::NTable { - -namespace { - -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; -using TCellsIterable = TBtreeIndexNode::TCellsIterable; -using TCellsIter = TBtreeIndexNode::TCellsIter; - -const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); - -class TTableHistogramBuilderBtreeIndex { -public: - struct TNodeState : public TIntrusiveListItem { - TPageId PageId; - ui32 Level; - TRowId 
BeginRowId, EndRowId; - TCellsIterable BeginKey, EndKey; - ui64 BeginSize, EndSize; - - TNodeState(TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, TCellsIterable endKey, TRowId beginSize, TRowId endSize) - : PageId(pageId) - , Level(level) - , BeginRowId(beginRowId) - , EndRowId(endRowId) - , BeginKey(beginKey) - , EndKey(endKey) - , BeginSize(beginSize) - , EndSize(endSize) - { - } - - ui64 GetSize() const noexcept { - return EndSize - BeginSize; - } - }; - - struct TGetRowCount { - static ui64 Get(const TChild& child) noexcept { - return child.GetRowCount(); - } - }; - - struct TGetDataSize { - static ui64 Get(const TChild& child) noexcept { - return child.GetTotalDataSize(); - } - }; - -private: - struct TPartNodes { - TPartNodes(const TPart* part, size_t index) - : Part(part) - , Index(index) - { - } - - const TPart* GetPart() const noexcept { - return Part; - } - - size_t GetIndex() const noexcept { - return Index; - } - - size_t GetCount() const noexcept { - return Count; - } - - ui64 GetSize() const noexcept { - return Size; - } - - const TIntrusiveList& GetNodes() const noexcept { - return Nodes; - } - - TNodeState* PopFront() noexcept { - auto result = Nodes.PopFront(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - TNodeState* PopBack() noexcept { - auto result = Nodes.PopBack(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - void PushFront(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushFront(item); - } - - void PushBack(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushBack(item); - } - - bool operator < (const TPartNodes& other) const noexcept { - return Size < other.Size; - } - - private: - const TPart* Part; - size_t Index; - size_t Count = 0; - ui64 Size = 0; - TIntrusiveList Nodes; - }; - -public: - TTableHistogramBuilderBtreeIndex(const TSubset& subset, IPages* env, ui32 histogramBucketsCount, 
TBuildStatsYieldHandler yieldHandler) - : Subset(subset) - , KeyDefaults(*Subset.Scheme->Keys) - , Env(env) - , HistogramBucketsCount(histogramBucketsCount) - , YieldHandler(yieldHandler) - { - } - - template - bool Build(THistogram& histogram, ui64 statTotalSize) { - if (!HistogramBucketsCount) { - return true; - } - - Resolution = statTotalSize / HistogramBucketsCount; - StatTotalSize = statTotalSize; - - bool ready = true; - ui64 endSize = 0; - TVector parts; - - for (auto index : xrange(Subset.Flatten.size())) { - auto& part = Subset.Flatten[index]; - auto& meta = part->IndexPages.GetBTree({}); - parts.emplace_back(part.Part.Get(), index); - LoadedStateNodes.emplace_back(meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), EmptyKey, EmptyKey, 0, TGetSize::Get(meta)); - ready &= SlicePart(parts.back(), *part.Slices, LoadedStateNodes.back()); - endSize += parts.back().GetSize(); - } - - if (!ready) { - return false; - } - - if (endSize) { - ready &= BuildHistogramRecursive(histogram, parts, 0, endSize, 0); - } - - LoadedBTreeNodes.clear(); - LoadedStateNodes.clear(); - - return ready; - } - -private: - template - bool SlicePart(TPartNodes& part, const TSlices& slices, TNodeState& node) { - YieldHandler(); - - auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); - - if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { - // skip the node - return true; - } - - if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { - // take the node - part.PushBack(&node); - return true; - } - - // split the node - - if (node.Level == 0) { - // can't split, decide by node.EndRowId - 1 - if (it->Has(node.EndRowId - 1)) { - part.PushBack(&node); - } - return true; - } - - bool ready = true; - - const auto addNode = [&](TNodeState& child) { - ready &= SlicePart(part, slices, child); - }; - if (!TryLoadNode(part.GetPart(), node, addNode)) { - return false; - } - - return ready; - } - - template - 
bool BuildHistogramRecursive(THistogram& histogram, TVector& parts, ui64 beginSize, ui64 endSize, ui32 depth) { - const static ui32 MaxDepth = 100; - - YieldHandler(); - -#ifndef NDEBUG - { - Y_DEBUG_ABORT_UNLESS(beginSize < endSize); - ui64 size = 0; - for (const auto& part : parts) { - size += part.GetSize(); - } - Y_DEBUG_ABORT_UNLESS(size == endSize - beginSize); - } -#endif - - if (SafeDiff(endSize, beginSize) <= Resolution || depth > MaxDepth) { - Y_DEBUG_ABORT_UNLESS(depth <= MaxDepth, "Shouldn't normally happen"); - return true; - } - - auto biggestPart = std::max_element(parts.begin(), parts.end()); - if (Y_UNLIKELY(biggestPart == parts.end())) { - Y_DEBUG_ABORT("Invalid part states"); - return true; - } - Y_ABORT_UNLESS(biggestPart->GetCount()); - - if (biggestPart->GetCount() == 1 && biggestPart->GetNodes().Front()->Level > 0) { - const auto addNode = [&biggestPart](TNodeState& child) { - biggestPart->PushBack(&child); - }; - if (!TryLoadNode(biggestPart->GetPart(), *biggestPart->PopFront(), addNode)) { - return false; - } - } - TCellsIterable splitKey = biggestPart->GetCount() > 1 - ? 
FindMedianPartKey(*biggestPart) - : FindMedianTableKey(parts); - - if (!splitKey) { - return true; - } - - ui64 leftSize = 0, middleSize = 0, rightSize = 0; - TVector leftParts, middleParts, rightParts; - - for (auto& part : parts) { - auto& leftNodes = PushNextPartNodes(part, leftParts); - auto& middleNodes = PushNextPartNodes(part, middleParts); - auto& rightNodes = PushNextPartNodes(part, rightParts); - - while (part.GetCount()) { - auto& node = *part.PopFront(); - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodes.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - } - - if (middleSize > Resolution / 2) { - std::make_heap(middleParts.begin(), middleParts.end()); - - while (middleSize > Resolution / 2 && middleParts.size()) { - std::pop_heap(middleParts.begin(), middleParts.end()); - auto& middleNodes = middleParts.back(); - auto& leftNodes = GetNextPartNodes(middleNodes, leftParts); - auto& rightNodes = GetNextPartNodes(middleNodes, rightParts); - TIntrusiveList rightNodesBuffer; - - leftSize -= leftNodes.GetSize(); - middleSize -= middleNodes.GetSize(); - rightSize -= rightNodes.GetSize(); - - auto count = middleNodes.GetCount(); - bool hasChanges = false; - for (auto index : xrange(count)) { - Y_UNUSED(index); - auto& node = *middleNodes.PopFront(); - if (!node.Level) { // can't be splitted, return as-is - middleNodes.PushBack(&node); - continue; - } - const auto addNode = [&](TNodeState& node) { - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodesBuffer.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - }; - if 
(!TryLoadNode(middleNodes.GetPart(), node, addNode)) { - return false; - } - hasChanges = true; - } - - while (!rightNodesBuffer.Empty()) { // reverse right part new nodes - rightNodes.PushFront(rightNodesBuffer.PopBack()); - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - - if (hasChanges) { // return updated nodes to the heap - std::push_heap(middleParts.begin(), middleParts.end()); - } else { // can't be splitted, ignore - middleParts.pop_back(); - } - } - } - - if (middleSize == 0 && (leftSize == 0 || rightSize == 0)) { - // no progress, don't continue - return true; - } - - bool ready = true; - - if (leftSize) { - ready &= BuildHistogramRecursive(histogram, leftParts, beginSize, beginSize + leftSize, depth + 1); - } - - ui64 splitSize = beginSize + leftSize + middleSize / 2; - // Note: due to different calculation approaches splitSize may exceed StatTotalSize, ignore them - if (beginSize < splitSize && splitSize < Min(endSize, StatTotalSize)) { - AddBucket(histogram, splitKey, splitSize); - } - - if (rightSize) { - ready &= BuildHistogramRecursive(histogram, rightParts, SafeDiff(endSize, rightSize), endSize, depth + 1); - } - - return ready; - } - - TCellsIterable FindMedianPartKey(const TPartNodes& part) { - Y_ABORT_UNLESS(part.GetCount() > 1, "It's impossible to split part with only one node"); - - TCellsIterable splitKey = EmptyKey; - ui64 splitSize = 0, currentSize = 0; - const ui64 middleSize = part.GetSize() / 2; - - for (const auto& node : part.GetNodes()) { - if (currentSize) { // can't split with the first key, skip it - if (!splitSize || AbsDifference(currentSize, middleSize) < AbsDifference(splitSize, middleSize)) { - splitKey = node.BeginKey; - splitSize = currentSize; - } - } - - currentSize += node.GetSize(); - } - - Y_ABORT_UNLESS(splitKey); - - return splitKey; - } - - TCellsIterable FindMedianTableKey(const TVector& parts) 
{ - TVector keys; - for (const auto& part : parts) { - for (const auto& node : part.GetNodes()) { - if (node.BeginKey) { - keys.push_back(node.BeginKey); - } - } - } - - auto median = keys.begin() + (keys.size() + 1) / 2; - - if (median == keys.end()) { - return EmptyKey; - } - - // Note: may work badly in case when all begin keys are the same - // however such cases are rare and don't worth optimizing with sort+unique complex code - // also this method is only called when we couldn't split the biggest part - std::nth_element(keys.begin(), median, keys.end(), [this](const TCellsIterable& left, const TCellsIterable& right) { - return CompareKeys(left, right) < 0; - }); - - return *median; - } - - void AddBucket(THistogram& histogram, TCellsIterable key, ui64 size) { - TVector splitKeyCells; - - // Add columns that are present in the part - auto iter = key.Iter(); - for (TPos pos : xrange(iter.Count())) { - Y_UNUSED(pos); - splitKeyCells.push_back(iter.Next()); - } - - // Extend with default values if needed - for (TPos index = splitKeyCells.size(); index < KeyDefaults.Defs.size(); ++index) { - splitKeyCells.push_back(KeyDefaults.Defs[index]); - } - - TString serializedSplitKey = TSerializedCellVec::Serialize(splitKeyCells); - - histogram.push_back({serializedSplitKey, size}); - } - - template - bool TryLoadNode(const TPart* part, const TNodeState& parent, const auto& addNode) { - Y_ABORT_UNLESS(parent.Level); - - auto page = Env->TryGetPage(part, parent.PageId, {}); - if (!page) { - return false; - } - - LoadedBTreeNodes.emplace_back(*page); - auto &bTreeNode = LoadedBTreeNodes.back(); - auto& groupInfo = part->Scheme->GetLayout({}); - - for (auto pos : xrange(bTreeNode.GetChildrenCount())) { - auto& child = bTreeNode.GetChild(pos); - - LoadedStateNodes.emplace_back(child.GetPageId(), parent.Level - 1, - pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), - pos ? 
bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, - pos < bTreeNode.GetKeysCount() ? bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey, - pos ? TGetSize::Get(bTreeNode.GetChild(pos - 1)) : parent.BeginSize, TGetSize::Get(child)); - - addNode(LoadedStateNodes.back()); - } - - return true; - } - - TPartNodes& PushNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetIndex() == list.size()); - list.emplace_back(part.GetPart(), part.GetIndex()); - return list.back(); - } - - TPartNodes& GetNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetPart() == list[part.GetIndex()].GetPart()); - return list[part.GetIndex()]; - } - -private: - int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_) const { - Y_DEBUG_ABORT_UNLESS(left_); - Y_DEBUG_ABORT_UNLESS(right_); - - auto left = left_.Iter(), right = right_.Iter(); - size_t end = Max(left.Count(), right.Count()); - Y_DEBUG_ABORT_UNLESS(end <= KeyDefaults.Size(), "Key schema is smaller than compared keys"); - - - for (size_t pos = 0; pos < end; ++pos) { - const auto& leftCell = pos < left.Count() ? left.Next() : KeyDefaults.Defs[pos]; - const auto& rightCell = pos < right.Count() ? 
right.Next() : KeyDefaults.Defs[pos]; - if (int cmp = CompareTypedCells(leftCell, rightCell, KeyDefaults.Types[pos])) { - return cmp; - } - } - - return 0; - } - - ui64 AbsDifference(ui64 a, ui64 b) const { - return static_cast(std::abs(static_cast(a) - static_cast(b))); - } - - ui64 SafeDiff(ui64 a, ui64 b) const { - return a - Min(a, b); - } - -private: - const TSubset& Subset; - const TKeyCellDefaults& KeyDefaults; - IPages* const Env; - ui32 HistogramBucketsCount; - TBuildStatsYieldHandler YieldHandler; - ui64 Resolution, StatTotalSize; - TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable key refs - TDeque LoadedStateNodes; // keep nodes to use TIntrusiveList -}; - -} - -inline bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - TTableHistogramBuilderBtreeIndex builder(subset, env, histogramBucketsCount, yieldHandler); - - ready &= builder.Build(stats.RowCountHistogram, stats.RowCount); - ready &= builder.Build(stats.DataSizeHistogram, stats.DataSize.Size); - - return ready; -} - -} diff --git a/ydb/core/tablet_flat/test/libs/table/test_mixer.h b/ydb/core/tablet_flat/test/libs/table/test_mixer.h index 458e4b7070ae..7c82abb30a2d 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_mixer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_mixer.h @@ -45,7 +45,7 @@ namespace NTest { if (CurrentBucketRemainingRows-- == 0) { // start next bucket with CurrentBucketRemainingRows rows ui64 one = (Skip && Skip > Random.Uniform(Buckets) ? 
1 : 0); - CurrentBucketRemainingRows = RowsPerBucket + one, Skip -= one, CurrentBucket++; + CurrentBucketRemainingRows = RowsPerBucket + one - 1, Skip -= one, CurrentBucket++; } return Min(CurrentBucket, Buckets - 1); diff --git a/ydb/core/tablet_flat/ut/ut_stat.cpp b/ydb/core/tablet_flat/ut/ut_stat.cpp index a02151472585..f17e1bfea9b9 100644 --- a/ydb/core/tablet_flat/ut/ut_stat.cpp +++ b/ydb/core/tablet_flat/ut/ut_stat.cpp @@ -2,6 +2,8 @@ #include "flat_stat_table.h" #include "flat_stat_table_mixed_index.h" #include "flat_stat_table_btree_index.h" +#include +#include #include #include #include @@ -199,7 +201,7 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 25428); + CheckMixedIndex(*subset, 24000, 2106479, 25458); } Y_UNIT_TEST(Serial_Groups) @@ -295,7 +297,7 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 49502); + CheckMixedIndex(*subset, 24000, 2106479, 49555); } Y_UNIT_TEST(Serial_Groups) @@ -455,14 +457,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { { const ui32 samples = 5; - Cerr << "Parts:" << Endl; + Cerr << subset.Flatten.size() << " parts:" << Endl; for (auto &part : subset.Flatten) { TTestEnv env; auto index = CreateIndexIter(part.Part.Get(), &env, {}); Cerr << " " << index->GetEndRowId() << " rows, " << IndexTools::CountMainPages(*part.Part) << " pages, " << (part->IndexPages.HasBTree() ? 
part->IndexPages.GetBTree({}).LevelCount : -1) << " levels: "; - for (ui32 sample : xrange(samples + 1)) { + for (ui32 sample : xrange(1u, samples + 1)) { TRowId rowId((index->GetEndRowId() - 1) * sample / samples); Y_ABORT_UNLESS(index->Seek(rowId) == EReady::Data); TSmallVec keyCells; @@ -492,7 +494,33 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { UNIT_ASSERT_LE(std::abs(percent), allowed); } - void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + void CalcDataBeforeIterate(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + NTest::TChecker wrap(subset, { new TTouchEnv }); + auto env = wrap.GetEnv(); + env->Faulty = false; + + bytes = 0; + rows = 0; + wrap.Seek({}, ESeek::Lower); + + while (wrap.GetReady() == EReady::Data) { + ui64 prevBytes = env->TouchedBytes; + + wrap.Next(); + + if (wrap.GetReady() == EReady::Data && key.GetCells()) { + auto cmp = CompareTypedCellVectors(key.GetCells().data(), wrap->GetKey().Cells().data(), subset.Scheme->Keys->Types.data(), Min(key.GetCells().size(), wrap->GetKey().Cells().size())); + if (cmp < 0) { + break; + } + } + + rows++; + bytes = prevBytes; + } + } + + void CalcDataBeforePrecharge(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { TTouchEnv env; env.Faulty = false; @@ -513,6 +541,23 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { rows = env.TouchedRows; } + void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + bool groups = false; + rows = 0; + for (const auto& part : subset.Flatten) { + TTestEnv env; + auto index = CreateIndexIter(part.Part.Get(), &env, {}); + rows += index->GetEndRowId(); + groups |= part->GroupsCount > 1 || part->HistoricGroupsCount > 0; + } + + if (groups || rows > 10000) { + CalcDataBeforePrecharge(subset, key, bytes, rows); + } else { + CalcDataBeforeIterate(subset, key, bytes, rows); + } + } + void CheckHistogram(const TSubset& subset, THistogram histogram, bool isBytes, 
ui64 total, bool verifyPercents) { Cerr << " " << (isBytes ? "DataSizeHistogram:" : "RowCountHistogram:") << Endl; @@ -550,11 +595,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 delta = total - prevValue, actualDelta = total - prevActualValue; Cerr << " " << FormatPercent(delta, total) << " (actual " << FormatPercent(actualDelta, total) << ")" << Endl; if (verifyPercents) VerifyPercent(delta, total, 20); - UNIT_ASSERT_GT(total, prevValue); + UNIT_ASSERT_GE(total, prevValue); } } - void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true) { + void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true, bool faulty = true) { if (mode == 0) { Dump(subset); } @@ -571,6 +616,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 dataSizeResolution = totalBytes / histogramBucketsCount; TTouchEnv env; + env.Faulty = faulty; // env.Faulty = false; // uncomment for debug TStats stats; auto buildStats = [&]() { @@ -581,7 +627,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } }; - const ui32 attempts = 35; + const ui32 attempts = 100; for (ui32 attempt : xrange(attempts)) { if (buildStats()) { break; @@ -593,6 +639,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { CheckHistogram(subset, stats.RowCountHistogram, false, totalRows, verifyPercents); CheckHistogram(subset, stats.DataSizeHistogram, true, totalBytes, verifyPercents); + + if (mode == BTreeIndex && verifyPercents && histogramBucketsCount != 1000) { + UNIT_ASSERT_VALUES_EQUAL(stats.RowCountHistogram.size(), histogramBucketsCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats.DataSizeHistogram.size(), histogramBucketsCount - 1); + } } Y_UNIT_TEST(Single) @@ -906,6 +957,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Single_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 1, TMixerOne{ }); + 
Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Single_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -938,6 +997,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Mixed_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerRnd(3)); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Mixed_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -970,6 +1037,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Serial_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerSeq(3, Mass3.Saved.Size())); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Serial_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -1033,6 +1108,46 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { Check(*subset, mode, 10, false); } } + + Y_UNIT_TEST(Many_Mixed) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? 
false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerRnd(partsCount)); + + Check(*subset, mode, 10, false, false); + } + } + + Y_UNIT_TEST(Many_Serial) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerSeq(partsCount, mass->Saved.Size())); + + Check(*subset, mode, 10, false, false); + } + } } } diff --git a/ydb/core/tablet_flat/ya.make b/ydb/core/tablet_flat/ya.make index e5f02f16af21..0355f40f09f2 100644 --- a/ydb/core/tablet_flat/ya.make +++ b/ydb/core/tablet_flat/ya.make @@ -56,6 +56,8 @@ SRCS( flat_stat_part.h flat_stat_table.h flat_stat_table.cpp + flat_stat_table_btree_index.cpp + flat_stat_table_btree_index_histogram.cpp flat_stat_part_group_iter_create.cpp flat_store_hotdog.cpp flat_table.cpp diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 9087ecd9bd4b..67d5c62db373 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -167,7 +167,7 @@ class TTableStatsCoroBuilder : public TActorCoroImpl, private IPages { LOG_DEBUG_S(GetActorContext(), NKikimrServices::TX_DATASHARD, "BuildStats result at datashard " << TabletId << ", for tableId " << TableId << ": RowCount " << ev->Stats.RowCount << ", DataSize " << ev->Stats.DataSize.Size << ", IndexSize " << ev->Stats.IndexSize.Size << ", PartCount " << ev->PartCount << (ev->PartOwners.size() > 1 || ev->PartOwners.size() == 1 && *ev->PartOwners.begin() != TabletId ? 
", with borrowed parts" : "") - << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent)); + << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent) << ", HistogramKeys " << ev->Stats.DataSizeHistogram.size()); Send(ReplyTo, ev.Release()); diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp index 091e60fdc26f..edd640d02ccd 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp @@ -34,11 +34,41 @@ static bool IsIntegerType(NScheme::TTypeInfo type) { } } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, const TConstArrayRef &keyColumnTypes) { - ui64 bucketsCount = histogram.BucketsSize(); - ui64 idxLo = bucketsCount * 0.33; - ui64 idxMed = bucketsCount * 0.5; - ui64 idxHi = bucketsCount * 0.66; +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 total, const TConstArrayRef &keyColumnTypes) { + if (histogram.GetBuckets().empty()) { + return {}; + } + + ui64 idxLo = Max(), idxMed = Max(), idxHi = Max(); + { // search for median and acceptable bounds range so that after the split smallest size is >= 25% + ui64 idxMedDiff = Max(), idx = 0; + for (const auto& point : histogram.GetBuckets()) { + ui64 leftSize = Min(point.GetValue(), total); + ui64 rightSize = total - leftSize; + + // search for a median point at which abs(leftSize - rightSize) is minimum + ui64 sizesDiff = Max(leftSize, rightSize) - Min(leftSize, rightSize); + if (idxMedDiff > sizesDiff) { + idxMed = idx; + idxMedDiff = sizesDiff; + } + + if (leftSize * 4 >= total && idxLo == Max()) { + idxLo = idx; // first point at which leftSize >= 25% + } + if (rightSize * 4 >= total) { + idxHi = idx; // last point at which rightSize >= 25% + } + + idx++; + } + + bool canSplit = idxLo != Max() && idxLo <= idxMed && idxMed <= idxHi && idxHi != 
Max(); + + if (!canSplit) { + return {}; + } + } TSerializedCellVec keyLo(histogram.GetBuckets(idxLo).GetKey()); TSerializedCellVec keyMed(histogram.GetBuckets(idxMed).GetKey()); @@ -302,7 +332,8 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex << " for pathId " << tableId << " state '" << DatashardStateName(rec.GetShardState()).data() << "'" << " dataSize " << dataSize - << " rowCount " << rowCount); + << " rowCount " << rowCount + << " dataSizeHistogram buckets " << rec.GetTableStats().GetDataSizeHistogram().BucketsSize()); if (!Self->Tables.contains(tableId)) return true; @@ -353,12 +384,15 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex } else { // Choose number of parts and split boundaries const auto& histogram = rec.GetTableStats().GetDataSizeHistogram(); - if (histogram.BucketsSize() < 2) { + + splitKey = ChooseSplitKeyByHistogram(histogram, dataSize, keyColumnTypes); + if (splitKey.GetBuffer().empty()) { + LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Failed to find proper split key (initially) for '%s' of datashard %" PRIu64, + ToString(splitReason), datashardId); return true; } - splitKey = ChooseSplitKeyByHistogram(histogram, keyColumnTypes); - // Split key must not be less than the first key TSerializedCellVec lowestKey(histogram.GetBuckets(0).GetKey()); if (0 < CompareTypedCellVectors(lowestKey.GetCells().data(), splitKey.GetCells().data(), @@ -366,7 +400,7 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex lowestKey.GetCells().size(), splitKey.GetCells().size())) { LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, - "Failed to find proper split key for '%s' of datashard %" PRIu64, + "Failed to find proper split key (less than first) for '%s' of datashard %" PRIu64, ToString(splitReason), datashardId); return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 
c4a4d1dcf244..b74161e931ef 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -54,7 +54,7 @@ inline NKikimrSchemeOp::TModifyScheme TransactionTemplate(const TString& working return tx; } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 total, const TConstArrayRef& keyColumnTypes); class TShardDeleter { diff --git a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp index 3f1ab7be33fc..dfdc948fc201 100644 --- a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp +++ b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp @@ -9912,9 +9912,16 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { : KeyColumnTypes(keyColumnTypes.begin(), keyColumnTypes.end()) {} - TString FindSplitKey(const TVector>& histogramKeys) const { - NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys); - TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, KeyColumnTypes); + TString FindSplitKey(const TVector>& histogramKeys, TVector histogramValues = {}, ui64 total = 0) const { + if (histogramValues.empty() && !histogramKeys.empty()) { + for (size_t i = 0; i < histogramKeys.size(); i++) { + histogramValues.push_back(i + 1); + } + total = histogramKeys.size() + 1; + } + + NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys, histogramValues); + TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, total, KeyColumnTypes); return PrintKey(splitKey); } @@ -9964,11 +9971,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { return NKikimr::TSerializedCellVec(cells); } - NKikimrTableStats::THistogram FillHistogram(const TVector>& keys) const { + NKikimrTableStats::THistogram FillHistogram(const TVector>& keys, const TVector& values) const { NKikimrTableStats::THistogram histogram; - for (const auto& k : keys) { - TSerializedCellVec 
sk(MakeCells(k)); - histogram.AddBuckets()->SetKey(sk.GetBuffer()); + for (auto i : xrange(keys.size())) { + TSerializedCellVec sk(MakeCells(keys[i])); + auto bucket = histogram.AddBuckets(); + bucket->SetKey(sk.GetBuffer()); + bucket->SetValue(values[i]); } return histogram; } @@ -10085,7 +10094,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { { "2", "f", "42" }, { "3", "cccccccccccccccccccccccc", "42" } }); - UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : d, Uint32 : NULL)"); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : c, Uint32 : NULL)"); } { @@ -10102,6 +10111,140 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { }); UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : bbb, Uint32 : NULL)"); } + + { + TString splitKey = + schemaHelper.FindSplitKey({}); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 53, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 25, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 75, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 24, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 76, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + }, 10); + 
UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 4, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 30, + 40, + 70 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 7, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 30, + 40, + 70, + 90, + 91, + 92, + 93, + 94, + 95 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 1, Utf8 : NULL, Uint32 : NULL)"); + } } Y_UNIT_TEST(ListNotCreatedDirCase) { diff --git a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp index 4671e1d1d39c..f5884ec8433b 100644 --- a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp +++ b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp @@ -845,7 +845,7 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { } })"); env.TestWaitNotification(runtime, txId); - env.SimulateSleep(runtime, TDuration::Seconds(30)); + env.SimulateSleep(runtime, TDuration::Seconds(60)); simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); From d60dccdd1d3581af6315336a476deac5bec8604b Mon Sep 17 00:00:00 2001 From: kungasc Date: Mon, 8 Jul 2024 10:47:51 +0000 Subject: [PATCH 2/2] fix comments --- .../tablet_flat/flat_stat_table_btree_index_histogram.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp index 
3a8dbfd483d5..4c060d594780 100644 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp @@ -150,7 +150,7 @@ class TTableHistogramBuilderBtreeIndex { // category = -1 for Key = { }, IsBegin = true // category = 0 for Key = {*}, IsBegin = * - // category = -1 for Key = { }, IsBegin = false + // category = +1 for Key = { }, IsBegin = false return Compare(GetCategory(a), GetCategory(b)); } @@ -159,12 +159,12 @@ class TTableHistogramBuilderBtreeIndex { if (a.Key) { return 0; } - return a.IsBegin ? -1 : 1; + return a.IsBegin ? -1 : +1; } static i8 Compare(i8 a, i8 b) noexcept { if (a < b) return -1; - if (a > b) return 1; + if (a > b) return +1; return 0; } };