From 3e3f23c2dc3ed2df39d6a681e3e7de53406d5a33 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:09:18 +0800 Subject: [PATCH] storage: Add system.dt_local_indexes (#9379) ref pingcap/tiflash#9032 storage: Add system.dt_local_indexes Signed-off-by: Lloyd-Pottiger --- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 20 +++ .../DeltaMerge/DeltaMergeStore_Statistics.cpp | 54 +++++++ .../src/Storages/DeltaMerge/Index/IndexInfo.h | 41 ++++++ .../System/StorageSystemDTLocalIndexes.cpp | 135 ++++++++++++++++++ .../System/StorageSystemDTLocalIndexes.h | 49 +++++++ .../Storages/System/attachSystemTables.cpp | 2 + 6 files changed, 301 insertions(+) create mode 100644 dbms/src/Storages/DeltaMerge/Index/IndexInfo.h create mode 100644 dbms/src/Storages/System/StorageSystemDTLocalIndexes.cpp create mode 100644 dbms/src/Storages/System/StorageSystemDTLocalIndexes.h diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 083890dc794..d4689c31490 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -173,6 +174,19 @@ struct StoreStats UInt64 background_tasks_length = 0; }; +struct LocalIndexStats +{ + String column_name{}; + UInt64 column_id{}; + String index_kind{}; + + UInt64 rows_stable_indexed{}; // Total rows + UInt64 rows_stable_not_indexed{}; // Total rows + UInt64 rows_delta_indexed{}; // Total rows + UInt64 rows_delta_not_indexed{}; // Total rows +}; +using LocalIndexesStats = std::vector; + class DeltaMergeStore : private boost::noncopyable { public: @@ -527,6 +541,7 @@ class DeltaMergeStore : private boost::noncopyable StoreStats getStoreStats(); SegmentsStats getSegmentsStats(); + LocalIndexesStats getLocalIndexStats(); bool isCommonHandle() const { return is_common_handle; } size_t getRowKeyColumnSize() const { return rowkey_column_size; } @@ -838,6 +853,11 @@ class DeltaMergeStore : private boost::noncopyable RowKeyValue next_gc_check_key; + // Some indexes are built in TiFlash locally. For example, Vector Index. + // Compares to the lightweight RoughSet Indexes, these indexes require lot + // of resources to build, so they will be built in separated background pool. + IndexInfosPtr local_index_infos; + // Synchronize between write threads and read threads. mutable std::shared_mutex read_write_mutex; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_Statistics.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_Statistics.cpp index 411d09a9f22..12e1ab15d62 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_Statistics.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_Statistics.cpp @@ -192,6 +192,60 @@ SegmentsStats DeltaMergeStore::getSegmentsStats() return stats; } +LocalIndexesStats DeltaMergeStore::getLocalIndexStats() +{ + std::shared_lock lock(read_write_mutex); + + if (!local_index_infos || local_index_infos->empty()) + return {}; + + LocalIndexesStats stats; + for (const auto & index_info : *local_index_infos) + { + LocalIndexStats index_stats; + index_stats.column_id = index_info.column_id; + index_stats.column_name = index_info.column_name; + index_stats.index_kind = "HNSW"; // TODO: Support more. + + for (const auto & [handle, segment] : segments) + { + UNUSED(handle); + + // Currently Delta is always not indexed. + index_stats.rows_delta_not_indexed + += segment->getDelta()->getRows(); // TODO: More precisely count column bytes instead. + + const auto & stable = segment->getStable(); + bool is_stable_indexed = true; + for (const auto & dmfile : stable->getDMFiles()) + { + if (!dmfile->isColumnExist(index_info.column_id)) + continue; // Regard as indexed, because column does not need any index + + auto column_stat = dmfile->getColumnStat(index_info.column_id); + + if (column_stat.index_bytes == 0 && column_stat.data_bytes > 0) + { + is_stable_indexed = false; + break; + } + } + + if (is_stable_indexed) + { + index_stats.rows_stable_indexed += stable->getRows(); + } + else + { + index_stats.rows_stable_not_indexed += stable->getRows(); + } + } + + stats.emplace_back(index_stats); + } + + return stats; +} } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Index/IndexInfo.h b/dbms/src/Storages/DeltaMerge/Index/IndexInfo.h new file mode 100644 index 00000000000..5837c432251 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/Index/IndexInfo.h @@ -0,0 +1,41 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB::DM +{ + +enum class IndexType +{ + Vector = 1, +}; + +struct IndexInfo +{ + IndexType type; + ColumnID column_id; + String column_name; + // Now we only support vector index. + // In the future, we may support more types of indexes, using std::variant. + TiDB::VectorIndexDefinitionPtr index_definition; +}; + +using IndexInfos = std::vector; +using IndexInfosPtr = std::shared_ptr; + +} // namespace DB::DM diff --git a/dbms/src/Storages/System/StorageSystemDTLocalIndexes.cpp b/dbms/src/Storages/System/StorageSystemDTLocalIndexes.cpp new file mode 100644 index 00000000000..1830db53527 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemDTLocalIndexes.cpp @@ -0,0 +1,135 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +StorageSystemDTLocalIndexes::StorageSystemDTLocalIndexes(const std::string & name_) + : name(name_) +{ + setColumns(ColumnsDescription({ + {"database", std::make_shared()}, + {"table", std::make_shared()}, + + {"tidb_database", std::make_shared()}, + {"tidb_table", std::make_shared()}, + {"keyspace_id", std::make_shared(std::make_shared())}, + {"table_id", std::make_shared()}, + + {"column_name", std::make_shared()}, + {"column_id", std::make_shared()}, + {"index_kind", std::make_shared()}, + + {"rows_stable_indexed", std::make_shared()}, // Total rows + {"rows_stable_not_indexed", std::make_shared()}, // Total rows + {"rows_delta_indexed", std::make_shared()}, // Total rows + {"rows_delta_not_indexed", std::make_shared()}, // Total rows + })); +} + +BlockInputStreams StorageSystemDTLocalIndexes::read( + const Names & column_names, + const SelectQueryInfo &, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) +{ + check(column_names); + processed_stage = QueryProcessingStage::FetchColumns; + + MutableColumns res_columns = getSampleBlock().cloneEmptyColumns(); + + SchemaNameMapper mapper; + + auto databases = context.getDatabases(); + for (const auto & d : databases) + { + String database_name = d.first; + const auto & database = d.second; + const DatabaseTiFlash * db_tiflash = typeid_cast(database.get()); + + auto it = database->getIterator(context); + for (; it->isValid(); it->next()) + { + const auto & table_name = it->name(); + auto & storage = it->table(); + if (storage->getName() != MutableSupport::delta_tree_storage_name) + continue; + + auto dm_storage = std::dynamic_pointer_cast(storage); + const auto & table_info = dm_storage->getTableInfo(); + auto table_id = table_info.id; + auto store = dm_storage->getStoreIfInited(); + if (!store) + continue; + + if (dm_storage->isTombstone()) + continue; + + auto index_stats = store->getLocalIndexStats(); + for (auto & stat : index_stats) + { + size_t j = 0; + res_columns[j++]->insert(database_name); + res_columns[j++]->insert(table_name); + + String tidb_db_name; + KeyspaceID keyspace_id = NullspaceID; + if (db_tiflash) + { + tidb_db_name = db_tiflash->getDatabaseInfo().name; + keyspace_id = db_tiflash->getDatabaseInfo().keyspace_id; + } + res_columns[j++]->insert(tidb_db_name); + String tidb_table_name = table_info.name; + res_columns[j++]->insert(tidb_table_name); + if (keyspace_id == NullspaceID) + res_columns[j++]->insert(Field()); + else + res_columns[j++]->insert(static_cast(keyspace_id)); + res_columns[j++]->insert(table_id); + + res_columns[j++]->insert(stat.column_name); + res_columns[j++]->insert(stat.column_id); + res_columns[j++]->insert(stat.index_kind); + + res_columns[j++]->insert(stat.rows_stable_indexed); + res_columns[j++]->insert(stat.rows_stable_not_indexed); + res_columns[j++]->insert(stat.rows_delta_indexed); + res_columns[j++]->insert(stat.rows_delta_not_indexed); + } + } + } + + return BlockInputStreams( + 1, + std::make_shared(getSampleBlock().cloneWithColumns(std::move(res_columns)))); +} + +} // namespace DB diff --git a/dbms/src/Storages/System/StorageSystemDTLocalIndexes.h b/dbms/src/Storages/System/StorageSystemDTLocalIndexes.h new file mode 100644 index 00000000000..90b67a67b46 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemDTLocalIndexes.h @@ -0,0 +1,49 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include + + +namespace DB +{ +class Context; + +class StorageSystemDTLocalIndexes + : public ext::SharedPtrHelper + , public IStorage +{ +public: + std::string getName() const override { return "SystemDTLocalIndexes"; } + std::string getTableName() const override { return name; } + + BlockInputStreams read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size, + unsigned num_streams) override; + +private: + const std::string name; + +protected: + explicit StorageSystemDTLocalIndexes(const std::string & name_); +}; + +} // namespace DB diff --git a/dbms/src/Storages/System/attachSystemTables.cpp b/dbms/src/Storages/System/attachSystemTables.cpp index d20bbe68a79..6be10c8b564 100644 --- a/dbms/src/Storages/System/attachSystemTables.cpp +++ b/dbms/src/Storages/System/attachSystemTables.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ void attachSystemTablesLocal(IDatabase & system_database) system_database.attachTable("databases", StorageSystemDatabases::create("databases")); system_database.attachTable("dt_tables", StorageSystemDTTables::create("dt_tables")); system_database.attachTable("dt_segments", StorageSystemDTSegments::create("dt_segments")); + system_database.attachTable("dt_local_indexes", StorageSystemDTLocalIndexes::create("dt_local_indexes")); system_database.attachTable("tables", StorageSystemTables::create("tables")); system_database.attachTable("columns", StorageSystemColumns::create("columns")); system_database.attachTable("functions", StorageSystemFunctions::create("functions"));