Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: remove vector_index in column level #9487

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,22 +89,11 @@ struct ColumnDefine
DataTypePtr type;
Field default_value;

/// Note: ColumnDefine is used in both Write path and Read path.
/// In the read path, vector_index is usually not available. Use AnnQueryInfo for
/// read related vector index information.
TiDB::VectorIndexDefinitionPtr vector_index;

explicit ColumnDefine(
ColId id_ = 0,
String name_ = "",
DataTypePtr type_ = nullptr,
Field default_value_ = Field{},
TiDB::VectorIndexDefinitionPtr vector_index_ = nullptr)
explicit ColumnDefine(ColId id_ = 0, String name_ = "", DataTypePtr type_ = nullptr, Field default_value_ = Field{})
: id(id_)
, name(std::move(name_))
, type(std::move(type_))
, default_value(std::move(default_value_))
, vector_index(vector_index_)
{}
};

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,7 @@ void DeltaMergeStore::applySchemaChanges(TiDB::TableInfo & table_info)

std::atomic_store(&original_table_header, std::make_shared<Block>(toEmptyBlock(original_table_columns)));

// release the lock because `applyLocalIndexChange ` will try to acquire the lock
// release the lock because `applyLocalIndexChange` will try to acquire the lock
// and generate tasks on segments
lock.unlock();

Expand Down
25 changes: 0 additions & 25 deletions dbms/src/Storages/DeltaMerge/Index/LocalIndexInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,31 +159,6 @@ LocalIndexInfosChangeset generateLocalIndexInfos(
std::vector<ComplexIndexID> newly_added;
std::vector<ComplexIndexID> newly_dropped;
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved

// In the serverless branch, previously we define vector index on TiDB::ColumnInfo
for (const auto & col : new_table_info.columns)
{
if (!col.vector_index)
continue;

// We do the check at the beginning, only assert check under debug mode
// is enough
assert(isVectorIndexSupported(logger));

const ComplexIndexID cindex_id{.index_id = EmptyIndexID, .column_id = col.id};
index_ids_in_new_table.emplace(cindex_id);
// already exist in `existing_indexes`
if (original_local_index_id_map.contains(cindex_id))
continue;
// newly added
new_index_infos->emplace_back(LocalIndexInfo{
.type = IndexType::Vector,
.index_id = EmptyIndexID, // the vector index created on ColumnInfo, use EmptyIndexID as the index_id
.column_id = col.id,
.index_definition = col.vector_index,
});
newly_added.emplace_back(cindex_id);
}

for (const auto & idx : new_table_info.index_infos)
{
if (!idx.vector_index)
Expand Down
25 changes: 9 additions & 16 deletions dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ try
{
auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::HiddenTiDBRowID, /*add_nullable*/ true);
auto vec_cd = ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)"));
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
auto vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::L2,
Expand All @@ -229,7 +229,7 @@ try
}

dm_file = restoreDMFile();
dm_file = buildIndex(*vec_cd.vector_index);
dm_file = buildIndex(*vector_index);

// Read with exact match
{
Expand Down Expand Up @@ -836,7 +836,7 @@ try
{
auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::HiddenTiDBRowID, /*add_nullable*/ true);
auto vec_cd = ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)"));
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
auto vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::L2,
Expand Down Expand Up @@ -866,7 +866,7 @@ try
}

dm_file = restoreDMFile();
dm_file = buildIndex(*vec_cd.vector_index);
dm_file = buildIndex(*vector_index);

{
auto ann_query_info = std::make_shared<tipb::ANNQueryInfo>();
Expand Down Expand Up @@ -904,7 +904,7 @@ try
{
auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::HiddenTiDBRowID, /*add_nullable*/ true);
auto vec_cd = ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)"));
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
auto vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::L2,
Expand Down Expand Up @@ -933,7 +933,7 @@ try
}

dm_file = restoreDMFile();
dm_file = buildIndex(*vec_cd.vector_index);
dm_file = buildIndex(*vector_index);

// Pack #0 is filtered out according to VecIndex
{
Expand Down Expand Up @@ -1045,7 +1045,7 @@ try
{
auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::HiddenTiDBRowID, /*add_nullable*/ true);
auto vec_cd = ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)"));
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
auto vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 1,
.distance_metric = tipb::VectorDistanceMetric::L2,
Expand Down Expand Up @@ -1076,7 +1076,7 @@ try
}

dm_file = restoreDMFile();
dm_file = buildIndex(*vec_cd.vector_index);
dm_file = buildIndex(*vector_index);

// Pack Filter using RowKeyRange
{
Expand Down Expand Up @@ -1223,11 +1223,6 @@ class VectorIndexSegmentTestBase
void prepareColumns(const ColumnDefinesPtr & columns) override
{
auto vec_cd = ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)"));
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 1,
.distance_metric = tipb::VectorDistanceMetric::L2,
});
columns->emplace_back(vec_cd);
}

Expand Down Expand Up @@ -1752,9 +1747,7 @@ class VectorIndexSegmentOnS3Test
FileCache::initialize(global_context.getPathCapacity(), file_cache_config);

auto cols = DMTestEnv::getDefaultColumns();
auto vec_cd = cdVec();
vec_cd.vector_index = std::make_shared<TiDB::VectorIndexDefinition>(index_info);
cols->emplace_back(vec_cd);
cols->emplace_back(cdVec());
setColumns(cols);

auto dm_context = dmContext();
Expand Down
187 changes: 0 additions & 187 deletions dbms/src/Storages/DeltaMerge/tests/gtest_local_index_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,191 +224,4 @@ try
}
CATCH

TEST(LocalIndexInfoTest, CheckIndexAddWithVecIndexOnColumnInfo)
try
{
// The serverless branch, vector index may directly defined on the ColumnInfo.
// Create table info with a vector index by column comments.
auto col_vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::INNER_PRODUCT,
});
TiDB::TableInfo table_info;
{
TiDB::ColumnInfo column_info;
column_info.name = "vec";
column_info.id = 98;
table_info.columns.emplace_back(column_info);

TiDB::ColumnInfo column_info_v1;
column_info_v1.name = "vec1";
column_info_v1.id = 99;
column_info_v1.vector_index = col_vector_index;
table_info.columns.emplace_back(column_info_v1);
}

// Add a vector index by add vector index dirctly.
TiDB::IndexColumnInfo default_index_col_info;
default_index_col_info.name = "vec";
default_index_col_info.length = -1;
default_index_col_info.offset = 0;
TiDB::IndexInfo expect_idx;
{
expect_idx.id = 1;
expect_idx.idx_cols.emplace_back(default_index_col_info);
expect_idx.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 1,
.distance_metric = tipb::VectorDistanceMetric::L2,
});
table_info.index_infos.emplace_back(expect_idx);
}

// check the different
auto logger = Logger::get();
LocalIndexInfosPtr index_info = nullptr;
{
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(new_index_info, nullptr);
ASSERT_EQ(new_index_info->size(), 2);

const auto & idx0 = (*new_index_info)[0];
ASSERT_EQ(IndexType::Vector, idx0.type);
ASSERT_EQ(EmptyIndexID, idx0.index_id); // defined on TiDB::ColumnInfo
ASSERT_EQ(99, idx0.column_id);
ASSERT_NE(nullptr, idx0.index_definition);
ASSERT_EQ(col_vector_index->kind, idx0.index_definition->kind);
ASSERT_EQ(col_vector_index->dimension, idx0.index_definition->dimension);
ASSERT_EQ(col_vector_index->distance_metric, idx0.index_definition->distance_metric);

const auto & idx1 = (*new_index_info)[1];
ASSERT_EQ(IndexType::Vector, idx1.type);
ASSERT_EQ(expect_idx.id, idx1.index_id);
ASSERT_EQ(98, idx1.column_id);
ASSERT_NE(nullptr, idx1.index_definition);
ASSERT_EQ(expect_idx.vector_index->kind, idx1.index_definition->kind);
ASSERT_EQ(expect_idx.vector_index->dimension, idx1.index_definition->dimension);
ASSERT_EQ(expect_idx.vector_index->distance_metric, idx1.index_definition->distance_metric);
// check again, table_info.index_infos doesn't change and return them
LocalIndexInfosPtr empty_index_info = nullptr;
ASSERT_EQ(2, generateLocalIndexInfos(empty_index_info, table_info, logger).new_local_index_infos->size());
// check again with the same table_info, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);

// update
index_info = new_index_info;
}

// Drop the first vector index on column vec1.
table_info.index_infos.erase(table_info.index_infos.begin());

// Add another vector index to the TableInfo
TiDB::IndexInfo expect_idx2;
{
expect_idx2.id = 2; // another index_id
expect_idx2.idx_cols.emplace_back(default_index_col_info);
expect_idx2.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 2,
.distance_metric = tipb::VectorDistanceMetric::COSINE, // another distance
});
table_info.index_infos.emplace_back(expect_idx2);
}
// check the different
{
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(new_index_info, nullptr);
ASSERT_EQ(new_index_info->size(), 2);

const auto & idx0 = (*new_index_info)[0];
ASSERT_EQ(IndexType::Vector, idx0.type);
ASSERT_EQ(EmptyIndexID, idx0.index_id); // defined on TiDB::ColumnInfo
ASSERT_EQ(99, idx0.column_id);
ASSERT_NE(nullptr, idx0.index_definition);
ASSERT_EQ(col_vector_index->kind, idx0.index_definition->kind);
ASSERT_EQ(col_vector_index->dimension, idx0.index_definition->dimension);
ASSERT_EQ(col_vector_index->distance_metric, idx0.index_definition->distance_metric);

const auto & idx1 = (*new_index_info)[1];
ASSERT_EQ(IndexType::Vector, idx1.type);
ASSERT_EQ(expect_idx2.id, idx1.index_id);
ASSERT_EQ(98, idx1.column_id);
ASSERT_NE(nullptr, idx1.index_definition);
ASSERT_EQ(expect_idx2.vector_index->kind, idx1.index_definition->kind);
ASSERT_EQ(expect_idx2.vector_index->dimension, idx1.index_definition->dimension);
ASSERT_EQ(expect_idx2.vector_index->distance_metric, idx1.index_definition->distance_metric);

// check again, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);
}
}
CATCH

TEST(LocalIndexInfoTest, CheckIndexDropDefinedInColumnInfo)
{
auto logger = Logger::get();

TiDB::TableInfo table_info;
{
// The serverless branch, vector index may directly defined
// on the ColumnInfo
TiDB::ColumnInfo column_info_v1;
column_info_v1.name = "vec1";
column_info_v1.id = 99;
column_info_v1.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::INNER_PRODUCT,
});
table_info.columns.emplace_back(column_info_v1);

// A column without vector index
TiDB::ColumnInfo column_info_v2;
column_info_v2.name = "vec2";
column_info_v2.id = 100;
table_info.columns.emplace_back(column_info_v2);
}

LocalIndexInfosPtr index_info = nullptr;
{
// check the different with nullptr
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(nullptr, new_index_info);
ASSERT_EQ(new_index_info->size(), 1);
const auto & idx0 = (*new_index_info)[0];
ASSERT_EQ(IndexType::Vector, idx0.type);
ASSERT_EQ(EmptyIndexID, idx0.index_id); // the vector index defined on ColumnInfo
ASSERT_EQ(99, idx0.column_id);
ASSERT_NE(nullptr, idx0.index_definition);
ASSERT_EQ(tipb::VectorIndexKind::HNSW, idx0.index_definition->kind);
ASSERT_EQ(3, idx0.index_definition->dimension);
ASSERT_EQ(tipb::VectorDistanceMetric::INNER_PRODUCT, idx0.index_definition->distance_metric);

// check again, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);

// update
index_info = new_index_info;
}

// drop column along with index info defined in column info
table_info.columns.erase(table_info.columns.begin());
{
// check the different with existing index_info
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(nullptr, new_index_info);
// not null
ASSERT_NE(new_index_info, nullptr);
// has been dropped
ASSERT_EQ(new_index_info->size(), 0);

// check again, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);

// update
index_info = new_index_info;
}
}

} // namespace DB::DM::tests
3 changes: 1 addition & 2 deletions dbms/src/Storages/StorageDeltaMerge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ void StorageDeltaMerge::updateTableColumnInfo()
if (itr != columns.end())
{
col_def.default_value = itr->defaultValueToField();
col_def.vector_index = itr->vector_index;
}

if (col_def.id != TiDBPkColumnID && col_def.id != VersionColumnID && col_def.id != DelMarkColumnID
Expand Down Expand Up @@ -328,7 +327,7 @@ void StorageDeltaMerge::updateTableColumnInfo()
table_column_defines.begin(),
table_column_defines.end(),
[](const ColumnDefine & col, FmtBuffer & fb) {
fb.fmtAppend("{} {} {}", col.name, col.type->getFamilyName(), col.vector_index);
fb.fmtAppend("{} {}", col.name, col.type->getFamilyName());
},
", ");
return fmt_buf.toString();
Expand Down
Loading