Skip to content

Commit

Permalink
storage: remove vector_index in TiDB::ColumnInfo
Browse files Browse the repository at this point in the history
Signed-off-by: Lloyd-Pottiger <[email protected]>
  • Loading branch information
Lloyd-Pottiger committed Sep 27, 2024
1 parent 0512a8f commit 1d84d9e
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 268 deletions.
25 changes: 0 additions & 25 deletions dbms/src/Storages/DeltaMerge/Index/LocalIndexInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,31 +159,6 @@ LocalIndexInfosChangeset generateLocalIndexInfos(
std::vector<ComplexIndexID> newly_added;
std::vector<ComplexIndexID> newly_dropped;

// In the serverless branch, vector indexes were previously defined on TiDB::ColumnInfo
for (const auto & col : new_table_info.columns)
{
if (!col.vector_index)
continue;

// The check is already done at the beginning, so an assert that only runs
// in debug mode is enough here
assert(isVectorIndexSupported(logger));

const ComplexIndexID cindex_id{.index_id = EmptyIndexID, .column_id = col.id};
index_ids_in_new_table.emplace(cindex_id);
// already exist in `existing_indexes`
if (original_local_index_id_map.contains(cindex_id))
continue;
// newly added
new_index_infos->emplace_back(LocalIndexInfo{
.type = IndexType::Vector,
.index_id = EmptyIndexID, // the vector index created on ColumnInfo, use EmptyIndexID as the index_id
.column_id = col.id,
.index_definition = col.vector_index,
});
newly_added.emplace_back(cindex_id);
}

for (const auto & idx : new_table_info.index_infos)
{
if (!idx.vector_index)
Expand Down
187 changes: 0 additions & 187 deletions dbms/src/Storages/DeltaMerge/tests/gtest_local_index_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,191 +224,4 @@ try
}
CATCH

// Checks generateLocalIndexInfos() when a vector index is declared directly on
// a TiDB::ColumnInfo (serverless branch) and, at the same time, another vector
// index is declared through TableInfo::index_infos. The test covers the first
// generation and a later generation after the table-level index is replaced.
TEST(LocalIndexInfoTest, CheckIndexAddWithVecIndexOnColumnInfo)
try
{
    // Vector index definition that is attached to a column (not to an IndexInfo).
    TiDB::VectorIndexDefinitionPtr column_level_def(new TiDB::VectorIndexDefinition{
        .kind = tipb::VectorIndexKind::HNSW,
        .dimension = 3,
        .distance_metric = tipb::VectorDistanceMetric::INNER_PRODUCT,
    });

    TiDB::TableInfo schema;
    {
        // Column "vec" (id 98): no vector index on the column itself.
        TiDB::ColumnInfo plain_col;
        plain_col.name = "vec";
        plain_col.id = 98;
        schema.columns.emplace_back(plain_col);

        // Column "vec1" (id 99): carries the column-level vector index.
        TiDB::ColumnInfo indexed_col;
        indexed_col.name = "vec1";
        indexed_col.id = 99;
        indexed_col.vector_index = column_level_def;
        schema.columns.emplace_back(indexed_col);
    }

    // Index column description shared by the table-level indexes below; it
    // refers to column "vec" at offset 0.
    TiDB::IndexColumnInfo shared_idx_col;
    shared_idx_col.name = "vec";
    shared_idx_col.length = -1;
    shared_idx_col.offset = 0;

    // First table-level vector index (id 1), added through index_infos.
    TiDB::IndexInfo first_idx;
    {
        first_idx.id = 1;
        first_idx.idx_cols.emplace_back(shared_idx_col);
        first_idx.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
            .kind = tipb::VectorIndexKind::HNSW,
            .dimension = 1,
            .distance_metric = tipb::VectorDistanceMetric::L2,
        });
        schema.index_infos.emplace_back(first_idx);
    }

    // Step 1: generate from scratch (no existing local index infos).
    auto logger = Logger::get();
    LocalIndexInfosPtr current_infos = nullptr;
    {
        auto generated = generateLocalIndexInfos(current_infos, schema, logger).new_local_index_infos;
        ASSERT_NE(generated, nullptr);
        ASSERT_EQ(generated->size(), 2);

        // Entry 0 is the index defined on TiDB::ColumnInfo (column id 99).
        const auto & from_column = (*generated)[0];
        ASSERT_EQ(IndexType::Vector, from_column.type);
        ASSERT_EQ(EmptyIndexID, from_column.index_id); // defined on TiDB::ColumnInfo
        ASSERT_EQ(99, from_column.column_id);
        ASSERT_NE(nullptr, from_column.index_definition);
        ASSERT_EQ(column_level_def->kind, from_column.index_definition->kind);
        ASSERT_EQ(column_level_def->dimension, from_column.index_definition->dimension);
        ASSERT_EQ(column_level_def->distance_metric, from_column.index_definition->distance_metric);

        // Entry 1 is the index defined through index_infos (column id 98).
        const auto & from_table = (*generated)[1];
        ASSERT_EQ(IndexType::Vector, from_table.type);
        ASSERT_EQ(first_idx.id, from_table.index_id);
        ASSERT_EQ(98, from_table.column_id);
        ASSERT_NE(nullptr, from_table.index_definition);
        ASSERT_EQ(first_idx.vector_index->kind, from_table.index_definition->kind);
        ASSERT_EQ(first_idx.vector_index->dimension, from_table.index_definition->dimension);
        ASSERT_EQ(first_idx.vector_index->distance_metric, from_table.index_definition->distance_metric);

        // Generating again from an empty state with the unchanged schema
        // still yields both indexes.
        LocalIndexInfosPtr no_existing = nullptr;
        ASSERT_EQ(2, generateLocalIndexInfos(no_existing, schema, logger).new_local_index_infos->size());
        // Generating against the result just produced: nothing changed, so nullptr.
        ASSERT_EQ(nullptr, generateLocalIndexInfos(generated, schema, logger).new_local_index_infos);

        // Carry the result over to the next step.
        current_infos = generated;
    }

    // Step 2: drop the table-level index with id 1 (the only entry in
    // index_infos so far); the column-level index on "vec1" is untouched.
    schema.index_infos.erase(schema.index_infos.begin());

    // Then add a different table-level vector index (id 2).
    TiDB::IndexInfo second_idx;
    {
        second_idx.id = 2; // another index_id
        second_idx.idx_cols.emplace_back(shared_idx_col);
        second_idx.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
            .kind = tipb::VectorIndexKind::HNSW,
            .dimension = 2,
            .distance_metric = tipb::VectorDistanceMetric::COSINE, // another distance
        });
        schema.index_infos.emplace_back(second_idx);
    }

    // Check the difference against the result of step 1.
    {
        auto regenerated = generateLocalIndexInfos(current_infos, schema, logger).new_local_index_infos;
        ASSERT_NE(regenerated, nullptr);
        ASSERT_EQ(regenerated->size(), 2);

        // The column-level index is still reported first and is unchanged.
        const auto & from_column = (*regenerated)[0];
        ASSERT_EQ(IndexType::Vector, from_column.type);
        ASSERT_EQ(EmptyIndexID, from_column.index_id); // defined on TiDB::ColumnInfo
        ASSERT_EQ(99, from_column.column_id);
        ASSERT_NE(nullptr, from_column.index_definition);
        ASSERT_EQ(column_level_def->kind, from_column.index_definition->kind);
        ASSERT_EQ(column_level_def->dimension, from_column.index_definition->dimension);
        ASSERT_EQ(column_level_def->distance_metric, from_column.index_definition->distance_metric);

        // The table-level entry now describes the index with id 2.
        const auto & from_table = (*regenerated)[1];
        ASSERT_EQ(IndexType::Vector, from_table.type);
        ASSERT_EQ(second_idx.id, from_table.index_id);
        ASSERT_EQ(98, from_table.column_id);
        ASSERT_NE(nullptr, from_table.index_definition);
        ASSERT_EQ(second_idx.vector_index->kind, from_table.index_definition->kind);
        ASSERT_EQ(second_idx.vector_index->dimension, from_table.index_definition->dimension);
        ASSERT_EQ(second_idx.vector_index->distance_metric, from_table.index_definition->distance_metric);

        // Generating again with the same schema: nothing changed, so nullptr.
        ASSERT_EQ(nullptr, generateLocalIndexInfos(regenerated, schema, logger).new_local_index_infos);
    }
}
CATCH

TEST(LocalIndexInfoTest, CheckIndexDropDefinedInColumnInfo)
{
auto logger = Logger::get();

TiDB::TableInfo table_info;
{
// The serverless branch, vector index may directly defined
// on the ColumnInfo
TiDB::ColumnInfo column_info_v1;
column_info_v1.name = "vec1";
column_info_v1.id = 99;
column_info_v1.vector_index = TiDB::VectorIndexDefinitionPtr(new TiDB::VectorIndexDefinition{
.kind = tipb::VectorIndexKind::HNSW,
.dimension = 3,
.distance_metric = tipb::VectorDistanceMetric::INNER_PRODUCT,
});
table_info.columns.emplace_back(column_info_v1);

// A column without vector index
TiDB::ColumnInfo column_info_v2;
column_info_v2.name = "vec2";
column_info_v2.id = 100;
table_info.columns.emplace_back(column_info_v2);
}

LocalIndexInfosPtr index_info = nullptr;
{
// check the different with nullptr
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(nullptr, new_index_info);
ASSERT_EQ(new_index_info->size(), 1);
const auto & idx0 = (*new_index_info)[0];
ASSERT_EQ(IndexType::Vector, idx0.type);
ASSERT_EQ(EmptyIndexID, idx0.index_id); // the vector index defined on ColumnInfo
ASSERT_EQ(99, idx0.column_id);
ASSERT_NE(nullptr, idx0.index_definition);
ASSERT_EQ(tipb::VectorIndexKind::HNSW, idx0.index_definition->kind);
ASSERT_EQ(3, idx0.index_definition->dimension);
ASSERT_EQ(tipb::VectorDistanceMetric::INNER_PRODUCT, idx0.index_definition->distance_metric);

// check again, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);

// update
index_info = new_index_info;
}

// drop column along with index info defined in column info
table_info.columns.erase(table_info.columns.begin());
{
// check the different with existing index_info
auto new_index_info = generateLocalIndexInfos(index_info, table_info, logger).new_local_index_infos;
ASSERT_NE(nullptr, new_index_info);
// not null
ASSERT_NE(new_index_info, nullptr);
// has been dropped
ASSERT_EQ(new_index_info->size(), 0);

// check again, nothing changed, return nullptr
ASSERT_EQ(nullptr, generateLocalIndexInfos(new_index_info, table_info, logger).new_local_index_infos);

// update
index_info = new_index_info;
}
}

} // namespace DB::DM::tests
1 change: 0 additions & 1 deletion dbms/src/Storages/StorageDeltaMerge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ void StorageDeltaMerge::updateTableColumnInfo()
if (itr != columns.end())
{
col_def.default_value = itr->defaultValueToField();
col_def.vector_index = itr->vector_index;
}

if (col_def.id != TiDBPkColumnID && col_def.id != VersionColumnID && col_def.id != DelMarkColumnID
Expand Down
47 changes: 15 additions & 32 deletions dbms/src/TiDB/Schema/TiDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,31 +117,25 @@ enum class IndexType
HNSW = 5,
};

VectorIndexDefinitionPtr parseVectorIndexFromJSON(IndexType index_type, const Poco::JSON::Object::Ptr & json)
inline tipb::VectorIndexKind toVectorIndexKind(IndexType index_type)
{
assert(json); // not nullptr

tipb::VectorIndexKind kind = tipb::VectorIndexKind::INVALID_INDEX_KIND;
if (unlikely(json->has("kind")))
{
// TODO(vector-index): remove this deadcode
auto kind_field = json->getValue<String>("kind");
RUNTIME_CHECK_MSG(
tipb::VectorIndexKind_Parse(kind_field, &kind),
"invalid kind of vector index, {}",
kind_field);
RUNTIME_CHECK(kind != tipb::VectorIndexKind::INVALID_INDEX_KIND);
}
else
switch (index_type)
{
RUNTIME_CHECK_MSG(
index_type == IndexType::HNSW,
"Invalid index_type for vector index, {}({})",
magic_enum::enum_name(index_type),
fmt::underlying(index_type));
kind = tipb::VectorIndexKind::HNSW;
case IndexType::HNSW:
return tipb::VectorIndexKind::HNSW;
default:
throw Exception(
DB::ErrorCodes::LOGICAL_ERROR,
"Invalid index type for vector index {}",
magic_enum::enum_name(index_type));
}
}

VectorIndexDefinitionPtr parseVectorIndexFromJSON(IndexType index_type, const Poco::JSON::Object::Ptr & json)
{
assert(json); // not nullptr

auto kind = toVectorIndexKind(index_type);
auto dimension = json->getValue<UInt64>("dimension");
RUNTIME_CHECK(dimension > 0 && dimension <= TiDB::MAX_VECTOR_DIMENSION, dimension); // Just a protection

Expand Down Expand Up @@ -480,11 +474,6 @@ try
}
json->set("state", static_cast<Int32>(state));

if (vector_index)
{
json->set("vector_index", vectorIndexToJSON(vector_index));
}

#ifndef NDEBUG
// Check stringify in Debug mode
std::stringstream str;
Expand Down Expand Up @@ -536,12 +525,6 @@ try
collate = type_json->get("Collate");
}
state = static_cast<SchemaState>(json->getValue<Int32>("state"));

// TODO(vector-index): remove this deadcode
if (auto vector_index_json = json->getObject("vector_index"); vector_index_json)
{
vector_index = parseVectorIndexFromJSON(IndexType::HNSW, vector_index_json);
}
}
catch (const Poco::Exception & e)
{
Expand Down
3 changes: 0 additions & 3 deletions dbms/src/TiDB/Schema/TiDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ struct ColumnInfo
std::vector<std::pair<std::string, Int16>> elems;
SchemaState state = StateNone;

// TODO(vector-index): This index will be moved to the table level later
VectorIndexDefinitionPtr vector_index = nullptr;

#ifdef M
#error "Please undefine macro M first."
#endif
Expand Down
Loading

0 comments on commit 1d84d9e

Please sign in to comment.