From 4900a9013a7d9091e0e6f68e9a753bcfb308461f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Thu, 15 Aug 2024 00:08:11 +0800 Subject: [PATCH] Remove the hack --- dbms/src/Storages/DeltaMerge/Index/VectorIndexCache.h | 11 +++++++---- .../DeltaMerge/Index/VectorIndexHNSW/Index.cpp | 3 ++- .../DeltaMerge/tests/gtest_dm_vector_index.cpp | 11 +++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexCache.h b/dbms/src/Storages/DeltaMerge/Index/VectorIndexCache.h index 2cf51b73812..013631ca1f0 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexCache.h +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexCache.h @@ -24,6 +24,11 @@ #include #include +namespace DB::DM::tests +{ +class VectorIndexTestUtils; +} + namespace DB::DM { @@ -44,11 +49,8 @@ class VectorIndexCache std::condition_variable shutdown_cv; std::mutex shutdown_mu; -#ifdef DBMS_PUBLIC_GTEST -public: -#else private: -#endif + friend class ::DB::DM::tests::VectorIndexTestUtils; // Drop the in-memory Vector Index if the on-disk file is deleted. // mmaped file could be unmmaped so that disk space can be reclaimed. 
@@ -56,6 +58,7 @@ class VectorIndexCache void cleanOutdatedLoop(); + // TODO(vector-index): Use task on BackgroundProcessingPool instead of a raw thread std::thread cleaner_thread; public: diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp index 44e5d40e07d..b5ee8adb0b8 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp @@ -162,6 +162,7 @@ std::vector VectorIndexHNSWViewer::search( std::atomic discarded_nodes = 0; std::atomic has_exception_in_search = false; + // The non-valid rows should be discarded by this lambda auto predicate = [&](typename USearchImplType::member_cref_t const & member) { // Must catch exceptions in the predicate, because search runs on other threads. try @@ -180,7 +181,7 @@ std::vector VectorIndexHNSWViewer::search( } }; - // TODO: Support efSearch. + // TODO(vector-index): Support efSearch. auto result = index.search( // reinterpret_cast(query_info->ref_vec_f32().data() + sizeof(UInt32)), query_info->top_k(), diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index 893ed9dffa8..8c0b68014d2 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -95,6 +95,11 @@ class VectorIndexTestUtils // When used in read, no need to assign vector_index. return ColumnDefine(vec_column_id, vec_column_name, tests::typeFromString("Array(Float32)")); } + + static size_t cleanVectorCacheEntries(const std::shared_ptr<VectorIndexCache> & cache) + { + return cache->cleanOutdatedCacheEntries(); + } }; class VectorIndexDMFileTest @@ -1638,7 +1643,8 @@ try { // We should be able to clear something from the vector index cache. 
auto vec_cache = TiFlashTestEnv::getGlobalContext().getVectorIndexCache(); - ASSERT_EQ(1, vec_cache->cleanOutdatedCacheEntries()); + ASSERT_NE(vec_cache, nullptr); + ASSERT_EQ(1, cleanVectorCacheEntries(vec_cache)); } { // When cache is evicted (and memory cache is dropped), the query should be fine. @@ -1777,7 +1783,8 @@ try { // We should be able to clear something from the vector index cache. auto vec_cache = TiFlashTestEnv::getGlobalContext().getVectorIndexCache(); - ASSERT_EQ(1, vec_cache->cleanOutdatedCacheEntries()); + ASSERT_NE(vec_cache, nullptr); + ASSERT_EQ(1, cleanVectorCacheEntries(vec_cache)); } { // Query should be fine.