Skip to content

Commit

Permalink
storage: Add vector search metrics (#9349)
Browse files Browse the repository at this point in the history
ref #9032

storage: Add vector search metrics

Signed-off-by: Wish <[email protected]>

Co-authored-by: Wenxuan <[email protected]>
  • Loading branch information
Lloyd-Pottiger and breezewish committed Aug 26, 2024
1 parent d6abc70 commit 3f15689
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 3 deletions.
17 changes: 17 additions & 0 deletions dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,23 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva
F(type_gac_req_ru_consumption_delta, {"type", "gac_req_ru_consumption_delta"}), \
F(type_gac_resp_tokens, {"type", "gac_resp_tokens"}), \
F(type_gac_resp_capacity, {"type", "gac_resp_capacity"})) \
M(tiflash_vector_index_memory_usage, \
"Vector index memory usage", \
Gauge, \
F(type_build, {"type", "build"}), \
F(type_view, {"type", "view"})) \
M(tiflash_vector_index_active_instances, \
"Active Vector index instances", \
Gauge, \
F(type_build, {"type", "build"}), \
F(type_view, {"type", "view"})) \
M(tiflash_vector_index_duration, \
"Vector index operation duration", \
Histogram, \
F(type_build, {{"type", "build"}}, ExpBuckets{0.001, 2, 20}), \
F(type_download, {{"type", "download"}}, ExpBuckets{0.001, 2, 20}), \
F(type_view, {{"type", "view"}}, ExpBuckets{0.001, 2, 20}), \
F(type_search, {{"type", "search"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_storage_io_limiter_pending_count, \
"I/O limiter pending count", \
Counter, \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,8 @@ void DMFileWithVectorIndexBlockInputStream::loadVectorIndex()

auto download_duration = watch.elapsedSeconds();
duration_load_index += download_duration;

GET_METRIC(tiflash_vector_index_duration, type_download).Observe(download_duration);
}
else
{
Expand Down
45 changes: 45 additions & 0 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

#include <Columns/ColumnArray.h>
#include <Common/Stopwatch.h>
#include <Common/TiFlashMetrics.h>
#include <Functions/FunctionHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
Expand All @@ -22,6 +24,7 @@
#include <tipb/executor.pb.h>

#include <algorithm>
#include <ext/scope_guard.h>
#include <usearch/index.hpp>
#include <usearch/index_plugins.hpp>

Expand Down Expand Up @@ -62,6 +65,7 @@ VectorIndexHNSWBuilder::VectorIndexHNSWBuilder(const TiDB::VectorIndexDefinition
getUSearchMetricKind(definition->distance_metric))))
{
RUNTIME_CHECK(definition_->kind == tipb::VectorIndexKind::HNSW);
GET_METRIC(tiflash_vector_index_active_instances, type_build).Increment();
}

void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector<UInt8> * del_mark)
Expand All @@ -83,6 +87,9 @@ void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for HNSW index");
}

Stopwatch w;
SCOPE_EXIT({ total_duration += w.elapsedSeconds(); });

for (int i = 0, i_max = col_array->size(); i < i_max; ++i)
{
auto row_offset = added_rows;
Expand Down Expand Up @@ -110,14 +117,30 @@ void VectorIndexHNSWBuilder::addBlock(const IColumn & column, const ColumnVector
row_offset,
rc.error.release());
}

auto current_memory_usage = index.memory_usage();
auto delta = static_cast<Int64>(current_memory_usage) - static_cast<Int64>(last_reported_memory_usage);
GET_METRIC(tiflash_vector_index_memory_usage, type_build).Increment(static_cast<double>(delta));
last_reported_memory_usage = current_memory_usage;
}

void VectorIndexHNSWBuilder::save(std::string_view path) const
{
Stopwatch w;
SCOPE_EXIT({ total_duration += w.elapsedSeconds(); });

auto result = index.save(unum::usearch::output_file_t(path.data()));
RUNTIME_CHECK_MSG(result, "Failed to save vector index: {}", result.error.what());
}

VectorIndexHNSWBuilder::~VectorIndexHNSWBuilder()
{
GET_METRIC(tiflash_vector_index_duration, type_build).Observe(total_duration);
GET_METRIC(tiflash_vector_index_memory_usage, type_build)
.Decrement(static_cast<double>(last_reported_memory_usage));
GET_METRIC(tiflash_vector_index_active_instances, type_build).Decrement();
}

VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProps & file_props, std::string_view path)
{
RUNTIME_CHECK(file_props.index_kind() == tipb::VectorIndexKind_Name(tipb::VectorIndexKind::HNSW));
Expand All @@ -126,13 +149,20 @@ VectorIndexViewerPtr VectorIndexHNSWViewer::view(const dtpb::VectorIndexFileProp
RUNTIME_CHECK(tipb::VectorDistanceMetric_Parse(file_props.distance_metric(), &metric));
RUNTIME_CHECK(metric != tipb::VectorDistanceMetric::INVALID_DISTANCE_METRIC);

Stopwatch w;
SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_view).Observe(w.elapsedSeconds()); });

auto vi = std::make_shared<VectorIndexHNSWViewer>(file_props);
vi->index = USearchImplType::make(unum::usearch::metric_punned_t( //
file_props.dimensions(),
getUSearchMetricKind(metric)));
auto result = vi->index.view(unum::usearch::memory_mapped_file_t(path.data()));
RUNTIME_CHECK_MSG(result, "Failed to load vector index: {}", result.error.what());

auto current_memory_usage = vi->index.memory_usage();
GET_METRIC(tiflash_vector_index_memory_usage, type_view).Increment(static_cast<double>(current_memory_usage));
vi->last_reported_memory_usage = current_memory_usage;

return vi;
}

Expand Down Expand Up @@ -181,6 +211,9 @@ std::vector<VectorIndexBuilder::Key> VectorIndexHNSWViewer::search(
}
};

Stopwatch w;
SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_search).Observe(w.elapsedSeconds()); });

// TODO(vector-index): Support efSearch.
auto result = index.search( //
reinterpret_cast<const Float32 *>(query_info->ref_vec_f32().data() + sizeof(UInt32)),
Expand Down Expand Up @@ -211,4 +244,16 @@ void VectorIndexHNSWViewer::get(Key key, std::vector<Float32> & out) const
index.get(key, out.data());
}

VectorIndexHNSWViewer::VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props)
: VectorIndexViewer(props)
{
GET_METRIC(tiflash_vector_index_active_instances, type_view).Increment();
}

VectorIndexHNSWViewer::~VectorIndexHNSWViewer()
{
GET_METRIC(tiflash_vector_index_memory_usage, type_view).Decrement(static_cast<double>(last_reported_memory_usage));
GET_METRIC(tiflash_vector_index_active_instances, type_view).Decrement();
}

} // namespace DB::DM
13 changes: 10 additions & 3 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,37 @@ class VectorIndexHNSWBuilder : public VectorIndexBuilder
public:
explicit VectorIndexHNSWBuilder(const TiDB::VectorIndexDefinitionPtr & definition_);

~VectorIndexHNSWBuilder() override;

void addBlock(const IColumn & column, const ColumnVector<UInt8> * del_mark) override;

void save(std::string_view path) const override;

private:
USearchImplType index;
UInt64 added_rows = 0; // Includes nulls and deletes. Used as the index key.

mutable double total_duration = 0;
size_t last_reported_memory_usage = 0;
};

class VectorIndexHNSWViewer : public VectorIndexViewer
{
public:
static VectorIndexViewerPtr view(const dtpb::VectorIndexFileProps & props, std::string_view path);

explicit VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props)
: VectorIndexViewer(props)
{}
explicit VectorIndexHNSWViewer(const dtpb::VectorIndexFileProps & props);

~VectorIndexHNSWViewer() override;

std::vector<Key> search(const ANNQueryInfoPtr & query_info, const RowFilter & valid_rows) const override;

void get(Key key, std::vector<Float32> & out) const override;

private:
USearchImplType index;

size_t last_reported_memory_usage = 0;
};

} // namespace DB::DM

0 comments on commit 3f15689

Please sign in to comment.