From 9a9daf8e91852d7de417e9357061bbe82c36dfa2 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Mon, 24 Jun 2024 13:01:25 +0800 Subject: [PATCH] compute: Use SimSIMD for vectors (#221) Signed-off-by: Wish --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 + contrib/simsimd | 1 + contrib/simsimd-cmake/CMakeLists.txt | 13 + contrib/usearch | 2 +- contrib/usearch-cmake/CMakeLists.txt | 2 +- dbms/CMakeLists.txt | 1 + dbms/src/Functions/tests/gtest_vector.cpp | 10 +- .../Index/VectorIndexHNSW/Index.cpp | 8 +- .../DeltaMerge/Index/VectorIndexHNSW/Index.h | 7 +- .../VectorIndexHNSW/usearch_index_dense.h | 2241 ----------------- dbms/src/TiDB/Decode/Vector.cpp | 57 +- 12 files changed, 47 insertions(+), 2300 deletions(-) create mode 160000 contrib/simsimd create mode 100644 contrib/simsimd-cmake/CMakeLists.txt delete mode 100644 dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/usearch_index_dense.h diff --git a/.gitmodules b/.gitmodules index d8452a9e647..7ca0fff755f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -143,3 +143,6 @@ [submodule "contrib/usearch"] path = contrib/usearch url = https://github.com/unum-cloud/usearch.git +[submodule "contrib/simsimd"] + path = contrib/simsimd + url = https://github.com/ashvardanian/SimSIMD diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b2a2766e1c3..9830c102217 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -192,3 +192,5 @@ add_subdirectory(magic_enum) add_subdirectory(aws-cmake) add_subdirectory(usearch-cmake) + +add_subdirectory(simsimd-cmake) diff --git a/contrib/simsimd b/contrib/simsimd new file mode 160000 index 00000000000..3e21934fbe3 --- /dev/null +++ b/contrib/simsimd @@ -0,0 +1 @@ +Subproject commit 3e21934fbe33cbfbd175ab82a83d3cc937b21e2a diff --git a/contrib/simsimd-cmake/CMakeLists.txt b/contrib/simsimd-cmake/CMakeLists.txt new file mode 100644 index 00000000000..7b7a943a367 --- /dev/null +++ b/contrib/simsimd-cmake/CMakeLists.txt @@ -0,0 +1,13 @@ +set(SIMSIMD_PROJECT_DIR "${TiFlash_SOURCE_DIR}/contrib/simsimd") +set(SIMSIMD_SOURCE_DIR "${SIMSIMD_PROJECT_DIR}/include") + +add_library(_simsimd INTERFACE) + +if (NOT EXISTS "${SIMSIMD_SOURCE_DIR}/simsimd/simsimd.h") + message (FATAL_ERROR "submodule contrib/simsimd not found") +endif() + +target_include_directories(_simsimd SYSTEM INTERFACE + ${SIMSIMD_SOURCE_DIR}) + +add_library(tiflash_contrib::simsimd ALIAS _simsimd) diff --git a/contrib/usearch b/contrib/usearch index b878f4182c9..5ea48c87c56 160000 --- a/contrib/usearch +++ b/contrib/usearch @@ -1 +1 @@ -Subproject commit b878f4182c95d473acc31efc46876925dbaf88d1 +Subproject commit 5ea48c87c56a25ab57634a8f207f80ae675ed58e diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt index 672dfe4d428..99ccc61748e 100644 --- a/contrib/usearch-cmake/CMakeLists.txt +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ if (NOT EXISTS "${USEARCH_SOURCE_DIR}/usearch/index.hpp") endif() target_include_directories(_usearch SYSTEM INTERFACE - ${USEARCH_PROJECT_DIR}/simsimd/include + # ${USEARCH_PROJECT_DIR}/simsimd/include # Use our simsimd ${USEARCH_PROJECT_DIR}/fp16/include ${USEARCH_SOURCE_DIR}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index e43964caebe..8be6d28b2a2 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -195,6 +195,7 @@ target_link_libraries (dbms ${BTRIE_LIBRARIES} absl::synchronization tiflash_contrib::usearch + tiflash_contrib::simsimd tiflash_contrib::aws_s3 etcdpb diff --git a/dbms/src/Functions/tests/gtest_vector.cpp b/dbms/src/Functions/tests/gtest_vector.cpp index d67eb683540..3d191e72ce7 100644 --- a/dbms/src/Functions/tests/gtest_vector.cpp +++ b/dbms/src/Functions/tests/gtest_vector.cpp @@ -203,7 +203,15 @@ TEST_F(Vector, CosineDistance) try { ASSERT_COLUMN_EQ( - createColumn>({0.0, std::nullopt, 0.0, 1.0, 2.0, 0.0, 2.0, std::nullopt}), + createColumn>( + {0.004130363464355469, + 1.0, // CosDistance to (0,0) cannot be calculated, clapped to 1.0 + 0.00572967529296875, + 1.0, + 1.9942703247070312, + 0.00022123707458376884, + 1.9997787475585938, + std::nullopt}), executeFunction( "vecCosineDistance", createColumn( diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp index 48bb3d0853a..53f557719d6 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp @@ -187,15 +187,15 @@ std::vector VectorIndexHNSWViewer::search( std::atomic discarded_nodes = 0; std::atomic has_exception_in_search = false; - auto predicate = [&](typename USearchImplType::member_cref_t const & member) { + auto predicate = [&](const Key & key) { // Must catch exceptions in the predicate, because search runs on other threads. try { // Note: We don't increase the thread_local perf, because search runs on other threads. visited_nodes++; - if (!valid_rows[member.key]) + if (!valid_rows[key]) discarded_nodes++; - return valid_rows[member.key]; + return valid_rows[key]; } catch (...) { @@ -209,7 +209,7 @@ std::vector VectorIndexHNSWViewer::search( SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_search).Observe(w.elapsedSeconds()); }); // TODO: Support efSearch. - auto result = index.search( // + auto result = index.filtered_search( // reinterpret_cast(queryInfo->ref_vec_f32().data() + sizeof(UInt32)), queryInfo->top_k(), predicate); diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h index 5ed5b709339..6e8f0d191d2 100644 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h +++ b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h @@ -17,12 +17,7 @@ #include #include -#if __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpass-failed" -#include -#pragma clang diagnostic pop -#endif +#include namespace DB::DM { diff --git a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/usearch_index_dense.h b/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/usearch_index_dense.h deleted file mode 100644 index 3912b7671d5..00000000000 --- a/dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/usearch_index_dense.h +++ /dev/null @@ -1,2241 +0,0 @@ -/** - * @brief This is a modified version of usearch's index_dense.hpp. - * It supports predicate fn when doing the vector search. - * - * Original implementation: https://github.com/unum-cloud/usearch/blob/v2.9.1/include/usearch/index_dense.hpp - */ - -// NOLINTBEGIN(readability-*,google-*,modernize-use-auto) - -#pragma once -#include // `aligned_alloc` - -#include // `std::function` -#include // `std::iota` -#include // `std::thread` -#include -#include -#include // `std::vector` - -#if defined(USEARCH_DEFINED_CPP17) -#include // `std::shared_mutex` -#endif - -namespace unum -{ -namespace usearch -{ - -template -class index_dense_gt; - -/** - * @brief The "magic" sequence helps infer the type of the file. - * USearch indexes start with the "usearch" string. - */ -constexpr char const * default_magic() -{ - return "usearch"; -} - -using index_dense_head_buffer_t = byte_t[64]; - -static_assert(sizeof(index_dense_head_buffer_t) == 64, "File header should be exactly 64 bytes"); - -/** - * @brief Serialized binary representations of the USearch index start with metadata. - * Metadata is parsed into a `index_dense_head_t`, containing the USearch package version, - * and the properties of the index. - * - * It uses: 13 bytes for file versioning, 22 bytes for structural information = 35 bytes. - * The following 24 bytes contain binary size of the graph, of the vectors, and the checksum, - * leaving 5 bytes at the end vacant. - */ -struct index_dense_head_t -{ - // Versioning: - using magic_t = char[7]; - using version_t = std::uint16_t; - - // Versioning: 7 + 2 * 3 = 13 bytes - char const * magic; - misaligned_ref_gt version_major; - misaligned_ref_gt version_minor; - misaligned_ref_gt version_patch; - - // Structural: 4 * 3 = 12 bytes - misaligned_ref_gt kind_metric; - misaligned_ref_gt kind_scalar; - misaligned_ref_gt kind_key; - misaligned_ref_gt kind_compressed_slot; - - // Population: 8 * 3 = 24 bytes - misaligned_ref_gt count_present; - misaligned_ref_gt count_deleted; - misaligned_ref_gt dimensions; - misaligned_ref_gt multi; - - index_dense_head_t(byte_t * ptr) noexcept - : magic((char const *)exchange(ptr, ptr + sizeof(magic_t))) - , // - version_major(exchange(ptr, ptr + sizeof(version_t))) - , // - version_minor(exchange(ptr, ptr + sizeof(version_t))) - , // - version_patch(exchange(ptr, ptr + sizeof(version_t))) - , // - kind_metric(exchange(ptr, ptr + sizeof(metric_kind_t))) - , // - kind_scalar(exchange(ptr, ptr + sizeof(scalar_kind_t))) - , // - kind_key(exchange(ptr, ptr + sizeof(scalar_kind_t))) - , // - kind_compressed_slot(exchange(ptr, ptr + sizeof(scalar_kind_t))) - , // - count_present(exchange(ptr, ptr + sizeof(std::uint64_t))) - , // - count_deleted(exchange(ptr, ptr + sizeof(std::uint64_t))) - , // - dimensions(exchange(ptr, ptr + sizeof(std::uint64_t))) - , // - multi(exchange(ptr, ptr + sizeof(bool))) - {} -}; - -struct index_dense_head_result_t -{ - index_dense_head_buffer_t buffer; - index_dense_head_t head; - error_t error; - - explicit operator bool() const noexcept { return !error; } - index_dense_head_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } -}; - -struct index_dense_config_t : public index_config_t -{ - std::size_t expansion_add = default_expansion_add(); - std::size_t expansion_search = default_expansion_search(); - bool exclude_vectors = false; - bool multi = false; - - /** - * @brief Allows you to reduce RAM consumption by avoiding - * reverse-indexing keys-to-vectors, and only keeping - * the vectors-to-keys mappings. - * - * ! This configuration parameter doesn't affect the serialized file, - * ! and is not preserved between runs. Makes sense for small vector - * ! representations that fit ina single cache line. - */ - bool enable_key_lookups = true; - - index_dense_config_t(index_config_t base) noexcept - : index_config_t(base) - {} - - index_dense_config_t( - std::size_t c = default_connectivity(), - std::size_t ea = default_expansion_add(), - std::size_t es = default_expansion_search()) noexcept - : index_config_t(c) - , expansion_add(ea ? ea : default_expansion_add()) - , expansion_search(es ? es : default_expansion_search()) - {} -}; - -struct index_dense_clustering_config_t -{ - std::size_t min_clusters = 0; - std::size_t max_clusters = 0; - enum mode_t - { - merge_smallest_k, - merge_closest_k, - } mode = merge_smallest_k; -}; - -struct index_dense_serialization_config_t -{ - bool exclude_vectors = false; - bool use_64_bit_dimensions = false; -}; - -struct index_dense_copy_config_t : public index_copy_config_t -{ - bool force_vector_copy = true; - - index_dense_copy_config_t() = default; - index_dense_copy_config_t(index_copy_config_t base) noexcept - : index_copy_config_t(base) - {} -}; - -struct index_dense_metadata_result_t -{ - index_dense_serialization_config_t config; - index_dense_head_buffer_t head_buffer; - index_dense_head_t head; - error_t error; - - explicit operator bool() const noexcept { return !error; } - index_dense_metadata_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } - - index_dense_metadata_result_t() noexcept - : config() - , head_buffer() - , head(head_buffer) - , error() - {} - - index_dense_metadata_result_t(index_dense_metadata_result_t && other) noexcept - : config() - , head_buffer() - , head(head_buffer) - , error(std::move(other.error)) - { - std::memcpy(&config, &other.config, sizeof(other.config)); - std::memcpy(&head_buffer, &other.head_buffer, sizeof(other.head_buffer)); - } - - index_dense_metadata_result_t & operator=(index_dense_metadata_result_t && other) noexcept - { - std::memcpy(&config, &other.config, sizeof(other.config)); - std::memcpy(&head_buffer, &other.head_buffer, sizeof(other.head_buffer)); - error = std::move(other.error); - return *this; - } -}; - -/** - * @brief Extracts metadata from a pre-constructed index on disk, - * without loading it or mapping the whole binary file. - */ -inline index_dense_metadata_result_t index_dense_metadata_from_path(char const * file_path) noexcept -{ - index_dense_metadata_result_t result; - std::unique_ptr file(std::fopen(file_path, "rb"), &std::fclose); - if (!file) - return result.failed(std::strerror(errno)); - - // Read the header - std::size_t read = std::fread(result.head_buffer, sizeof(index_dense_head_buffer_t), 1, file.get()); - if (!read) - return result.failed(std::feof(file.get()) ? "End of file reached!" : std::strerror(errno)); - - // Check if the file immediately starts with the index, instead of vectors - result.config.exclude_vectors = true; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - - if (std::fseek(file.get(), 0L, SEEK_END) != 0) - return result.failed("Can't infer file size"); - - // Check if it starts with 32-bit - std::size_t const file_size = std::ftell(file.get()); - - std::uint32_t dimensions_u32[2]{0}; - std::memcpy(dimensions_u32, result.head_buffer, sizeof(dimensions_u32)); - std::size_t offset_if_u32 = std::size_t(dimensions_u32[0]) * dimensions_u32[1] + sizeof(dimensions_u32); - - std::uint64_t dimensions_u64[2]{0}; - std::memcpy(dimensions_u64, result.head_buffer, sizeof(dimensions_u64)); - std::size_t offset_if_u64 = std::size_t(dimensions_u64[0]) * dimensions_u64[1] + sizeof(dimensions_u64); - - // Check if it starts with 32-bit - if (offset_if_u32 + sizeof(index_dense_head_buffer_t) < file_size) - { - if (std::fseek(file.get(), static_cast(offset_if_u32), SEEK_SET) != 0) - return result.failed(std::strerror(errno)); - read = std::fread(result.head_buffer, sizeof(index_dense_head_buffer_t), 1, file.get()); - if (!read) - return result.failed(std::feof(file.get()) ? "End of file reached!" : std::strerror(errno)); - - result.config.exclude_vectors = false; - result.config.use_64_bit_dimensions = false; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - } - - // Check if it starts with 64-bit - if (offset_if_u64 + sizeof(index_dense_head_buffer_t) < file_size) - { - if (std::fseek(file.get(), static_cast(offset_if_u64), SEEK_SET) != 0) - return result.failed(std::strerror(errno)); - read = std::fread(result.head_buffer, sizeof(index_dense_head_buffer_t), 1, file.get()); - if (!read) - return result.failed(std::feof(file.get()) ? "End of file reached!" : std::strerror(errno)); - - // Check if it starts with 64-bit - result.config.exclude_vectors = false; - result.config.use_64_bit_dimensions = true; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - } - - return result.failed("Not a dense USearch index!"); -} - -/** - * @brief Extracts metadata from a pre-constructed index serialized into an in-memory buffer. - */ -inline index_dense_metadata_result_t index_dense_metadata_from_buffer( - memory_mapped_file_t file, - std::size_t offset = 0) noexcept -{ - index_dense_metadata_result_t result; - - // Read the header - if (offset + sizeof(index_dense_head_buffer_t) >= file.size()) - return result.failed("End of file reached!"); - - byte_t * const file_data = file.data() + offset; - std::size_t const file_size = file.size() - offset; - std::memcpy(&result.head_buffer, file_data, sizeof(index_dense_head_buffer_t)); - - // Check if the file immediately starts with the index, instead of vectors - result.config.exclude_vectors = true; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - - // Check if it starts with 32-bit - std::uint32_t dimensions_u32[2]{0}; - std::memcpy(dimensions_u32, result.head_buffer, sizeof(dimensions_u32)); - std::size_t offset_if_u32 = std::size_t(dimensions_u32[0]) * dimensions_u32[1] + sizeof(dimensions_u32); - - std::uint64_t dimensions_u64[2]{0}; - std::memcpy(dimensions_u64, result.head_buffer, sizeof(dimensions_u64)); - std::size_t offset_if_u64 = std::size_t(dimensions_u64[0]) * dimensions_u64[1] + sizeof(dimensions_u64); - - // Check if it starts with 32-bit - if (offset_if_u32 + sizeof(index_dense_head_buffer_t) < file_size) - { - std::memcpy(&result.head_buffer, file_data + offset_if_u32, sizeof(index_dense_head_buffer_t)); - result.config.exclude_vectors = false; - result.config.use_64_bit_dimensions = false; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - } - - // Check if it starts with 64-bit - if (offset_if_u64 + sizeof(index_dense_head_buffer_t) < file_size) - { - std::memcpy(&result.head_buffer, file_data + offset_if_u64, sizeof(index_dense_head_buffer_t)); - result.config.exclude_vectors = false; - result.config.use_64_bit_dimensions = true; - if (std::memcmp(result.head_buffer, default_magic(), std::strlen(default_magic())) == 0) - return result; - } - - return result.failed("Not a dense USearch index!"); -} - -/** - * @brief Oversimplified type-punned index for equidimensional vectors - * with automatic @b down-casting, hardware-specific @b SIMD metrics, - * and ability to @b remove existing vectors, common in Semantic Caching - * applications. - * - * @section Serialization - * - * The serialized binary form of `index_dense_gt` is made up of three parts: - * 1. Binary matrix, aka the `.bbin` part, - * 2. Metadata about used metrics, number of used vs free slots, - * 3. The HNSW index in a binary form. - * The first (1.) generally starts with 2 integers - number of rows (vectors) and @b single-byte columns. - * The second (2.) starts with @b "usearch"-magic-string, used to infer the file type on open. - * The third (3.) is implemented by the underlying `index_gt` class. - */ -template // -class index_dense_gt -{ -public: - using vector_key_t = key_at; - using key_t = vector_key_t; - using compressed_slot_t = compressed_slot_at; - using distance_t = distance_punned_t; - using metric_t = metric_punned_t; - - using member_ref_t = member_ref_gt; - using member_cref_t = member_cref_gt; - - using head_t = index_dense_head_t; - using head_buffer_t = index_dense_head_buffer_t; - using head_result_t = index_dense_head_result_t; - - using serialization_config_t = index_dense_serialization_config_t; - - using dynamic_allocator_t = aligned_allocator_gt; - using tape_allocator_t = memory_mapping_allocator_gt<64>; - -private: - /// @brief Schema: input buffer, bytes in input buffer, output buffer. - using cast_t = std::function; - /// @brief Punned index. - using index_t = index_gt< // - distance_t, - vector_key_t, - compressed_slot_t, // - dynamic_allocator_t, - tape_allocator_t>; - using index_allocator_t = aligned_allocator_gt; - - using member_iterator_t = typename index_t::member_iterator_t; - using member_citerator_t = typename index_t::member_citerator_t; - - /// @brief Punned metric object. - class metric_proxy_t - { - index_dense_gt const * index_ = nullptr; - - public: - metric_proxy_t(index_dense_gt const & index) noexcept - : index_(&index) - {} - - inline distance_t operator()(byte_t const * a, member_cref_t b) const noexcept { return f(a, v(b)); } - inline distance_t operator()(member_cref_t a, member_cref_t b) const noexcept { return f(v(a), v(b)); } - - inline distance_t operator()(byte_t const * a, member_citerator_t b) const noexcept { return f(a, v(b)); } - inline distance_t operator()(member_citerator_t a, member_citerator_t b) const noexcept - { - return f(v(a), v(b)); - } - - inline distance_t operator()(byte_t const * a, byte_t const * b) const noexcept { return f(a, b); } - - inline byte_t const * v(member_cref_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; } - inline byte_t const * v(member_citerator_t m) const noexcept { return index_->vectors_lookup_[get_slot(m)]; } - inline distance_t f(byte_t const * a, byte_t const * b) const noexcept { return index_->metric_(a, b); } - }; - - index_dense_config_t config_; - index_t * typed_ = nullptr; - - mutable std::vector cast_buffer_; - struct casts_t - { - cast_t from_b1x8; - cast_t from_i8; - cast_t from_f16; - cast_t from_f32; - cast_t from_f64; - - cast_t to_b1x8; - cast_t to_i8; - cast_t to_f16; - cast_t to_f32; - cast_t to_f64; - } casts_; - - /// @brief An instance of a potentially stateful `metric_t` used to initialize copies and forks. - metric_t metric_; - - using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>; - /// @brief Allocator for the copied vectors, aligned to widest double-precision scalars. - vectors_tape_allocator_t vectors_tape_allocator_; - - /// @brief For every managed `compressed_slot_t` stores a pointer to the allocated vector copy. - mutable std::vector vectors_lookup_; - - /// @brief Originally forms and array of integers [0, threads], marking all - mutable std::vector available_threads_; - - /// @brief Mutex, controlling concurrent access to `available_threads_`. - mutable std::mutex available_threads_mutex_; - -#if defined(USEARCH_DEFINED_CPP17) - using shared_mutex_t = std::shared_mutex; -#else - using shared_mutex_t = unfair_shared_mutex_t; -#endif - using shared_lock_t = shared_lock_gt; - using unique_lock_t = std::unique_lock; - - struct key_and_slot_t - { - vector_key_t key; - compressed_slot_t slot; - - bool any_slot() const { return slot == default_free_value(); } - static key_and_slot_t any_slot(vector_key_t key) { return {key, default_free_value()}; } - }; - - struct lookup_key_hash_t - { - using is_transparent = void; - std::size_t operator()(key_and_slot_t const & k) const noexcept { return std::hash{}(k.key); } - std::size_t operator()(vector_key_t const & k) const noexcept { return std::hash{}(k); } - }; - - struct lookup_key_same_t - { - using is_transparent = void; - bool operator()(key_and_slot_t const & a, vector_key_t const & b) const noexcept { return a.key == b; } - bool operator()(vector_key_t const & a, key_and_slot_t const & b) const noexcept { return a == b.key; } - bool operator()(key_and_slot_t const & a, key_and_slot_t const & b) const noexcept { return a.key == b.key; } - }; - - /// @brief Multi-Map from keys to IDs, and allocated vectors. - flat_hash_multi_set_gt slot_lookup_; - - /// @brief Mutex, controlling concurrent access to `slot_lookup_`. - mutable shared_mutex_t slot_lookup_mutex_; - - /// @brief Ring-shaped queue of deleted entries, to be reused on future insertions. - ring_gt free_keys_; - - /// @brief Mutex, controlling concurrent access to `free_keys_`. - mutable std::mutex free_keys_mutex_; - - /// @brief A constant for the reserved key value, used to mark deleted entries. - vector_key_t free_key_ = default_free_value(); - -public: - using search_result_t = typename index_t::search_result_t; - using cluster_result_t = typename index_t::cluster_result_t; - using add_result_t = typename index_t::add_result_t; - using stats_t = typename index_t::stats_t; - using match_t = typename index_t::match_t; - - index_dense_gt() = default; - index_dense_gt(index_dense_gt && other) - : config_(std::move(other.config_)) - , - - typed_(exchange(other.typed_, nullptr)) - , // - cast_buffer_(std::move(other.cast_buffer_)) - , // - casts_(std::move(other.casts_)) - , // - metric_(std::move(other.metric_)) - , // - - vectors_tape_allocator_(std::move(other.vectors_tape_allocator_)) - , // - vectors_lookup_(std::move(other.vectors_lookup_)) - , // - - available_threads_(std::move(other.available_threads_)) - , // - slot_lookup_(std::move(other.slot_lookup_)) - , // - free_keys_(std::move(other.free_keys_)) - , // - free_key_(std::move(other.free_key_)) - {} // - - index_dense_gt & operator=(index_dense_gt && other) - { - swap(other); - return *this; - } - - /** - * @brief Swaps the contents of this index with another index. - * @param other The other index to swap with. - */ - void swap(index_dense_gt & other) - { - std::swap(config_, other.config_); - - std::swap(typed_, other.typed_); - std::swap(cast_buffer_, other.cast_buffer_); - std::swap(casts_, other.casts_); - std::swap(metric_, other.metric_); - - std::swap(vectors_tape_allocator_, other.vectors_tape_allocator_); - std::swap(vectors_lookup_, other.vectors_lookup_); - - std::swap(available_threads_, other.available_threads_); - std::swap(slot_lookup_, other.slot_lookup_); - std::swap(free_keys_, other.free_keys_); - std::swap(free_key_, other.free_key_); - } - - ~index_dense_gt() - { - if (typed_) - typed_->~index_t(); - index_allocator_t{}.deallocate(typed_, 1); - typed_ = nullptr; - } - - /** - * @brief Constructs an instance of ::index_dense_gt. - * @param[in] metric One of the provided or an @b ad-hoc metric, type-punned. - * @param[in] config The index configuration (optional). - * @param[in] free_key The key used for freed vectors (optional). - * @return An instance of ::index_dense_gt. - */ - static index_dense_gt make( // - metric_t metric, // - index_dense_config_t config = {}, // - vector_key_t free_key = default_free_value()) - { - scalar_kind_t scalar_kind = metric.scalar_kind(); - std::size_t hardware_threads = std::thread::hardware_concurrency(); - - index_dense_gt result; - result.config_ = config; - result.cast_buffer_.resize(hardware_threads * metric.bytes_per_vector()); - result.casts_ = make_casts_(scalar_kind); - result.metric_ = metric; - result.free_key_ = free_key; - - // Fill the thread IDs. - result.available_threads_.resize(hardware_threads); - std::iota(result.available_threads_.begin(), result.available_threads_.end(), 0ul); - - // Available since C11, but only C++17, so we use the C version. - index_t * raw = index_allocator_t{}.allocate(1); - new (raw) index_t(config); - result.typed_ = raw; - return result; - } - - static index_dense_gt make(char const * path, bool view = false) - { - index_dense_metadata_result_t meta = index_dense_metadata_from_path(path); - if (!meta) - return {}; - metric_punned_t metric(meta.head.dimensions, meta.head.kind_metric, meta.head.kind_scalar); - index_dense_gt result = make(metric); - if (!result) - return result; - if (view) - result.view(path); - else - result.load(path); - return result; - } - - explicit operator bool() const { return typed_; } - std::size_t connectivity() const { return typed_->connectivity(); } - std::size_t size() const { return typed_->size() - free_keys_.size(); } - std::size_t capacity() const { return typed_->capacity(); } - std::size_t max_level() const noexcept { return typed_->max_level(); } - index_dense_config_t const & config() const { return config_; } - index_limits_t const & limits() const { return typed_->limits(); } - bool multi() const { return config_.multi; } - - // The metric and its properties - metric_t const & metric() const { return metric_; } - void change_metric(metric_t metric) { metric_ = std::move(metric); } - - scalar_kind_t scalar_kind() const noexcept { return metric_.scalar_kind(); } - std::size_t bytes_per_vector() const noexcept { return metric_.bytes_per_vector(); } - std::size_t scalar_words() const noexcept { return metric_.scalar_words(); } - std::size_t dimensions() const noexcept { return metric_.dimensions(); } - - // Fetching and changing search criteria - std::size_t expansion_add() const { return config_.expansion_add; } - std::size_t expansion_search() const { return config_.expansion_search; } - void change_expansion_add(std::size_t n) { config_.expansion_add = n; } - void change_expansion_search(std::size_t n) { config_.expansion_search = n; } - - member_citerator_t cbegin() const { return typed_->cbegin(); } - member_citerator_t cend() const { return typed_->cend(); } - member_citerator_t begin() const { return typed_->begin(); } - member_citerator_t end() const { return typed_->end(); } - member_iterator_t begin() { return typed_->begin(); } - member_iterator_t end() { return typed_->end(); } - - stats_t stats() const { return typed_->stats(); } - stats_t stats(std::size_t level) const { return typed_->stats(level); } - stats_t stats(stats_t * stats_per_level, std::size_t max_level) const - { - return typed_->stats(stats_per_level, max_level); - } - - dynamic_allocator_t const & allocator() const { return typed_->dynamic_allocator(); } - vector_key_t const & free_key() const { return free_key_; } - - /** - * @brief A relatively accurate lower bound on the amount of memory consumed by the system. - * In practice it's error will be below 10%. - * - * @see `serialized_length` for the length of the binary serialized representation. - */ - std::size_t memory_usage() const - { - return // - typed_->memory_usage(0) + // - typed_->tape_allocator().total_wasted() + // - typed_->tape_allocator().total_reserved() + // - vectors_tape_allocator_.total_allocated(); - } - - static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } - static constexpr distance_t infinite_distance() { return std::numeric_limits::max(); } - - struct aggregated_distances_t - { - std::size_t count = 0; - distance_t mean = infinite_distance(); - distance_t min = infinite_distance(); - distance_t max = infinite_distance(); - }; - - // clang-format off - add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_b1x8); } - add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_i8); } - add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f16); } - add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f32); } - add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f64); } - - template - search_result_t search(b1x8_t const* vector, std::size_t wanted, predicate_at&& predicate = predicate_at{}, std::size_t thread = any_thread(), size_t expansion = default_expansion_search(), bool exact = false) const { return search_(vector, wanted, predicate, thread, expansion, exact, casts_.from_b1x8); } - template - search_result_t search(i8_t const* vector, std::size_t wanted, predicate_at&& predicate = predicate_at{}, std::size_t thread = any_thread(), size_t expansion = default_expansion_search(), bool exact = false) const { return search_(vector, wanted, predicate, thread, expansion, exact, casts_.from_i8); } - template - search_result_t search(f16_t const* vector, std::size_t wanted, predicate_at&& predicate = predicate_at{}, std::size_t thread = any_thread(), size_t expansion = default_expansion_search(), bool exact = false) const { return search_(vector, wanted, predicate, thread, expansion, exact, casts_.from_f16); } - template - search_result_t search(f32_t const* vector, std::size_t wanted, predicate_at&& predicate = predicate_at{}, std::size_t thread = any_thread(), size_t expansion = default_expansion_search(), bool exact = false) const { return search_(vector, wanted, predicate, thread, expansion, exact, casts_.from_f32); } - template - search_result_t search(f64_t const* vector, std::size_t wanted, predicate_at&& predicate = predicate_at{}, std::size_t thread = any_thread(), size_t expansion = default_expansion_search(), bool exact = false) const { return search_(vector, wanted, predicate, thread, expansion, exact, casts_.from_f64); } - - std::size_t get(vector_key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_b1x8); } - std::size_t get(vector_key_t key, i8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_i8); } - std::size_t get(vector_key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f16); } - std::size_t get(vector_key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f32); } - std::size_t get(vector_key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f64); } - - cluster_result_t cluster(b1x8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_b1x8); } - cluster_result_t cluster(i8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_i8); } - cluster_result_t cluster(f16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f16); } - cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f32); } - cluster_result_t cluster(f64_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f64); } - - aggregated_distances_t distance_between(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_b1x8); } - aggregated_distances_t distance_between(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_i8); } - aggregated_distances_t distance_between(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f16); } - aggregated_distances_t distance_between(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f32); } - aggregated_distances_t distance_between(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f64); } - // clang-format on - - /** - * @brief Computes the distance between two managed entities. - * If either key maps into more than one vector, will aggregate results - * exporting the mean, maximum, and minimum values. - */ - aggregated_distances_t distance_between(vector_key_t a, vector_key_t b, std::size_t = any_thread()) const - { - shared_lock_t lock(slot_lookup_mutex_); - aggregated_distances_t result; - if (!multi()) - { - auto a_it = slot_lookup_.find(key_and_slot_t::any_slot(a)); - auto b_it = slot_lookup_.find(key_and_slot_t::any_slot(b)); - bool a_missing = a_it == slot_lookup_.end(); - bool b_missing = b_it == slot_lookup_.end(); - if (a_missing || b_missing) - return result; - - key_and_slot_t a_key_and_slot = *a_it; - byte_t const * a_vector = vectors_lookup_[a_key_and_slot.slot]; - key_and_slot_t b_key_and_slot = *b_it; - byte_t const * b_vector = vectors_lookup_[b_key_and_slot.slot]; - distance_t a_b_distance = metric_(a_vector, b_vector); - - result.mean = result.min = result.max = a_b_distance; - result.count = 1; - return result; - } - - auto a_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(a)); - auto b_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(b)); - bool a_missing = a_range.first == a_range.second; - bool b_missing = b_range.first == b_range.second; - if (a_missing || b_missing) - return result; - - result.min = std::numeric_limits::max(); - result.max = std::numeric_limits::min(); - result.mean = 0; - result.count = 0; - - while (a_range.first != a_range.second) - { - key_and_slot_t a_key_and_slot = *a_range.first; - byte_t const * a_vector = vectors_lookup_[a_key_and_slot.slot]; - while (b_range.first != b_range.second) - { - key_and_slot_t b_key_and_slot = *b_range.first; - byte_t const * b_vector = vectors_lookup_[b_key_and_slot.slot]; - distance_t a_b_distance = metric_(a_vector, b_vector); - - result.mean += a_b_distance; - result.min = (std::min)(result.min, a_b_distance); - result.max = (std::max)(result.max, a_b_distance); - result.count++; - - // - ++b_range.first; - } - ++a_range.first; - } - - result.mean /= result.count; - return result; - } - - /** - * @brief Identifies a node in a given `level`, that is the closest to the `key`. - */ - cluster_result_t cluster(vector_key_t key, std::size_t level, std::size_t thread = any_thread()) const - { - // Check if such `key` is even present. - shared_lock_t slots_lock(slot_lookup_mutex_); - auto key_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); - cluster_result_t result; - if (key_range.first == key_range.second) - return result.failed("Key missing!"); - - index_cluster_config_t cluster_config; - thread_lock_t lock = thread_lock_(thread); - cluster_config.thread = lock.thread_id; - cluster_config.expansion = config_.expansion_search; - metric_proxy_t metric{*this}; - auto allow = [=](member_cref_t const & member) noexcept { - return member.key != free_key_; - }; - - // Find the closest cluster for any vector under that key. - while (key_range.first != key_range.second) - { - key_and_slot_t key_and_slot = *key_range.first; - byte_t const * vector_data = vectors_lookup_[key_and_slot.slot]; - cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); - if (!new_result) - return new_result; - if (new_result.cluster.distance < result.cluster.distance) - result = std::move(new_result); - - ++key_range.first; - } - return result; - } - - /** - * @brief Reserves memory for the index and the keyed lookup. - * @return `true` if the memory reservation was successful, `false` otherwise. - */ - bool reserve(index_limits_t limits) - { - { - unique_lock_t lock(slot_lookup_mutex_); - slot_lookup_.reserve(limits.members); - vectors_lookup_.resize(limits.members); - } - return typed_->reserve(limits); - } - - /** - * @brief Erases all the vectors from the index. - * - * Will change `size()` to zero, but will keep the same `capacity()`. - * Will keep the number of available threads/contexts the same as it was. - */ - void clear() - { - unique_lock_t lookup_lock(slot_lookup_mutex_); - - std::unique_lock free_lock(free_keys_mutex_); - typed_->clear(); - slot_lookup_.clear(); - vectors_lookup_.clear(); - free_keys_.clear(); - vectors_tape_allocator_.reset(); - } - - /** - * @brief Erases all members from index, closing files, and returning RAM to OS. - * - * Will change both `size()` and `capacity()` to zero. - * Will deallocate all threads/contexts. - * If the index is memory-mapped - releases the mapping and the descriptor. - */ - void reset() - { - unique_lock_t lookup_lock(slot_lookup_mutex_); - - std::unique_lock free_lock(free_keys_mutex_); - std::unique_lock available_threads_lock(available_threads_mutex_); - typed_->reset(); - slot_lookup_.clear(); - vectors_lookup_.clear(); - free_keys_.clear(); - vectors_tape_allocator_.reset(); - - // Reset the thread IDs. - available_threads_.resize(std::thread::hardware_concurrency()); - std::iota(available_threads_.begin(), available_threads_.end(), 0ul); - } - - /** - * @brief Saves serialized binary index representation to a stream. - */ - template - serialization_result_t save_to_stream( - output_callback_at && output, // - serialization_config_t config = {}, // - progress_at && progress = {}) const - { - serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to put the vectors into the same file - if (!config.exclude_vectors) - { - // Save the matrix size - if (!config.use_64_bit_dimensions) - { - std::uint32_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - else - { - std::uint64_t dimensions[2]; - dimensions[0] = static_cast(typed_->size()); - dimensions[1] = static_cast(metric_.bytes_per_vector()); - if (!output(&dimensions, sizeof(dimensions))) - return result.failed("Failed to serialize into stream"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - - // Dump the vectors one after another - for (std::uint64_t i = 0; i != matrix_rows; ++i) - { - byte_t * vector = vectors_lookup_[i]; - if (!output(vector, matrix_cols)) - return result.failed("Failed to serialize into stream"); - } - } - - // Augment metadata - { - index_dense_head_buffer_t buffer; - std::memset(buffer, 0, sizeof(buffer)); - index_dense_head_t head{buffer}; - std::memcpy(buffer, default_magic(), std::strlen(default_magic())); - - // Describe software version - using version_t = index_dense_head_t::version_t; - head.version_major = static_cast(USEARCH_VERSION_MAJOR); - head.version_minor = static_cast(USEARCH_VERSION_MINOR); - head.version_patch = static_cast(USEARCH_VERSION_PATCH); - - // Describes types used - head.kind_metric = metric_.metric_kind(); - head.kind_scalar = metric_.scalar_kind(); - head.kind_key = unum::usearch::scalar_kind(); - head.kind_compressed_slot = unum::usearch::scalar_kind(); - - head.count_present = size(); - head.count_deleted = typed_->size() - size(); - head.dimensions = dimensions(); - head.multi = multi(); - - if (!output(&buffer, sizeof(buffer))) - return result.failed("Failed to serialize into stream"); - } - - // Save the actual proximity graph - return typed_->save_to_stream(std::forward(output), std::forward(progress)); - } - - /** - * @brief Estimate the binary length (in bytes) of the serialized index. - */ - std::size_t serialized_length(serialization_config_t config = {}) const noexcept - { - std::size_t dimensions_length = 0; - std::size_t matrix_length = 0; - if (!config.exclude_vectors) - { - dimensions_length = config.use_64_bit_dimensions ? sizeof(std::uint64_t) * 2 : sizeof(std::uint32_t) * 2; - matrix_length = typed_->size() * metric_.bytes_per_vector(); - } - return dimensions_length + matrix_length + sizeof(index_dense_head_buffer_t) + typed_->serialized_length(); - } - - /** - * @brief Parses the index from file to RAM. - * @param[in] path The path to the file. - * @param[in] config Configuration parameters for imports. - * @return Outcome descriptor explicitly convertible to boolean. - */ - template - serialization_result_t load_from_stream( - input_callback_at && input, // - serialization_config_t config = {}, // - progress_at && progress = {}) - { - // Discard all previous memory allocations of `vectors_tape_allocator_` - reset(); - - // Infer the new index size - serialization_result_t result; - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - - // We may not want to load the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) - { - // Save the matrix size - if (!config.use_64_bit_dimensions) - { - std::uint32_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 32-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - else - { - std::uint64_t dimensions[2]; - if (!input(&dimensions, sizeof(dimensions))) - return result.failed("Failed to read 64-bit dimensions of the matrix"); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - } - // Load the vectors one after another - vectors_lookup_.resize(matrix_rows); - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - { - byte_t * vector = vectors_tape_allocator_.allocate(matrix_cols); - if (!input(vector, matrix_cols)) - return result.failed("Failed to read vectors"); - vectors_lookup_[slot] = vector; - } - } - - // Load metadata and choose the right metric - { - index_dense_head_buffer_t buffer; - if (!input(buffer, sizeof(buffer))) - return result.failed("Failed to read the index "); - - index_dense_head_t head{buffer}; - if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) - return result.failed("Magic header mismatch - the file isn't an index"); - - // Validate the software version - if (head.version_major != USEARCH_VERSION_MAJOR) - return result.failed("File format may be different, please rebuild"); - - // Check the types used - if (head.kind_key != unum::usearch::scalar_kind()) - return result.failed("Key type doesn't match, consider rebuilding"); - if (head.kind_compressed_slot != unum::usearch::scalar_kind()) - return result.failed("Slot type doesn't match, consider rebuilding"); - - config_.multi = head.multi; - metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); - cast_buffer_.resize(available_threads_.size() * metric_.bytes_per_vector()); - casts_ = make_casts_(head.kind_scalar); - } - - // Pull the actual proximity graph - result = typed_->load_from_stream(std::forward(input), std::forward(progress)); - if (!result) - return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); - - reindex_keys_(); - return result; - } - - /** - * @brief Parses the index from file, without loading it into RAM. - * @param[in] path The path to the file. - * @param[in] config Configuration parameters for imports. - * @return Outcome descriptor explicitly convertible to boolean. - */ - template - serialization_result_t view( - memory_mapped_file_t file, // - std::size_t offset = 0, - serialization_config_t config = {}, // - progress_at && progress = {}) - { - // Discard all previous memory allocations of `vectors_tape_allocator_` - reset(); - - serialization_result_t result = file.open_if_not(); - if (!result) - return result; - - // Infer the new index size - std::uint64_t matrix_rows = 0; - std::uint64_t matrix_cols = 0; - span_punned_t vectors_buffer; - - // We may not want to fetch the vectors from the same file, or allow attaching them afterwards - if (!config.exclude_vectors) - { - // Save the matrix size - if (!config.use_64_bit_dimensions) - { - std::uint32_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } - else - { - std::uint64_t dimensions[2]; - if (file.size() - offset < sizeof(dimensions)) - return result.failed("File is corrupted and lacks matrix dimensions"); - std::memcpy(&dimensions, file.data() + offset, sizeof(dimensions)); - matrix_rows = dimensions[0]; - matrix_cols = dimensions[1]; - offset += sizeof(dimensions); - } - vectors_buffer = {file.data() + offset, static_cast(matrix_rows * matrix_cols)}; - offset += vectors_buffer.size(); - } - - // Load metadata and choose the right metric - { - index_dense_head_buffer_t buffer; - if (file.size() - offset < sizeof(buffer)) - return result.failed("File is corrupted and lacks a header"); - - std::memcpy(buffer, file.data() + offset, sizeof(buffer)); - - index_dense_head_t head{buffer}; - if (std::memcmp(buffer, default_magic(), std::strlen(default_magic())) != 0) - return result.failed("Magic header mismatch - the file isn't an index"); - - // Validate the software version - if (head.version_major != USEARCH_VERSION_MAJOR) - return result.failed("File format may be different, please rebuild"); - - // Check the types used - if (head.kind_key != unum::usearch::scalar_kind()) - return result.failed("Key type doesn't match, consider rebuilding"); - if (head.kind_compressed_slot != unum::usearch::scalar_kind()) - return result.failed("Slot type doesn't match, consider rebuilding"); - - config_.multi = head.multi; - metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); - cast_buffer_.resize(available_threads_.size() * metric_.bytes_per_vector()); - casts_ = make_casts_(head.kind_scalar); - offset += sizeof(buffer); - } - - // Pull the actual proximity graph - result = typed_->view(std::move(file), offset, std::forward(progress)); - if (!result) - return result; - if (typed_->size() != static_cast(matrix_rows)) - return result.failed("Index size and the number of vectors doesn't match"); - - // Address the vectors - vectors_lookup_.resize(matrix_rows); - if (!config.exclude_vectors) - for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) - vectors_lookup_[slot] = (byte_t *)vectors_buffer.data() + matrix_cols * slot; - - reindex_keys_(); - return result; - } - - /** - * @brief Saves the index to a file. - * @param[in] path The path to the file. - * @param[in] config Configuration parameters for exports. - * @return Outcome descriptor explicitly convertible to boolean. - */ - template - serialization_result_t save(output_file_t file, serialization_config_t config = {}, progress_at && progress = {}) - const - { - serialization_result_t io_result = file.open_if_not(); - if (!io_result) - return io_result; - - serialization_result_t stream_result = save_to_stream( - [&](void const * buffer, std::size_t length) { - io_result = file.write(buffer, length); - return !!io_result; - }, - config, - std::forward(progress)); - - if (!stream_result) - { - io_result.error.release(); - return stream_result; - } - return io_result; - } - - /** - * @brief Memory-maps the serialized binary index representation from disk, - * @b without copying data into RAM, and fetching it on-demand. - */ - template - serialization_result_t save( - memory_mapped_file_t file, // - std::size_t offset = 0, // - serialization_config_t config = {}, // - progress_at && progress = {}) const - { - serialization_result_t io_result = file.open_if_not(); - if (!io_result) - return io_result; - - serialization_result_t stream_result = save_to_stream( - [&](void const * buffer, std::size_t length) { - if (offset + length > file.size()) - return false; - std::memcpy(file.data() + offset, buffer, length); - offset += length; - return true; - }, - config, - std::forward(progress)); - - return stream_result; - } - - /** - * @brief Parses the index from file to RAM. - * @param[in] path The path to the file. - * @param[in] config Configuration parameters for imports. - * @return Outcome descriptor explicitly convertible to boolean. - */ - template - serialization_result_t load(input_file_t file, serialization_config_t config = {}, progress_at && progress = {}) - { - serialization_result_t io_result = file.open_if_not(); - if (!io_result) - return io_result; - - serialization_result_t stream_result = load_from_stream( - [&](void * buffer, std::size_t length) { - io_result = file.read(buffer, length); - return !!io_result; - }, - config, - std::forward(progress)); - - if (!stream_result) - { - io_result.error.release(); - return stream_result; - } - return io_result; - } - - /** - * @brief Memory-maps the serialized binary index representation from disk, - * @b without copying data into RAM, and fetching it on-demand. - */ - template - serialization_result_t load( - memory_mapped_file_t file, // - std::size_t offset = 0, // - serialization_config_t config = {}, // - progress_at && progress = {}) - { - serialization_result_t io_result = file.open_if_not(); - if (!io_result) - return io_result; - - serialization_result_t stream_result = load_from_stream( - [&](void * buffer, std::size_t length) { - if (offset + length > file.size()) - return false; - std::memcpy(buffer, file.data() + offset, length); - offset += length; - return true; - }, - config, - std::forward(progress)); - - return stream_result; - } - - template - serialization_result_t save( - char const * file_path, // - serialization_config_t config = {}, // - progress_at && progress = {}) const - { - return save(output_file_t(file_path), config, std::forward(progress)); - } - - template - serialization_result_t load( - char const * file_path, // - serialization_config_t config = {}, // - progress_at && progress = {}) - { - return load(input_file_t(file_path), config, std::forward(progress)); - } - - /** - * @brief Checks if a vector with specified key is present. - * @return `true` if the key is present in the index, `false` otherwise. - */ - bool contains(vector_key_t key) const - { - shared_lock_t lock(slot_lookup_mutex_); - return slot_lookup_.contains(key_and_slot_t::any_slot(key)); - } - - /** - * @brief Count the number of vectors with specified key present. - * @return Zero if nothing is found, a positive integer otherwise. - */ - std::size_t count(vector_key_t key) const - { - shared_lock_t lock(slot_lookup_mutex_); - return slot_lookup_.count(key_and_slot_t::any_slot(key)); - } - - struct labeling_result_t - { - error_t error{}; - std::size_t completed{}; - - explicit operator bool() const noexcept { return !error; } - labeling_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Removes an entry with the specified key from the index. - * @param[in] key The key of the entry to remove. - * @return The ::labeling_result_t indicating the result of the removal operation. - * If the removal was successful, `result.completed` will be `true`. - * If the key was not found in the index, `result.completed` will be `false`. - * If an error occurred during the removal operation, `result.error` will contain an error message. - */ - labeling_result_t remove(vector_key_t key) - { - labeling_result_t result; - - unique_lock_t lookup_lock(slot_lookup_mutex_); - auto matching_slots = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); - if (matching_slots.first == matching_slots.second) - return result; - - // Grow the removed entries ring, if needed - std::size_t matching_count = std::distance(matching_slots.first, matching_slots.second); - std::unique_lock free_lock(free_keys_mutex_); - if (!free_keys_.reserve(free_keys_.size() + matching_count)) - return result.failed("Can't allocate memory for a free-list"); - - // A removed entry would be: - // - present in `free_keys_` - // - missing in the `slot_lookup_` - // - marked in the `typed_` index with a `free_key_` - for (auto slots_it = matching_slots.first; slots_it != matching_slots.second; ++slots_it) - { - compressed_slot_t slot = (*slots_it).slot; - free_keys_.push(slot); - typed_->at(slot).key = free_key_; - } - slot_lookup_.erase(key); - result.completed = matching_count; - - return result; - } - - /** - * @brief Removes multiple entries with the specified keys from the index. - * @param[in] keys_begin The beginning of the keys range. - * @param[in] keys_end The ending of the keys range. - * @return The ::labeling_result_t indicating the result of the removal operation. - * `result.completed` will contain the number of keys that were successfully removed. - * `result.error` will contain an error message if an error occurred during the removal operation. - */ - template - labeling_result_t remove(keys_iterator_at keys_begin, keys_iterator_at keys_end) - { - labeling_result_t result; - unique_lock_t lookup_lock(slot_lookup_mutex_); - std::unique_lock free_lock(free_keys_mutex_); - // Grow the removed entries ring, if needed - std::size_t matching_count = 0; - for (auto keys_it = keys_begin; keys_it != keys_end; ++keys_it) - matching_count += slot_lookup_.count(key_and_slot_t::any_slot(*keys_it)); - - if (!free_keys_.reserve(free_keys_.size() + matching_count)) - return result.failed("Can't allocate memory for a free-list"); - - // Remove them one-by-one - for (auto keys_it = keys_begin; keys_it != keys_end; ++keys_it) - { - vector_key_t key = *keys_it; - auto matching_slots = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); - // A removed entry would be: - // - present in `free_keys_` - // - missing in the `slot_lookup_` - // - marked in the `typed_` index with a `free_key_` - matching_count = 0; - for (auto slots_it = matching_slots.first; slots_it != matching_slots.second; ++slots_it) - { - compressed_slot_t slot = (*slots_it).slot; - free_keys_.push(slot); - typed_->at(slot).key = free_key_; - ++matching_count; - } - - slot_lookup_.erase(key); - result.completed += matching_count; - } - - return result; - } - - /** - * @brief Renames an entry with the specified key to a new key. - * @param[in] from The current key of the entry to rename. - * @param[in] to The new key to assign to the entry. - * @return The ::labeling_result_t indicating the result of the rename operation. - * If the rename was successful, `result.completed` will be `true`. - * If the entry with the current key was not found, `result.completed` will be `false`. - */ - labeling_result_t rename(vector_key_t from, vector_key_t to) - { - labeling_result_t result; - unique_lock_t lookup_lock(slot_lookup_mutex_); - - if (!multi() && slot_lookup_.contains(key_and_slot_t::any_slot(to))) - return result.failed("Renaming impossible, the key is already in use"); - - // The `from` may map to multiple entries - while (true) - { - key_and_slot_t key_and_slot_removed; - if (!slot_lookup_.pop_first(key_and_slot_t::any_slot(from), key_and_slot_removed)) - break; - - key_and_slot_t key_and_slot_replacing{to, key_and_slot_removed.slot}; - slot_lookup_.try_emplace(key_and_slot_replacing); // This can't fail - typed_->at(key_and_slot_removed.slot).key = to; - ++result.completed; - } - - return result; - } - - /** - * @brief Exports a range of keys for the vectors present in the index. - * @param[out] keys Pointer to the array where the keys will be exported. - * @param[in] offset The number of keys to skip. Useful for pagination. - * @param[in] limit The maximum number of keys to export, that can fit in ::keys. - */ - void export_keys(vector_key_t * keys, std::size_t offset, std::size_t limit) const - { - shared_lock_t lock(slot_lookup_mutex_); - offset = (std::min)(offset, slot_lookup_.size()); - slot_lookup_.for_each([&](key_and_slot_t const & key_and_slot) { - if (offset) - // Skip the first `offset` entries - --offset; - else if (limit) - { - *keys = key_and_slot.key; - ++keys; - --limit; - } - }); - } - - struct copy_result_t - { - index_dense_gt index; - error_t error; - - explicit operator bool() const noexcept { return !error; } - copy_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Copies the ::index_dense_gt @b with all the data in it. - * @param config The copy configuration (optional). - * @return A copy of the ::index_dense_gt instance. - */ - copy_result_t copy(index_dense_copy_config_t config = {}) const - { - copy_result_t result = fork(); - if (!result) - return result; - - auto typed_result = typed_->copy(config); - if (!typed_result) - return result.failed(std::move(typed_result.error)); - - // Export the free (removed) slot numbers - index_dense_gt & copy = result.index; - if (!copy.free_keys_.reserve(free_keys_.size())) - return result.failed(std::move(typed_result.error)); - for (std::size_t i = 0; i != free_keys_.size(); ++i) - copy.free_keys_.push(free_keys_[i]); - - // Allocate buffers and move the vectors themselves - if (!config.force_vector_copy && copy.config_.exclude_vectors) - copy.vectors_lookup_ = vectors_lookup_; - else - { - copy.vectors_lookup_.resize(vectors_lookup_.size()); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - copy.vectors_lookup_[slot] = copy.vectors_tape_allocator_.allocate(copy.metric_.bytes_per_vector()); - if (std::count(copy.vectors_lookup_.begin(), copy.vectors_lookup_.end(), nullptr)) - return result.failed("Out of memory!"); - for (std::size_t slot = 0; slot != vectors_lookup_.size(); ++slot) - std::memcpy(copy.vectors_lookup_[slot], vectors_lookup_[slot], metric_.bytes_per_vector()); - } - - copy.slot_lookup_ = slot_lookup_; - *copy.typed_ = std::move(typed_result.index); - return result; - } - - /** - * @brief Copies the ::index_dense_gt model @b without any data. - * @return A similarly configured ::index_dense_gt instance. - */ - copy_result_t fork() const - { - copy_result_t result; - index_dense_gt & other = result.index; - - other.config_ = config_; - other.cast_buffer_ = cast_buffer_; - other.casts_ = casts_; - - other.metric_ = metric_; - other.available_threads_ = available_threads_; - other.free_key_ = free_key_; - - index_t * raw = index_allocator_t{}.allocate(1); - if (!raw) - return result.failed("Can't allocate the index"); - - new (raw) index_t(config()); - other.typed_ = raw; - return result; - } - - struct compaction_result_t - { - error_t error{}; - std::size_t pruned_edges{}; - - explicit operator bool() const noexcept { return !error; } - compaction_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Performs compaction on the index, pruning links to removed entries. - * @param executor The executor parallel processing. Default ::dummy_executor_t single-threaded. - * @param progress The progress tracker instance to use. Default ::dummy_progress_t reports nothing. - * @return The ::compaction_result_t indicating the result of the compaction operation. - * `result.pruned_edges` will contain the number of edges that were removed. - * `result.error` will contain an error message if an error occurred during the compaction operation. - */ - template - compaction_result_t isolate(executor_at && executor = executor_at{}, progress_at && progress = progress_at{}) - { - compaction_result_t result; - std::atomic pruned_edges; - auto disallow = [&](member_cref_t const & member) noexcept { - bool freed = member.key == free_key_; - pruned_edges += freed; - return freed; - }; - typed_->isolate(disallow, std::forward(executor), std::forward(progress)); - result.pruned_edges = pruned_edges; - return result; - } - - class values_proxy_t - { - index_dense_gt const * index_; - - public: - values_proxy_t(index_dense_gt const & index) noexcept - : index_(&index) - {} - byte_t const * operator[](compressed_slot_t slot) const noexcept { return index_->vectors_lookup_[slot]; } - byte_t const * operator[](member_citerator_t it) const noexcept - { - return index_->vectors_lookup_[get_slot(it)]; - } - }; - - /** - * @brief Performs compaction on the index, pruning links to removed entries. - * @param executor The executor parallel processing. Default ::dummy_executor_t single-threaded. - * @param progress The progress tracker instance to use. Default ::dummy_progress_t reports nothing. - * @return The ::compaction_result_t indicating the result of the compaction operation. - * `result.pruned_edges` will contain the number of edges that were removed. - * `result.error` will contain an error message if an error occurred during the compaction operation. - */ - template - compaction_result_t compact(executor_at && executor = executor_at{}, progress_at && progress = progress_at{}) - { - compaction_result_t result; - - std::vector new_vectors_lookup(vectors_lookup_.size()); - vectors_tape_allocator_t new_vectors_allocator; - - auto track_slot_change = [&](vector_key_t, compressed_slot_t old_slot, compressed_slot_t new_slot) { - byte_t * new_vector = new_vectors_allocator.allocate(metric_.bytes_per_vector()); - byte_t * old_vector = vectors_lookup_[old_slot]; - std::memcpy(new_vector, old_vector, metric_.bytes_per_vector()); - new_vectors_lookup[new_slot] = new_vector; - }; - typed_->compact( - values_proxy_t{*this}, - metric_proxy_t{*this}, - track_slot_change, - std::forward(executor), - std::forward(progress)); - vectors_lookup_ = std::move(new_vectors_lookup); - vectors_tape_allocator_ = std::move(new_vectors_allocator); - return result; - } - - template < // - typename man_to_woman_at = dummy_key_to_key_mapping_t, // - typename woman_to_man_at = dummy_key_to_key_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - join_result_t join( // - index_dense_gt const & women, // - index_join_config_t config = {}, // - man_to_woman_at && man_to_woman = man_to_woman_at{}, // - woman_to_man_at && woman_to_man = woman_to_man_at{}, // - executor_at && executor = executor_at{}, // - progress_at && progress = progress_at{}) const - { - index_dense_gt const & men = *this; - return unum::usearch::join( // - *men.typed_, - *women.typed_, // - values_proxy_t{men}, - values_proxy_t{women}, // - metric_proxy_t{men}, - metric_proxy_t{women}, // - config, // - std::forward(man_to_woman), // - std::forward(woman_to_man), // - std::forward(executor), // - std::forward(progress)); - } - - struct clustering_result_t - { - error_t error{}; - std::size_t clusters{}; - std::size_t visited_members{}; - std::size_t computed_distances{}; - - explicit operator bool() const noexcept { return !error; } - clustering_result_t failed(error_t message) noexcept - { - error = std::move(message); - return std::move(*this); - } - }; - - /** - * @brief Implements clustering, classifying the given objects (vectors of member keys) - * into a given number of clusters. - * - * @param[in] queries_begin Iterator pointing to the first query. - * @param[in] queries_end Iterator pointing to the last query. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - * @param[in] config Configuration parameters for clustering. - * - * @param[out] cluster_keys Pointer to the array where the cluster keys will be exported. - * @param[out] cluster_distances Pointer to the array where the distances to those centroids will be exported. - */ - template < // - typename queries_iterator_at, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > - clustering_result_t cluster( // - queries_iterator_at queries_begin, // - queries_iterator_at queries_end, // - index_dense_clustering_config_t config, // - vector_key_t * cluster_keys, // - distance_t * cluster_distances, // - executor_at && executor = executor_at{}, // - progress_at && progress = progress_at{}) - { - std::size_t const queries_count = queries_end - queries_begin; - - // Find the first level (top -> down) that has enough nodes to exceed `config.min_clusters`. - std::size_t level = max_level(); - if (config.min_clusters) - { - for (; level > 1; --level) - { - if (stats(level).nodes > config.min_clusters) - break; - } - } - else - level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; - - clustering_result_t result; - if (max_level() < 2) - return result.failed("Index too small to cluster!"); - - // A structure used to track the popularity of a specific cluster - struct cluster_t - { - vector_key_t centroid; - vector_key_t merged_into; - std::size_t popularity; - byte_t * vector; - }; - - auto centroid_id = [](cluster_t const & a, cluster_t const & b) { - return a.centroid < b.centroid; - }; - auto higher_popularity = [](cluster_t const & a, cluster_t const & b) { - return a.popularity > b.popularity; - }; - - std::atomic visited_members(0); - std::atomic computed_distances(0); - std::atomic atomic_error{nullptr}; - - using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - - map_to_clusters: - // Concurrently perform search until a certain depth - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - auto result = cluster(queries_begin[query_idx], level, thread_idx); - if (!result) - { - atomic_error = result.error.release(); - return false; - } - - cluster_keys[query_idx] = result.cluster.member.key; - cluster_distances[query_idx] = result.cluster.distance; - - // Export in case we need to refine afterwards - clusters[query_idx].centroid = result.cluster.member.key; - clusters[query_idx].vector = vectors_lookup_[result.cluster.member.slot]; - clusters[query_idx].merged_into = free_key(); - clusters[query_idx].popularity = 1; - - visited_members += result.visited_members; - computed_distances += result.computed_distances; - return true; - }); - - if (atomic_error) - return result.failed(atomic_error.load()); - - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - std::sort(clusters.begin(), clusters.end(), centroid_id); - - // Transform into run-length encoding, computing the number of unique clusters - std::size_t unique_clusters = 0; - { - std::size_t last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) - { - if (clusters[last_idx].centroid == clusters[current_idx].centroid) - { - clusters[last_idx].popularity++; - } - else - { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - unique_clusters = last_idx + 1; - } - - // In some cases the queries may be co-located, all mapping into the same cluster on that - // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < config.min_clusters && level > 1) - { - level--; - goto map_to_clusters; - } - - std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); - - // If clusters are too numerous, merge the ones that are too close to each other. - std::size_t merge_cycles = 0; - merge_nearby_clusters: - if (unique_clusters > config.max_clusters) - { - cluster_t & merge_source = clusters[unique_clusters - 1]; - std::size_t merge_target_idx = 0; - distance_t merge_distance = std::numeric_limits::max(); - - for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) - { - distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); - if (distance < merge_distance) - { - merge_distance = distance; - merge_target_idx = candidate_idx; - } - } - - merge_source.merged_into = clusters[merge_target_idx].centroid; - clusters[merge_target_idx].popularity += exchange(merge_source.popularity, 0); - - // The target object may have to be swapped a few times to get to optimal position. - while (merge_target_idx - && clusters[merge_target_idx - 1].popularity < clusters[merge_target_idx].popularity) - std::swap(clusters[merge_target_idx - 1], clusters[merge_target_idx]), --merge_target_idx; - - unique_clusters--; - merge_cycles++; - goto merge_nearby_clusters; - } - - // Replace evicted clusters - if (merge_cycles) - { - // Sort dropped clusters by name to accelerate future lookups - auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, centroid_id); - - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - vector_key_t & cluster_key = cluster_keys[query_idx]; - distance_t & cluster_distance = cluster_distances[query_idx]; - - // Recursively trace replacements of that cluster - while (true) - { - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t updated_cluster; - updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, centroid_id); - if (updated_cluster.merged_into == free_key()) - break; - cluster_key = updated_cluster.merged_into; - } - - cluster_distance = distance_between(cluster_key, queries_begin[query_idx], thread_idx).mean; - return true; - }); - } - - result.computed_distances = computed_distances; - result.visited_members = visited_members; - - (void)progress; - return result; - } - -private: - struct thread_lock_t - { - index_dense_gt const & parent; - std::size_t thread_id; - bool engaged; - - ~thread_lock_t() - { - if (engaged) - parent.thread_unlock_(thread_id); - } - }; - - thread_lock_t thread_lock_(std::size_t thread_id) const - { - if (thread_id != any_thread()) - return {*this, thread_id, false}; - - available_threads_mutex_.lock(); - thread_id = available_threads_.back(); - available_threads_.pop_back(); - available_threads_mutex_.unlock(); - return {*this, thread_id, true}; - } - - void thread_unlock_(std::size_t thread_id) const - { - available_threads_mutex_.lock(); - available_threads_.push_back(thread_id); - available_threads_mutex_.unlock(); - } - - template - add_result_t add_( // - vector_key_t key, - scalar_at const * vector, // - std::size_t thread, - bool force_vector_copy, - cast_t const & cast) - { - if (!multi() && contains(key)) - return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers"); - - // Cast the vector, if needed for compatibility with `metric_` - thread_lock_t lock = thread_lock_(thread); - bool copy_vector = !config_.exclude_vectors || force_vector_copy; - byte_t const * vector_data = reinterpret_cast(vector); - { - byte_t * casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; - bool casted = cast(vector_data, dimensions(), casted_data); - if (casted) - vector_data = casted_data, copy_vector = true; - } - - // Check if there are some removed entries, whose nodes we can reuse - compressed_slot_t free_slot = default_free_value(); - { - std::unique_lock lock(free_keys_mutex_); - free_keys_.try_pop(free_slot); - } - - // Perform the insertion or the update - bool reuse_node = free_slot != default_free_value(); - auto on_success = [&](member_ref_t member) { - unique_lock_t slot_lock(slot_lookup_mutex_); - slot_lookup_.try_emplace(key_and_slot_t{key, static_cast(member.slot)}); - if (copy_vector) - { - if (!reuse_node) - vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); - std::memcpy(vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); - } - else - vectors_lookup_[member.slot] = (byte_t *)vector_data; - }; - - index_update_config_t update_config; - update_config.thread = lock.thread_id; - update_config.expansion = config_.expansion_add; - - metric_proxy_t metric{*this}; - return reuse_node // - ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, update_config, on_success) - : typed_->add(key, vector_data, metric, update_config, on_success); - } - - template - search_result_t search_( // - scalar_at const * vector, - std::size_t wanted, // - predicate_at && predicate, - std::size_t thread, - size_t expansion, - bool exact, - cast_t const & cast) const - { - // Cast the vector, if needed for compatibility with `metric_` - thread_lock_t lock = thread_lock_(thread); - byte_t const * vector_data = reinterpret_cast(vector); - { - byte_t * casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; - bool casted = cast(vector_data, dimensions(), casted_data); - if (casted) - vector_data = casted_data; - } - - index_search_config_t search_config; - search_config.thread = lock.thread_id; - search_config.expansion = expansion; - search_config.exact = exact; - - auto allow = [=](member_cref_t const & member) noexcept { - if (member.key == free_key_) - return false; - if constexpr (!is_dummy()) - return predicate(member); - return true; - }; - return typed_->search(vector_data, wanted, metric_proxy_t{*this}, search_config, allow); - } - - template - cluster_result_t cluster_( // - scalar_at const * vector, - std::size_t level, // - std::size_t thread, - cast_t const & cast) const - { - // Cast the vector, if needed for compatibility with `metric_` - thread_lock_t lock = thread_lock_(thread); - byte_t const * vector_data = reinterpret_cast(vector); - { - byte_t * casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; - bool casted = cast(vector_data, dimensions(), casted_data); - if (casted) - vector_data = casted_data; - } - - index_cluster_config_t cluster_config; - cluster_config.thread = lock.thread_id; - cluster_config.expansion = config_.expansion_search; - - auto allow = [=](member_cref_t const & member) noexcept { - return member.key != free_key_; - }; - return typed_->cluster(vector_data, level, metric_proxy_t{*this}, cluster_config, allow); - } - - template - aggregated_distances_t distance_between_( // - vector_key_t key, - scalar_at const * vector, // - std::size_t thread, - cast_t const & cast) const - { - // Cast the vector, if needed for compatibility with `metric_` - thread_lock_t lock = thread_lock_(thread); - byte_t const * vector_data = reinterpret_cast(vector); - { - byte_t * casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; - bool casted = cast(vector_data, dimensions(), casted_data); - if (casted) - vector_data = casted_data; - } - - // Check if such `key` is even present. - shared_lock_t slots_lock(slot_lookup_mutex_); - auto key_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); - aggregated_distances_t result; - if (key_range.first == key_range.second) - return result; - - result.min = std::numeric_limits::max(); - result.max = std::numeric_limits::min(); - result.mean = 0; - result.count = 0; - - while (key_range.first != key_range.second) - { - key_and_slot_t key_and_slot = *key_range.first; - byte_t const * a_vector = vectors_lookup_[key_and_slot.slot]; - byte_t const * b_vector = vector_data; - distance_t a_b_distance = metric_(a_vector, b_vector); - - result.mean += a_b_distance; - result.min = (std::min)(result.min, a_b_distance); - result.max = (std::max)(result.max, a_b_distance); - result.count++; - - // - ++key_range.first; - } - - result.mean /= result.count; - return result; - } - - void reindex_keys_() - { - // Estimate number of entries first - std::size_t count_total = typed_->size(); - std::size_t count_removed = 0; - for (std::size_t i = 0; i != count_total; ++i) - { - member_cref_t member = typed_->at(i); - count_removed += member.key == free_key_; - } - - if (!count_removed && !config_.enable_key_lookups) - return; - - // Pull entries from the underlying `typed_` into either - // into `slot_lookup_`, or `free_keys_` if they are unused. - unique_lock_t lock(slot_lookup_mutex_); - slot_lookup_.clear(); - if (config_.enable_key_lookups) - slot_lookup_.reserve(count_total - count_removed); - free_keys_.clear(); - free_keys_.reserve(count_removed); - for (std::size_t i = 0; i != typed_->size(); ++i) - { - member_cref_t member = typed_->at(i); - if (member.key == free_key_) - free_keys_.push(static_cast(i)); - else if (config_.enable_key_lookups) - slot_lookup_.try_emplace(key_and_slot_t{vector_key_t(member.key), static_cast(i)}); - } - } - - template - std::size_t get_(vector_key_t key, scalar_at * reconstructed, std::size_t vectors_limit, cast_t const & cast) const - { - if (!multi()) - { - compressed_slot_t slot; - // Find the matching ID - { - shared_lock_t lock(slot_lookup_mutex_); - auto it = slot_lookup_.find(key_and_slot_t::any_slot(key)); - if (it == slot_lookup_.end()) - return false; - slot = (*it).slot; - } - // Export the entry - byte_t const * punned_vector = reinterpret_cast(vectors_lookup_[slot]); - bool casted = cast(punned_vector, dimensions(), (byte_t *)reconstructed); - if (!casted) - std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); - return true; - } - else - { - shared_lock_t lock(slot_lookup_mutex_); - auto equal_range_pair = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); - std::size_t count_exported = 0; - for (auto begin = equal_range_pair.first; - begin != equal_range_pair.second && count_exported != vectors_limit; - ++begin, ++count_exported) - { - // - compressed_slot_t slot = (*begin).slot; - byte_t const * punned_vector = reinterpret_cast(vectors_lookup_[slot]); - byte_t * reconstructed_vector = (byte_t *)reconstructed + metric_.bytes_per_vector() * count_exported; - bool casted = cast(punned_vector, dimensions(), reconstructed_vector); - if (!casted) - std::memcpy(reconstructed_vector, punned_vector, metric_.bytes_per_vector()); - } - return count_exported; - } - } - - template - static casts_t make_casts_() - { - casts_t result; - - result.from_b1x8 = cast_gt{}; - result.from_i8 = cast_gt{}; - result.from_f16 = cast_gt{}; - result.from_f32 = cast_gt{}; - result.from_f64 = cast_gt{}; - - result.to_b1x8 = cast_gt{}; - result.to_i8 = cast_gt{}; - result.to_f16 = cast_gt{}; - result.to_f32 = cast_gt{}; - result.to_f64 = cast_gt{}; - - return result; - } - - static casts_t make_casts_(scalar_kind_t scalar_kind) - { - switch (scalar_kind) - { - case scalar_kind_t::f64_k: - return make_casts_(); - case scalar_kind_t::f32_k: - return make_casts_(); - case scalar_kind_t::f16_k: - return make_casts_(); - case scalar_kind_t::i8_k: - return make_casts_(); - case scalar_kind_t::b1x8_k: - return make_casts_(); - default: - return {}; - } - } -}; - -using index_dense_t = index_dense_gt<>; -using index_dense_big_t = index_dense_gt; - -/** - * @brief Adapts the Male-Optimal Stable Marriage algorithm for unequal sets - * to perform fast one-to-one matching between two large collections - * of vectors, using approximate nearest neighbors search. - * - * @param[inout] man_to_woman Container to map ::first keys to ::second. - * @param[inout] woman_to_man Container to map ::second keys to ::first. - * @param[in] executor Thread-pool to execute the job in parallel. - * @param[in] progress Callback to report the execution progress. - */ -template < // - - typename men_key_at, // - typename women_key_at, // - typename men_slot_at, // - typename women_slot_at, // - - typename man_to_woman_at = dummy_key_to_key_mapping_t, // - typename woman_to_man_at = dummy_key_to_key_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > -static join_result_t join( // - index_dense_gt const & men, // - index_dense_gt const & women, // - - index_join_config_t config = {}, // - man_to_woman_at && man_to_woman = man_to_woman_at{}, // - woman_to_man_at && woman_to_man = woman_to_man_at{}, // - executor_at && executor = executor_at{}, // - progress_at && progress = progress_at{}) noexcept -{ - return men.join( // - women, - config, // - std::forward(woman_to_man), // - std::forward(man_to_woman), // - std::forward(executor), // - std::forward(progress)); -} - -} // namespace usearch -} // namespace unum - -// NOLINTEND(readability-*,google-*,modernize-use-auto) diff --git a/dbms/src/TiDB/Decode/Vector.cpp b/dbms/src/TiDB/Decode/Vector.cpp index 6cb76f2d066..ce2862a75bf 100644 --- a/dbms/src/TiDB/Decode/Vector.cpp +++ b/dbms/src/TiDB/Decode/Vector.cpp @@ -17,6 +17,10 @@ #include #include +#define SIMSIMD_NATIVE_F16 0 +#define SIMSIMD_NATIVE_BF16 0 +#include + #include namespace DB @@ -52,15 +56,8 @@ Float64 VectorFloat32Ref::l2SquaredDistance(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; - Float32 diff; - - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. - diff = elements[i] - b[i]; - distance += diff * diff; - } + simsimd_distance_t distance; + simsimd_l2sq_f32(elements, b.elements, elements_n, &distance); return distance; } @@ -69,13 +66,8 @@ Float64 VectorFloat32Ref::innerProduct(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; - - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. - distance += elements[i] * b[i]; - } + simsimd_distance_t distance; + simsimd_dot_f32(elements, b.elements, elements_n, &distance); return distance; } @@ -84,37 +76,10 @@ Float64 VectorFloat32Ref::cosineDistance(VectorFloat32Ref b) const { checkDims(b); - Float32 distance = 0.0; - Float32 norma = 0.0; - Float32 normb = 0.0; + simsimd_distance_t distance; + simsimd_cos_f32(elements, b.elements, elements_n, &distance); - for (size_t i = 0, i_max = size(); i < i_max; ++i) - { - // Hope this can be vectorized. - distance += elements[i] * b[i]; - norma += elements[i] * elements[i]; - normb += b[i] * b[i]; - } - - Float64 similarity - = static_cast(distance) / std::sqrt(static_cast(norma) * static_cast(normb)); - - if (std::isnan(similarity)) - { - // Divide by zero - return std::nan(""); - } - - if (similarity > 1.0) - { - similarity = 1.0; - } - else if (similarity < -1.0) - { - similarity = -1.0; - } - - return 1.0 - similarity; + return distance; } Float64 VectorFloat32Ref::l1Distance(VectorFloat32Ref b) const