Skip to content

Commit

Permalink
[#23998] DocDB: Update usearch and other header-only third-party dependencies
Browse files Browse the repository at this point in the history

Summary:
Updating inline third-party dependencies:

- usearch : https://github.com/unum-cloud/usearch/commits/240fe9c298100f9e37a2d7377b1595be6ba1f412
- fp16    : https://github.com/Maratyszcza/FP16/commits/98b0a46bce017382a6351a19577ec43a715b6835
- hnswlib : https://github.com/nmslib/hnswlib/commits/2142dc6f4dd08e64ab727a7bbd93be7f732e80b0
- simsimd : https://github.com/ashvardanian/simsimd/commits/6834f4639af0271e760b00c08a464555b536f4f4

Moving the auto-generated comments in files belonging to the inline third-party dependencies from the beginning of those files to the end, so that the original line numbers are preserved.

Not turning on SimSIMD use in Usearch yet because that has issues with GCC 11.

The updates to the third-party libraries will be auto-generated using the command below and pushed as separate commits:

build-support/thirdparty_tool --sync-inline-thirdparty
Jira: DB-12885

Test Plan:
Jenkins

Manual tests using hnsw_tool

Reviewers: tnayak, sergei

Reviewed By: tnayak

Subscribers: ybase

Differential Revision: https://phorge.dev.yugabyte.com/D38134
  • Loading branch information
mbautin committed Sep 21, 2024
1 parent de9d4ad commit e131b20
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 20 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ include_directories(src)
include_directories("src/inline-thirdparty/usearch")
include_directories("src/inline-thirdparty/fp16")
include_directories("src/inline-thirdparty/hnswlib")
include_directories("src/inline-thirdparty/simsimd")


enable_testing()
Expand Down
2 changes: 1 addition & 1 deletion build-support/inline_thirdparty.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
dependencies:
- name: usearch
git_url: https://github.com/unum-cloud/usearch
commit: 4fbb56e02aa928a011abdedb66adfef128123e5f
commit: 240fe9c298100f9e37a2d7377b1595be6ba1f412
src_dir: include
dest_dir: usearch

Expand Down
8 changes: 3 additions & 5 deletions python/yugabyte/inline_thirdparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ def get_latest_commit_explanation(

def add_comment_to_file(
file_path: str,
dep: InlineDependency,
latest_commit_in_subdir: str) -> None:
dep: InlineDependency) -> None:
"""Adds a comment to the include file indicating what version of the dependency is being used."""
if not file_path.endswith(FILE_EXTENSIONS_SUPPORTING_CPP_COMMENTS):
logging.info("Cannot add comment to file %s", file_path)
Expand All @@ -117,11 +116,10 @@ def add_comment_to_file(
f"// This file is part of the {dep.name} inline third-party dependency of YugabyteDB.",
f"// Git repo: {dep.git_url}",
f"// Git tag: {dep.tag}" if dep.tag else f"// Git commit: {dep.commit}",
f"// {get_latest_commit_explanation(dep, latest_commit_in_subdir, cpp_comment=True)}",
"//",
"// See also src/inline-thirdparty/README.md.",
])
file_util.write_file(comment + '\n\n' + content, file_path)
file_util.write_file(content.rstrip() + '\n\n' + comment + '\n', file_path)


def validate_dir(dep: InlineDependency, dir_type: str) -> None:
Expand Down Expand Up @@ -204,7 +202,7 @@ def clone_and_copy_subtrees(dependencies: List[InlineDependency]) -> None:
for root, dirs_unused, files in os.walk(dest_subtree):
for file in files:
file_path = os.path.join(root, file)
add_comment_to_file(file_path, dep, latest_commit_in_subdir)
add_comment_to_file(file_path, dep)

# Commit the changes in the current repository
make_commit(dep, latest_commit_in_subdir, resolved_commit)
Expand Down
28 changes: 14 additions & 14 deletions src/yb/tools/hnsw_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -706,20 +706,20 @@ Status BenchmarkExecute(const BenchmarkArguments& args) {
// method has to use in case the ANN method doesn't support the input vector type. To avoid
// error-prone code duplication, we use a macro that expands to a bunch of if statements.

#define YB_VECTOR_INDEX_BENCHMARK_SUPPORTED_CASES \
/* method, distance, input type, indexed type */ \
/* Euclidean distance */ \
((Usearch, L2Squared, float, float )) \
((Usearch, L2Squared, uint8_t, float )) \
((Hnswlib, L2Squared, float, float )) \
((Hnswlib, L2Squared, uint8_t, uint8_t)) \
/* Cosine similarity */ \
((Usearch, Cosine, float, float )) \
((Usearch, Cosine, uint8_t, float )) \
/* Inner product */ \
((Usearch, InnerProduct, float, float )) \
((Usearch, InnerProduct, uint8_t, float )) \
((Hnswlib, InnerProduct, float, float )) \
#define YB_VECTOR_INDEX_BENCHMARK_SUPPORTED_CASES \
/* method, distance, input type, indexed type */ \
/* Euclidean distance */ \
((Usearch, L2Squared, float, float )) \
((Usearch, L2Squared, uint8_t, float )) \
((Hnswlib, L2Squared, float, float )) \
((Hnswlib, L2Squared, uint8_t, uint8_t)) \
/* Cosine similarity */ \
((Usearch, Cosine, float, float )) \
((Usearch, Cosine, uint8_t, float )) \
/* Inner product */ \
((Usearch, InnerProduct, float, float )) \
((Usearch, InnerProduct, uint8_t, float )) \
((Hnswlib, InnerProduct, float, float )) \
((Hnswlib, InnerProduct, uint8_t, uint8_t))

#define YB_VECTOR_INDEX_BENCHMARK_HELPER(method, distance_enum_element, input_type, indexed_type) \
Expand Down
12 changes: 12 additions & 0 deletions src/yb/vector/usearch_include_wrapper_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,25 @@
#ifdef __clang__
// For https://gist.githubusercontent.com/mbautin/87278fc41654c6c74cf7232960364c95/raw
#pragma GCC diagnostic ignored "-Wpass-failed"
#pragma GCC diagnostic ignored "-Wdeprecated-volatile"

#ifdef __aarch64__
// Temporarily disable failing on #warning directives inside index_plugins.hpp. This will become
// unnecessary once we enable SimSIMD.
#pragma GCC diagnostic ignored "-W#warnings"
#endif

#if __clang_major__ == 14
// For https://gist.githubusercontent.com/mbautin/7856257553a1d41734b1cec7c73a0fb4/raw
#pragma GCC diagnostic ignored "-Wambiguous-reversed-operator"
#endif

// Usearch 2.15.1 has unused variables (e.g. to_move) in the insert_sorted function.
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif // __clang__

#define SIMSIMD_NATIVE_BF16 0

#include "usearch/index.hpp"
#include "usearch/index_dense.hpp"

Expand Down

0 comments on commit e131b20

Please sign in to comment.