Skip to content

Commit

Permalink
compute: Use SimSIMD for vectors (#221)
Browse files Browse the repository at this point in the history
Signed-off-by: Wish <[email protected]>
  • Loading branch information
breezewish authored and JaySon-Huang committed Aug 6, 2024
1 parent 86629a5 commit 9a9daf8
Show file tree
Hide file tree
Showing 12 changed files with 47 additions and 2,300 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,6 @@
[submodule "contrib/usearch"]
path = contrib/usearch
url = https://github.com/unum-cloud/usearch.git
[submodule "contrib/simsimd"]
path = contrib/simsimd
url = https://github.com/ashvardanian/SimSIMD
2 changes: 2 additions & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,5 @@ add_subdirectory(magic_enum)
add_subdirectory(aws-cmake)

add_subdirectory(usearch-cmake)

add_subdirectory(simsimd-cmake)
1 change: 1 addition & 0 deletions contrib/simsimd
Submodule simsimd added at 3e2193
13 changes: 13 additions & 0 deletions contrib/simsimd-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Wires the SimSIMD submodule into the build as an INTERFACE target that only
# contributes an include directory; no sources are compiled here.

# Submodule checkout location and the public header directory inside it.
set(SIMSIMD_PROJECT_DIR "${TiFlash_SOURCE_DIR}/contrib/simsimd")
set(SIMSIMD_SOURCE_DIR "${SIMSIMD_PROJECT_DIR}/include")

# Stop configuration early when the submodule has not been checked out.
if (NOT EXISTS "${SIMSIMD_SOURCE_DIR}/simsimd/simsimd.h")
    message (FATAL_ERROR "submodule contrib/simsimd not found")
endif()

# SYSTEM marks the headers as system includes for consumers of the target.
add_library(_simsimd INTERFACE)
target_include_directories(_simsimd SYSTEM INTERFACE ${SIMSIMD_SOURCE_DIR})

# Namespaced alias that the rest of the project links against.
add_library(tiflash_contrib::simsimd ALIAS _simsimd)
2 changes: 1 addition & 1 deletion contrib/usearch
Submodule usearch updated 94 files
+8 −0 .github/ISSUE_TEMPLATE/bug_report.yml
+0 −36 .github/workflows/Dockerfile.libs
+105 −52 .github/workflows/prerelease.yml
+250 −101 .github/workflows/release.yml
+4 −0 .github/workflows/update_version.sh
+7 −0 .gitignore
+2 −1 .npmignore
+4 −2 .releaserc
+13 −0 .swift-format
+10 −0 .vscode/extensions.json
+5 −0 .vscode/settings.json
+6 −6 .vscode/tasks.json
+16 −21 BENCHMARKS.md
+4 −4 CITATION.cff
+21 −21 CMakeLists.txt
+144 −30 CONTRIBUTING.md
+224 −0 Cargo.lock
+16 −1 Cargo.toml
+2 −2 Package.swift
+99 −62 README.md
+1 −1 VERSION
+28 −14 binding.gyp
+83 −4 build.rs
+2 −0 c/CMakeLists.txt
+232 −19 c/README.md
+138 −37 c/lib.cpp
+62 −50 c/test.c
+166 −26 c/usearch.h
+1 −1 conanfile.py
+6 −0 cpp/CMakeLists.txt
+12 −1 cpp/README.md
+1 −3 cpp/bench.cpp
+401 −46 cpp/test.cpp
+1 −1 csharp/nuget/nuget-package.props
+1 −1 docs/benchmarks.rst
+6 −1 docs/conf.dox
+8 −1 docs/conf.py
+3 −1 docs/index.rst
+2 −2 docs/javascript/reference.rst
+6 −0 docs/objc/index.rst
+10 −0 docs/objc/reference.rst
+7 −0 docs/requirements.txt
+8 −0 docs/swift/reference.rst
+2 −2 golang/README.md
+52 −21 include/usearch/index.hpp
+44 −15 include/usearch/index_dense.hpp
+253 −133 include/usearch/index_plugins.hpp
+4 −0 java/cloud/unum/usearch/Index.java
+8 −0 java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp
+3 −0 javascript/dist-package-cjs.json
+3 −0 javascript/dist-package-esm.json
+10 −1 javascript/lib.cpp
+1 −0 javascript/node-gyp-build.d.ts
+25 −0 javascript/tsconfig-base.json
+8 −0 javascript/tsconfig-cjs.json
+8 −0 javascript/tsconfig-esm.json
+0 −443 javascript/usearch.js
+14 −4 javascript/usearch.test.js
+650 −0 javascript/usearch.ts
+109 −0 objc/README.md
+6 −0 objc/USearchObjective.mm
+1,518 −22 package-lock.json
+32 −23 package.json
+42 −8 pyproject.toml
+14 −8 python/lib.cpp
+96 −26 python/scripts/bench.ipynb
+47 −15 python/scripts/bench_exact.py
+1 −1 python/scripts/index_faiss.py
+89 −0 python/scripts/test_distances.py
+59 −13 python/scripts/test_index.py
+22 −27 python/scripts/test_jit.py
+56 −21 python/scripts/test_sqlite.py
+2 −0 python/scripts/test_tooling.py
+111 −4 python/usearch/__init__.py
+10 −14 python/usearch/client.py
+71 −35 python/usearch/eval.py
+27 −55 python/usearch/index.py
+4 −15 python/usearch/numba.py
+3 −9 python/usearch/server.py
+182 −12 rust/README.md
+67 −31 rust/lib.cpp
+31 −11 rust/lib.hpp
+1,086 −51 rust/lib.rs
+23 −20 setup.py
+1 −1 simsimd
+35 −0 sqlite/CMakeLists.txt
+3 −3 sqlite/README.md
+51 −17 sqlite/lib.cpp
+1 −1 stringzilla
+6 −6 swift/Index+Sugar.swift
+51 −6 swift/README.md
+10 −5 swift/Test.swift
+0 −21 tsconfig.json
+9 −9 wasmer.toml
2 changes: 1 addition & 1 deletion contrib/usearch-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ if (NOT EXISTS "${USEARCH_SOURCE_DIR}/usearch/index.hpp")
endif()

target_include_directories(_usearch SYSTEM INTERFACE
${USEARCH_PROJECT_DIR}/simsimd/include
# ${USEARCH_PROJECT_DIR}/simsimd/include # Use our simsimd
${USEARCH_PROJECT_DIR}/fp16/include
${USEARCH_SOURCE_DIR})

Expand Down
1 change: 1 addition & 0 deletions dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ target_link_libraries (dbms
${BTRIE_LIBRARIES}
absl::synchronization
tiflash_contrib::usearch
tiflash_contrib::simsimd
tiflash_contrib::aws_s3

etcdpb
Expand Down
10 changes: 9 additions & 1 deletion dbms/src/Functions/tests/gtest_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,15 @@ TEST_F(Vector, CosineDistance)
try
{
ASSERT_COLUMN_EQ(
createColumn<Nullable<Float64>>({0.0, std::nullopt, 0.0, 1.0, 2.0, 0.0, 2.0, std::nullopt}),
createColumn<Nullable<Float64>>(
{0.004130363464355469,
1.0, // CosDistance to (0,0) cannot be calculated, clamped to 1.0
0.00572967529296875,
1.0,
1.9942703247070312,
0.00022123707458376884,
1.9997787475585938,
std::nullopt}),
executeFunction(
"vecCosineDistance",
createColumn<Array>(
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,15 @@ std::vector<VectorIndexBuilder::Key> VectorIndexHNSWViewer::search(
std::atomic<size_t> discarded_nodes = 0;
std::atomic<bool> has_exception_in_search = false;

auto predicate = [&](typename USearchImplType::member_cref_t const & member) {
auto predicate = [&](const Key & key) {
// Must catch exceptions in the predicate, because search runs on other threads.
try
{
// Note: We don't increase the thread_local perf, because search runs on other threads.
visited_nodes++;
if (!valid_rows[member.key])
if (!valid_rows[key])
discarded_nodes++;
return valid_rows[member.key];
return valid_rows[key];
}
catch (...)
{
Expand All @@ -209,7 +209,7 @@ std::vector<VectorIndexBuilder::Key> VectorIndexHNSWViewer::search(
SCOPE_EXIT({ GET_METRIC(tiflash_vector_index_duration, type_search).Observe(w.elapsedSeconds()); });

// TODO: Support efSearch.
auto result = index.search( //
auto result = index.filtered_search( //
reinterpret_cast<const Float32 *>(queryInfo->ref_vec_f32().data() + sizeof(UInt32)),
queryInfo->top_k(),
predicate);
Expand Down
7 changes: 1 addition & 6 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@
#include <Storages/DeltaMerge/File/dtpb/dmfile.pb.h>
#include <Storages/DeltaMerge/Index/VectorIndex.h>

#if __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#include <Storages/DeltaMerge/Index/VectorIndexHNSW/usearch_index_dense.h>
#pragma clang diagnostic pop
#endif
#include <usearch/index_dense.hpp>

namespace DB::DM
{
Expand Down
Loading

0 comments on commit 9a9daf8

Please sign in to comment.