Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build wheel on windows #2

Merged
merged 2 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 13 additions & 16 deletions .github/workflows/build-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,34 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# os: [ubuntu-latest, windows-latest, macos-13, macos-14]
os: [ubuntu-latest, macos-13, macos-14]
os: [ubuntu-latest, windows-latest, macos-13, macos-14]
# os: [ubuntu-latest, macos-13, macos-14]
# os: [windows-latest]

steps:
- uses: actions/checkout@v4

# Used to host cibuildwheel
- uses: actions/setup-python@v5

- name: setup devcmd
if: ${{ matrix.os == 'windows-latest' }}
# NOTE: this is necessary to correctly find and use cl.exe
uses: ilammy/[email protected]
- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.19.1 pipx pytest apsw numpy

- name: Install linux deps
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
sudo apt-get install zip -y
- uses: ilammy/msvc-dev-cmd@v1

- name: Setup cmake
uses: jwlawson/actions-setup-cmake@v2

- name: Setup Ninja
uses: seanmiddleditch/gha-setup-ninja@v5

- name: Bootstrap vcpkg
run: |
git submodule update --init --recursive
python bootstrap_vcpkg.py

- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.19.1

- name: Build wheels
run: python -m cibuildwheel --output-dir wheelhouse
env:
MACOSX_DEPLOYMENT_TARGET: '10.15' # 10.15 is the minimum version that fully supports c++17
run: pipx run cibuildwheel --output-dir wheelhouse

- uses: actions/upload-artifact@v4
with:
Expand Down
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
cmake_minimum_required(VERSION 3.22 FATAL_ERROR)

# Windows builds use a fixed vcpkg triplet. This block sits ahead of project(),
# presumably so the vcpkg toolchain picks the value up -- TODO confirm.
if(WIN32)
  set(VCPKG_TARGET_TRIPLET "x64-windows-static-md-release")
  message(STATUS "VCPKG_TARGET_TRIPLET on windows: ${VCPKG_TARGET_TRIPLET}")
endif()

project(vectorlite VERSION 0.1.0 LANGUAGES CXX)

configure_file(src/version.h.in version.h)
Expand Down Expand Up @@ -31,12 +36,13 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
endif ()

# The vectorlite shared library target.
add_library(vectorlite SHARED
  src/vectorlite.cpp
  src/virtual_table.cpp
  src/vector.cpp
  src/util.cpp
  src/vector_space.cpp
  src/index_options.cpp
  src/sqlite_functions.cpp
  src/constraint.cpp
)
# Remove the "lib" prefix so the shared library has the same base name on all platforms.
set_target_properties(vectorlite PROPERTIES PREFIX "")
target_include_directories(vectorlite PUBLIC
  ${RAPIDJSON_INCLUDE_DIRS}
  ${HNSWLIB_INCLUDE_DIRS}
  ${PROJECT_BINARY_DIR}
)
target_link_libraries(vectorlite PRIVATE
  unofficial::sqlite3::sqlite3
  absl::status
  absl::statusor
  absl::strings
  re2::re2
)
# Copy the built library into the Python package to make running integration tests easier.
# VERBATIM keeps argument escaping identical on every platform and generator.
add_custom_command(TARGET vectorlite POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy
          $<TARGET_FILE:vectorlite>
          "${PROJECT_SOURCE_DIR}/vectorlite_py/$<TARGET_FILE_NAME:vectorlite>"
  VERBATIM
)


include(GoogleTest)
enable_testing()
file(GLOB TEST_SOURCES src/*.cpp)
Expand All @@ -61,6 +67,12 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_link_libraries(unit-test PRIVATE absl::log)
endif()

# TODO: For an unexplained reason, absl::log symbols are required even in release builds on MSVC.
# Only DLOG is used, which should be guarded by NDEBUG and therefore excluded from release builds.
if(MSVC)
  foreach(msvc_log_target IN ITEMS vectorlite unit-test)
    target_link_libraries(${msvc_log_target} PRIVATE absl::log)
  endforeach()
endif()

gtest_discover_tests(unit-test)

add_test(NAME unit-test COMMAND unit-test)
Expand Down
2 changes: 1 addition & 1 deletion examples/knn_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def create_connection():
# create connection to in-memory database
conn = apsw.Connection(':memory:') if use_apsw else sqlite3.connect(':memory:')
conn.enable_load_extension(True)
conn.load_extension('../build/release/libvectorlite.so')
conn.load_extension('../build/release/vectorlite.so')
return conn

conn = create_connection()
Expand Down
2 changes: 2 additions & 0 deletions examples/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
numpy>=1.22
apsw>=3.45
2 changes: 1 addition & 1 deletion integration_test/delete_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) {
const auto& vectors = GenerateRandomVectors();
rc = sqlite3_enable_load_extension(db, 1);
assert(rc == SQLITE_OK);
rc = sqlite3_load_extension(db, "build/dev/libvectorlite.so", "sqlite3_extension_init", &zErrMsg);
rc = sqlite3_load_extension(db, "build/dev/vectorlite.so", "sqlite3_extension_init", &zErrMsg);
if (rc != SQLITE_OK) {
std::cerr << "load extension failed: " << zErrMsg << std::endl;
sqlite3_free(zErrMsg);
Expand Down
122 changes: 67 additions & 55 deletions integration_test/python/test/vectorlite_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import tempfile
import os
import platform

def get_connection():
conn = apsw.Connection(':memory:')
Expand Down Expand Up @@ -97,59 +98,70 @@ def test_vector_distance(conn):
assert np.isclose(math.sqrt(result), l2_distance)

def test_index_file(random_vectors):
conn = get_connection()
def remove_quote(s: str):
return s.strip('\'').strip('\"')
with tempfile.TemporaryDirectory() as tempdir:
index_file_path = os.path.join(tempdir, 'index.bin')
assert not os.path.exists(index_file_path)
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(index_file_path)
assert os.path.exists(index_file_path) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(index_file_path) and os.path.getsize(index_file_path) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(index_file_path) and os.path.getsize(index_file_path) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(index_file_path)
conn.close()


file_path = os.path.join(tempdir, 'index.bin')
file_paths = [f'\"{file_path}\"', f'\'{file_path}\'']

# Windows paths always contain ':', so they must be enclosed in double or single quotes.
# Unix paths don't necessarily contain special characters that need to be quoted.
if platform.system().lower() != 'windows':
file_paths.append(file_path)

for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


11 changes: 9 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
[build-system]
requires = ["setuptools>=59", "wheel", "cmake", "ninja"]
requires = ["setuptools>=59", "wheel"]

build-backend = "setuptools.build_meta"

[tool.cibuildwheel]
test-requires = ["pytest", "numpy", "apsw>=3.46"]
test-command = "pytest {project}/integration_test/python/test"
skip = ["*-win32", "*-manylinux_i686", "*musllinux*", "pp*", "cp36*", "cp37*", "cp38*"]
skip = ["*-win32", "*-win_arm64", "*-manylinux_i686", "*musllinux*", "pp*", "cp36*", "cp37*", "cp38*", "cp39*"]

[tool.cibuildwheel.macos]
environment = {MACOSX_DEPLOYMENT_TARGET = "10.15"} # 10.15 is the minimum version that fully supports c++17

# todo: support musllinux
[tool.cibuildwheel.linux]
before-build = "yum install -y ninja-build"
30 changes: 16 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from pathlib import Path

from setuptools import Extension, setup
import cmake
# import cmake
import subprocess
import ninja
# import ninja

VERSION = '0.1.0'
PACKAGE_NAME = 'vectorlite_py'
Expand All @@ -19,33 +19,35 @@
machine = platform.machine()

print(f'Current platfrom: {system}, {machine}')
print(f'cmake bin dir: {cmake.CMAKE_BIN_DIR}. cwd: {os.getcwd()}')
cmake_path = os.path.join(cmake.CMAKE_BIN_DIR, 'cmake')
ctest_path = os.path.join(cmake.CMAKE_BIN_DIR, 'ctest')
ninja_path = os.path.join(ninja.BIN_DIR, 'ninja')
cmake_version = subprocess.run(['cmake', '--version'], check=True)
cmake_version.check_returncode()
# print(f'cmake bin dir: {cmake.CMAKE_BIN_DIR}. cwd: {os.getcwd()}')
# cmake_path = os.path.join(cmake.CMAKE_BIN_DIR, 'cmake')
# ctest_path = os.path.join(cmake.CMAKE_BIN_DIR, 'ctest')
# ninja_path = os.path.join(ninja.BIN_DIR, 'ninja')
# cmake_version = subprocess.run([cmake_path, '--version'], check=True)
# cmake_version.check_returncode()

class CMakeExtension(Extension):
    """A setuptools ``Extension`` that carries only a name and an empty source list."""

    def __init__(self, name: str) -> None:
        # No source files are handed to setuptools for this extension.
        no_sources = []
        super().__init__(name, sources=no_sources)

def get_lib_name():
    """Return the file name of the vectorlite shared library for the current platform.

    The name has no ``lib`` prefix: the CMake build sets an empty ``PREFIX`` on
    the ``vectorlite`` target, so only the extension differs between platforms.

    Reads the module-level ``system`` value (compared case-insensitively).

    Returns:
        ``'vectorlite.so'`` on Linux, ``'vectorlite.dylib'`` on Darwin and
        ``'vectorlite.dll'`` on Windows.

    Raises:
        ValueError: if ``system`` is none of the supported platforms.
    """
    lib_names = {
        'linux': 'vectorlite.so',
        'darwin': 'vectorlite.dylib',
        'windows': 'vectorlite.dll',
    }
    platform_key = system.lower()
    if platform_key not in lib_names:
        raise ValueError(f'Unsupported platform: {system}')
    return lib_names[platform_key]

class CMakeBuild(build_ext):
def build_extension(self, ext: CMakeExtension) -> None:
cmake_path = 'cmake'
ctest_path = 'ctest'
print(f'Building extension for {self.plat_name} {self.compiler.compiler_type}')
extra_args = []
if system.lower() == 'windows':
extra_args = ['-DCMAKE_CXX_COMPILER=cl', '-DCMAKE_C_COMPILER=cl']
configure = subprocess.run([cmake_path, '--preset', 'release', f'-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_path}', *extra_args])
# if system.lower() == 'windows':
# extra_args = ['-DCMAKE_CXX_COMPILER=cl', '-DCMAKE_C_COMPILER=cl']
configure = subprocess.run([cmake_path, '--preset', 'release', *extra_args])
configure.check_returncode()

subprocess.run([cmake_path, '--build', os.path.join('build', 'release'), '-j8'], check=True)
Expand Down
10 changes: 10 additions & 0 deletions src/virtual_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ static int InitVirtualTable(bool load_from_file, sqlite3* db, void* pAux,
std::string_view index_file_path;
if (argc == 3 + kModuleParamOffset) {
  index_file_path = argv[2 + kModuleParamOffset];
  // The path may be enclosed in a matching pair of double or single quotes.
  // Quoting is necessary for Windows paths: they contain ':', which must be
  // quoted for sqlite to parse the argument correctly. Strip one such pair.
  //
  // Requiring at least 2 characters guarantees that the opening and closing
  // quote are distinct characters, so a lone quote is left untouched while an
  // empty quoted path ("" or '') is reduced to an empty path instead of being
  // kept as a literal pair of quote characters.
  const auto size = index_file_path.size();
  if (size >= 2) {
    const char first = index_file_path.front();
    const char last = index_file_path.back();
    if ((first == '"' || first == '\'') && first == last) {
      index_file_path = index_file_path.substr(1, size - 2);
    }
  }
}

std::string sql = absl::StrFormat("CREATE TABLE X(%s, distance REAL hidden)",
Expand Down
2 changes: 1 addition & 1 deletion vectorlite_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__version__ = '0.1.0'

def vectorlite_path():
    """Return the normalized path of the ``vectorlite`` loadable library, without a file extension.

    The path is built from the directory containing this module.
    """
    package_dir = os.path.dirname(__file__)
    return os.path.normpath(os.path.join(package_dir, 'vectorlite'))


Expand Down