From 27e044f56a6cff47fe160b1fbc9ddd4ac965f311 Mon Sep 17 00:00:00 2001 From: fynnfluegge Date: Sun, 14 Jan 2024 14:46:10 +0100 Subject: [PATCH 1/2] serialize/deserialize faiss db --- codeqai/vector_store.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/codeqai/vector_store.py b/codeqai/vector_store.py index 78481a9..bbaf05d 100644 --- a/codeqai/vector_store.py +++ b/codeqai/vector_store.py @@ -1,3 +1,5 @@ +import os + import inquirer from langchain.embeddings.base import Embeddings from langchain.schema import Document @@ -17,10 +19,11 @@ def __init__(self, name: str, embeddings: Embeddings): def load_documents(self): spinner = yaspin(text="💾 Loading vector store...", color="green") spinner.start() - self.db = FAISS.load_local( - index_name=self.name, - folder_path=get_cache_path(), - embeddings=self.embeddings, + with open(os.path.join(get_cache_path(), f"{self.name}.faiss"), "rb") as file: + index = file.read() + + self.db = FAISS.deserialize_from_bytes( + embeddings=self.embeddings, serialized=index ) self.vector_cache = load_vector_cache(f"{self.name}.json") spinner.stop() @@ -31,8 +34,12 @@ def index_documents(self, documents: list[Document]): spinner = yaspin(text="💾 Indexing vector store...", color="green") spinner.start() self.db = FAISS.from_documents(documents, self.embeddings) - self.db.save_local(index_name=self.name, folder_path=get_cache_path()) - + bytes = self.db.serialize_to_bytes() + with open( + os.path.join(get_cache_path(), f"{self.name}.faiss"), "wb" + ) as binary_file: + # Write bytes to file + binary_file.write(bytes) # Create vector cache index_to_docstore_id = self.db.index_to_docstore_id for i in range(len(documents)): From 4c68634b9a238b6c75b8785270a963eb76abb227 Mon Sep 17 00:00:00 2001 From: fynnfluegge Date: Sun, 14 Jan 2024 14:50:23 +0100 Subject: [PATCH 2/2] refactor --- codeqai/vector_store.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/codeqai/vector_store.py b/codeqai/vector_store.py index bbaf05d..183e836 100644 --- a/codeqai/vector_store.py +++ b/codeqai/vector_store.py @@ -34,12 +34,11 @@ def index_documents(self, documents: list[Document]): spinner = yaspin(text="💾 Indexing vector store...", color="green") spinner.start() self.db = FAISS.from_documents(documents, self.embeddings) - bytes = self.db.serialize_to_bytes() + index = self.db.serialize_to_bytes() with open( os.path.join(get_cache_path(), f"{self.name}.faiss"), "wb" ) as binary_file: - # Write bytes to file - binary_file.write(bytes) + binary_file.write(index) # Create vector cache index_to_docstore_id = self.db.index_to_docstore_id for i in range(len(documents)):