langchain-ai · Mateusz-Switala · Sep 22, 2024 · Sep 23, 2024 · eyurtsev · Sep 23, 2024
diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py
@@ -738,9 +738,69 @@ def add_texts(
         Returns:
             List[str]: The resulting keys for each inserted element.
         """
-        from pymilvus import Collection, MilvusException
 
         texts = list(texts)
+
+        try:
+            embeddings: list = self.embedding_func.embed_documents(texts)
+        except NotImplementedError:
+            embeddings = [self.embedding_func.embed_query(x) for x in texts]
+
+        if len(embeddings) == 0:
+            logger.debug("Nothing to insert, skipping.")
+            return []
+
+        return self.add_embeddings(
+            text_embeddings=zip(texts, embeddings),
+            metadatas=metadatas,
+            timeout=timeout,
+            batch_size=batch_size,
+            ids=ids,
+            **kwargs,
+        )
+
+    def add_embeddings(
+        self,
+        text_embeddings: Iterable[Tuple[str, List[float]]],
-        text_embeddings: Iterable[Tuple[str, List[float]]],
+        embeddings: Sequence[Tuple[str, List[float]]],
-        text_embeddings: Iterable[Tuple[str, List[float]]],
+        embeddings: Sequence[Tuple[str, List[float]]],
+        metadatas: Optional[List[dict]] = None,
+        timeout: Optional[float] = None,
+        batch_size: int = 1000,
+        *,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Insert text data with embeddings vectors into Milvus.
+
+        Inserting data when the collection has not be made yet will result
+        in creating a new Collection. The data of the first entity decides
+        the schema of the new collection, the dim is extracted from the first
+        embedding and the columns are decided by the first metadata dict.
+        Metadata keys will need to be present for all inserted values. At
+        the moment there is no None equivalent in Milvus.
+
+        Args:
+            text_embeddings (Iterable[Tuple[str, List[float]]]): The texts with
+                embeddings vectors, it is assumed that they all fit in memory.
+            metadatas (Optional[List[dict]]): Metadata dicts attached to each of
+                the texts. Defaults to None.
+            should be less than 65535 bytes. Required and work when auto_id is False.
+            timeout (Optional[float]): Timeout for each batch insert. Defaults
+                to None.
+            batch_size (int, optional): Batch size to use for insertion.
+                Defaults to 1000.
+            ids (Optional[List[str]]): List of text ids. The length of each item
+
+        Raises:
+            MilvusException: Failure to add texts and embeddings
+
+        Returns:
+            List[str]: The resulting keys for each inserted element.
+        """
+
+        from pymilvus import Collection, MilvusException
+
+        texts, embeddings = zip(*text_embeddings)
+
         if not self.auto_id:
             assert isinstance(ids, list), (
                 "A list of valid ids are required when auto_id is False. "
@@ -762,15 +822,6 @@ def add_texts(
                     "The ids will be generated automatically."
                 )
 
-        try:
-            embeddings: list = self.embedding_func.embed_documents(texts)
-        except NotImplementedError:
-            embeddings = [self.embedding_func.embed_query(x) for x in texts]
-
-        if len(embeddings) == 0:
-            logger.debug("Nothing to insert, skipping.")
-            return []
-
         # If the collection hasn't been initialized yet, perform all steps to do so
         if not isinstance(self.col, Collection):
             kwargs = {"embeddings": embeddings, "metadatas": metadatas}

diff --git a/libs/partners/milvus/pyproject.toml b/libs/partners/milvus/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "langchain-milvus"
-version = "0.1.5"
+version = "0.1.7"
 description = "An integration package connecting Milvus and LangChain"
 authors = []
 readme = "README.md"

diff --git a/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py b/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py
@@ -63,6 +63,22 @@ def test_milvus(temp_milvus_db: Any) -> None:
     assert_docs_equal_without_pk(output, [Document(page_content="foo")])
 
 
+def test_milvus_add_embeddings_search(temp_milvus_db: Any) -> None:
+    """Test end to end with add embeddings"""
+    embed_func = FakeEmbeddings()
+    docsearch = Milvus(
+        embed_func,
+        connection_args={"uri": temp_milvus_db},
+        drop_old=True,
+        consistency_level="Strong",
+        auto_id=True,
+    )
+
+    docsearch.add_embeddings(zip(fake_texts, embed_func.embed_documents(fake_texts)))
+    output = docsearch.similarity_search("foo", k=1)
+    assert_docs_equal_without_pk(output, [Document(page_content="foo")])
+
+
 def test_milvus_vector_search(temp_milvus_db: Any) -> None:
     """Test end to end construction and search by vector."""
     docsearch = _milvus_from_texts(db_path=temp_milvus_db)