From e3f01143f8f6bc3f6a26bf762284bf6f24027726 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 15 Aug 2024 21:24:25 +0200
Subject: [PATCH 1/9] Bugfix for AzureSearch vector store

---
 .../vectorstores/azuresearch.py               | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py
index 5d7a6fc8edc8a..17729857a478b 100644
--- a/libs/community/langchain_community/vectorstores/azuresearch.py
+++ b/libs/community/langchain_community/vectorstores/azuresearch.py
@@ -428,6 +428,12 @@ def add_texts(
             logger.debug("Nothing to insert, skipping.")
             return []
 
+        # when `keys`` are not passed in and there is `ids` in kwargs, use those instead
+        # base class expects `ids` passed in rather than `keys`
+        # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65
+        if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):
+            keys=kwargs['ids']
+
         return self.add_embeddings(zip(texts, embeddings), metadatas, keys=keys)
 
     async def aadd_texts(
@@ -451,6 +457,12 @@ async def aadd_texts(
         if len(embeddings) == 0:
             logger.debug("Nothing to insert, skipping.")
             return []
+        
+        # when `keys`` are not passed in and there is `ids` in kwargs, use those instead
+        # base class expects `ids` passed in rather than `keys`
+        # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65        
+        if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):
+            keys=kwargs['ids']
 
         return await self.aadd_embeddings(zip(texts, embeddings), metadatas, keys=keys)
 
@@ -468,9 +480,13 @@ def add_embeddings(
         data = []
         for i, (text, embedding) in enumerate(text_embeddings):
             # Use provided key otherwise use default key
-            key = keys[i] if keys else str(uuid.uuid4())
-            # Encoding key for Azure Search valid characters
-            key = base64.urlsafe_b64encode(bytes(key, "utf-8")).decode("ascii")
+            if keys: 
+                key = keys[i]
+            else:
+                key = str(uuid.uuid4())
+                # Encoding key for Azure Search valid characters
+                key = base64.urlsafe_b64encode(bytes(key, "utf-8")).decode("ascii")
+            
             metadata = metadatas[i] if metadatas else {}
             # Add data to index
             # Additional metadata to fields mapping

From 16922791c559a08bcd7da71b4bc10b940ce9793e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 15 Aug 2024 21:25:20 +0200
Subject: [PATCH 2/9] typo fix

---
 .../community/langchain_community/vectorstores/azuresearch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py
index 17729857a478b..58c360700b2b1 100644
--- a/libs/community/langchain_community/vectorstores/azuresearch.py
+++ b/libs/community/langchain_community/vectorstores/azuresearch.py
@@ -428,7 +428,7 @@ def add_texts(
             logger.debug("Nothing to insert, skipping.")
             return []
 
-        # when `keys`` are not passed in and there is `ids` in kwargs, use those instead
+        # when `keys` are not passed in and there is `ids` in kwargs, use those instead
         # base class expects `ids` passed in rather than `keys`
         # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65
         if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):
@@ -458,7 +458,7 @@ async def aadd_texts(
             logger.debug("Nothing to insert, skipping.")
             return []
         
-        # when `keys`` are not passed in and there is `ids` in kwargs, use those instead
+        # when `keys` are not passed in and there is `ids` in kwargs, use those instead
         # base class expects `ids` passed in rather than `keys`
         # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65        
         if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):

From d00bbc13401d1c0d25b243e8a4cf0fb50e677a55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Fri, 16 Aug 2024 14:30:31 +0200
Subject: [PATCH 3/9] auto-format

---
 .../vectorstores/azuresearch.py                  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py
index 58c360700b2b1..6fd12db4322a6 100644
--- a/libs/community/langchain_community/vectorstores/azuresearch.py
+++ b/libs/community/langchain_community/vectorstores/azuresearch.py
@@ -431,8 +431,8 @@ def add_texts(
         # when `keys` are not passed in and there is `ids` in kwargs, use those instead
         # base class expects `ids` passed in rather than `keys`
         # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65
-        if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):
-            keys=kwargs['ids']
+        if (not keys) and ("ids" in kwargs) and (len(kwargs["ids"]) == len(embeddings)):
+            keys = kwargs["ids"]
 
         return self.add_embeddings(zip(texts, embeddings), metadatas, keys=keys)
 
@@ -457,12 +457,12 @@ async def aadd_texts(
         if len(embeddings) == 0:
             logger.debug("Nothing to insert, skipping.")
             return []
-        
+
         # when `keys` are not passed in and there is `ids` in kwargs, use those instead
         # base class expects `ids` passed in rather than `keys`
-        # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65        
-        if (not keys) and ('ids' in kwargs) and (len(kwargs['ids'])==len(embeddings)):
-            keys=kwargs['ids']
+        # https://github.com/langchain-ai/langchain/blob/4cdaca67dc51dba887289f56c6fead3c1a52f97d/libs/core/langchain_core/vectorstores/base.py#L65
+        if (not keys) and ("ids" in kwargs) and (len(kwargs["ids"]) == len(embeddings)):
+            keys = kwargs["ids"]
 
         return await self.aadd_embeddings(zip(texts, embeddings), metadatas, keys=keys)
 
@@ -480,13 +480,13 @@ def add_embeddings(
         data = []
         for i, (text, embedding) in enumerate(text_embeddings):
             # Use provided key otherwise use default key
-            if keys: 
+            if keys:
                 key = keys[i]
             else:
                 key = str(uuid.uuid4())
                 # Encoding key for Azure Search valid characters
                 key = base64.urlsafe_b64encode(bytes(key, "utf-8")).decode("ascii")
-            
+
             metadata = metadatas[i] if metadatas else {}
             # Add data to index
             # Additional metadata to fields mapping

From 48f724f69c83bb910bf30cfdc51d93aa7e3a3af3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 15:54:54 +0200
Subject: [PATCH 4/9] added unit test

---
 .../vectorstores/test_azure_search.py         | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index a06fbfd151b0b..37cf8f77018f9 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -188,3 +188,35 @@ def mock_create_index() -> None:
         )
         assert vector_store.client is not None
         assert vector_store.client._api_version == "test"
+
+@pytest.mark.requires("azure.search.documents")
+def test_ids_used_correctly() -> None:
+    """Check whether vector store uses the document ids when provided with them.    
+    """
+    from langchain_core.documents import Document
+    from azure.search.documents.indexes import SearchIndexClient
+    from azure.search.documents import SearchClient
+    
+    def mock_upload_documents(self, documents):
+        # assume all documents uploaded successfuly
+        response = [type('Response', (object,), {'succeeded': True})() for _ in documents]
+        return response
+    
+    # 
+    documents = [Document(page_content="page zero Lorem Ipsum", 
+                          metadata={"source":"document.pdf",
+                                    "page":0,
+                                    "id": "ID-document-1"}),
+                 Document(page_content="page one Lorem Ipsum", 
+                          metadata={"source":"document.pdf",
+                                    "page":1,
+                                    "id": "ID-document-2"})
+    ]
+    ids_provided = [i.metadata.get("id") for i in documents]
+
+    with patch.object(SearchClient, "upload_documents", mock_upload_documents), \
+         patch.object(SearchIndexClient, "get_index", mock_default_index):
+        vector_store = create_vector_store()
+        ids_used_at_upload = vector_store.add_texts(documents, ids=ids_provided)
+        assert len(ids_provided)==len(ids_used_at_upload)
+        assert ids_provided==ids_used_at_upload
\ No newline at end of file

From 3b8563f1204250ebef33e566da52d5b5a1a73422 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 15:56:59 +0200
Subject: [PATCH 5/9] tiny formatting update

---
 .../tests/unit_tests/vectorstores/test_azure_search.py         | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index 37cf8f77018f9..dc519c4731070 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -202,7 +202,6 @@ def mock_upload_documents(self, documents):
         response = [type('Response', (object,), {'succeeded': True})() for _ in documents]
         return response
     
-    # 
     documents = [Document(page_content="page zero Lorem Ipsum", 
                           metadata={"source":"document.pdf",
                                     "page":0,
@@ -219,4 +218,4 @@ def mock_upload_documents(self, documents):
         vector_store = create_vector_store()
         ids_used_at_upload = vector_store.add_texts(documents, ids=ids_provided)
         assert len(ids_provided)==len(ids_used_at_upload)
-        assert ids_provided==ids_used_at_upload
\ No newline at end of file
+        assert ids_provided==ids_used_at_upload

From de28870989262b71307983d69537cc5f130a4aaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 16:06:30 +0200
Subject: [PATCH 6/9] formatting

---
 .../vectorstores/test_azure_search.py         | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index dc519c4731070..ea5c64e169866 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -189,33 +189,37 @@ def mock_create_index() -> None:
         assert vector_store.client is not None
         assert vector_store.client._api_version == "test"
 
+
 @pytest.mark.requires("azure.search.documents")
 def test_ids_used_correctly() -> None:
-    """Check whether vector store uses the document ids when provided with them.    
-    """
-    from langchain_core.documents import Document
-    from azure.search.documents.indexes import SearchIndexClient
+    """Check whether vector store uses the document ids when provided with them."""
     from azure.search.documents import SearchClient
-    
+    from azure.search.documents.indexes import SearchIndexClient
+    from langchain_core.documents import Document
+
     def mock_upload_documents(self, documents):
         # assume all documents uploaded successfuly
-        response = [type('Response', (object,), {'succeeded': True})() for _ in documents]
+        response = [
+            type("Response", (object,), {"succeeded": True})() for _ in documents
+        ]
         return response
-    
-    documents = [Document(page_content="page zero Lorem Ipsum", 
-                          metadata={"source":"document.pdf",
-                                    "page":0,
-                                    "id": "ID-document-1"}),
-                 Document(page_content="page one Lorem Ipsum", 
-                          metadata={"source":"document.pdf",
-                                    "page":1,
-                                    "id": "ID-document-2"})
+
+    documents = [
+        Document(
+            page_content="page zero Lorem Ipsum",
+            metadata={"source": "document.pdf", "page": 0, "id": "ID-document-1"},
+        ),
+        Document(
+            page_content="page one Lorem Ipsum",
+            metadata={"source": "document.pdf", "page": 1, "id": "ID-document-2"},
+        ),
     ]
     ids_provided = [i.metadata.get("id") for i in documents]
 
-    with patch.object(SearchClient, "upload_documents", mock_upload_documents), \
-         patch.object(SearchIndexClient, "get_index", mock_default_index):
+    with patch.object(
+        SearchClient, "upload_documents", mock_upload_documents
+    ), patch.object(SearchIndexClient, "get_index", mock_default_index):
         vector_store = create_vector_store()
         ids_used_at_upload = vector_store.add_texts(documents, ids=ids_provided)
-        assert len(ids_provided)==len(ids_used_at_upload)
-        assert ids_provided==ids_used_at_upload
+        assert len(ids_provided) == len(ids_used_at_upload)
+        assert ids_provided == ids_used_at_upload

From 9650275b43a84fa7a50b6bb1b0fa8cc876150525 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 16:22:19 +0200
Subject: [PATCH 7/9] fixed typing issues in test

---
 .../unit_tests/vectorstores/test_azure_search.py     | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index ea5c64e169866..e1d8c0d595515 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -197,11 +197,13 @@ def test_ids_used_correctly() -> None:
     from azure.search.documents.indexes import SearchIndexClient
     from langchain_core.documents import Document
 
-    def mock_upload_documents(self, documents):
+    class Response:
+        def __init__(self):
+            self.succeeded: bool = True
+
+    def mock_upload_documents(self, documents: List[object]) -> List[Response]:
         # assume all documents uploaded successfuly
-        response = [
-            type("Response", (object,), {"succeeded": True})() for _ in documents
-        ]
+        response = [Response() for _ in documents]
         return response
 
     documents = [
@@ -220,6 +222,6 @@ def mock_upload_documents(self, documents):
         SearchClient, "upload_documents", mock_upload_documents
     ), patch.object(SearchIndexClient, "get_index", mock_default_index):
         vector_store = create_vector_store()
-        ids_used_at_upload = vector_store.add_texts(documents, ids=ids_provided)
+        ids_used_at_upload = vector_store.add_documents(documents, ids=ids_provided)
         assert len(ids_provided) == len(ids_used_at_upload)
         assert ids_provided == ids_used_at_upload

From 587267608b3e03e5f16856a05625abacb7da9c25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 17:06:35 +0200
Subject: [PATCH 8/9] add "ignore [no-untyped-def]" in the test

---
 .../tests/unit_tests/vectorstores/test_azure_search.py        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index e1d8c0d595515..271874f978cf1 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -198,10 +198,10 @@ def test_ids_used_correctly() -> None:
     from langchain_core.documents import Document
 
     class Response:
-        def __init__(self):
+        def __init__(self) -> None:
             self.succeeded: bool = True
 
-    def mock_upload_documents(self, documents: List[object]) -> List[Response]:
+    def mock_upload_documents(self, documents: List[object]) -> List[Response]: # type: ignore[no-untyped-def]
         # assume all documents uploaded successfuly
         response = [Response() for _ in documents]
         return response

From 7c6c232d9eca5744d01c84aee24875eb6ccff551 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20T=C5=99=C3=ADska?= <martin.triska@gmail.com>
Date: Thu, 29 Aug 2024 17:11:55 +0200
Subject: [PATCH 9/9] formatting: literally added a space before comment :-/

---
 .../tests/unit_tests/vectorstores/test_azure_search.py          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
index 271874f978cf1..255105f32894b 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -201,7 +201,7 @@ class Response:
         def __init__(self) -> None:
             self.succeeded: bool = True
 
-    def mock_upload_documents(self, documents: List[object]) -> List[Response]: # type: ignore[no-untyped-def]
+    def mock_upload_documents(self, documents: List[object]) -> List[Response]:  # type: ignore[no-untyped-def]
         # assume all documents uploaded successfuly
         response = [Response() for _ in documents]
         return response