From 27212f54a85826dd0dbd329b34d1d67bf1c8f771 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 16:05:33 +0100 Subject: [PATCH 01/16] first refactorings --- .../chroma/tests/test_document_store.py | 30 ----------- .../tests/test_cohere_chat_generator.py | 12 ----- integrations/deepeval/tests/test_evaluator.py | 1 + .../mongodb_atlas/tests/test_retriever.py | 52 ++++++++++++------- 4 files changed, 35 insertions(+), 60 deletions(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 8d61e63ed..5b827a984 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -60,7 +60,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.content == doc_expected.content assert doc_received.meta == doc_expected.meta - @pytest.mark.unit def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ We customize this test because Chroma consider "not equal" true when @@ -72,14 +71,12 @@ def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: L result, [doc for doc in filterable_docs if doc.meta.get("page", "100") != "100"] ) - @pytest.mark.unit def test_delete_empty(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma """ document_store.delete_documents(["test"]) - @pytest.mark.unit def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma @@ -131,144 +128,117 @@ def test_same_collection_name_reinitialization(self): ChromaDocumentStore("test_name") @pytest.mark.skip(reason="Filter on array contents is not supported.") - @pytest.mark.unit def test_filter_document_array(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on dataframe contents is not supported.") - @pytest.mark.unit def test_filter_document_dataframe(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_eq_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_eq_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported. 
Filter on table contents is not supported.") - @pytest.mark.unit def test_in_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_ne_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_ne_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on table contents is not supported.") - @pytest.mark.unit def test_nin_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on embedding value is not supported.") - @pytest.mark.unit def test_nin_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported.") - @pytest.mark.unit def test_nin_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and_with_multi_key_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_multikey_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_list( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_explicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_filter_nested_and_or_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_and_or_implicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or_and(self, document_store: 
ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_multiple_identical_operators_same_level( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_fail(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_skip(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_overwrite(self, document_store: ChromaDocumentStore): pass diff --git a/integrations/cohere/tests/test_cohere_chat_generator.py b/integrations/cohere/tests/test_cohere_chat_generator.py index 7fd588fec..9a822856e 100644 --- a/integrations/cohere/tests/test_cohere_chat_generator.py +++ b/integrations/cohere/tests/test_cohere_chat_generator.py @@ -53,7 +53,6 @@ def chat_messages(): class TestCohereChatGenerator: - @pytest.mark.unit def test_init_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") @@ -64,14 +63,12 @@ def test_init_default(self, monkeypatch): assert component.api_base_url == cohere.COHERE_API_URL assert not component.generation_kwargs - @pytest.mark.unit def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) with pytest.raises(ValueError): CohereChatGenerator() - @pytest.mark.unit def test_init_with_parameters(self): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), @@ -86,7 +83,6 @@ def test_init_with_parameters(self): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_to_dict_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator() @@ -102,7 +98,6 @@ def test_to_dict_default(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_parameters(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -125,7 +120,6 @@ def test_to_dict_with_parameters(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator( @@ -146,7 +140,6 @@ def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): }, } - @pytest.mark.unit def test_from_dict(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "fake-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -166,7 +159,6 @@ def test_from_dict(self, monkeypatch): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_from_dict_fail_wo_env_var(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) @@ -183,7 +175,6 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch): with pytest.raises(ValueError): CohereChatGenerator.from_dict(data) - @pytest.mark.unit def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 component = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) response = 
component.run(chat_messages) @@ -195,13 +186,11 @@ def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_message_to_dict(self, chat_messages): obj = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) dictionary = [obj._message_to_dict(message) for message in chat_messages] assert dictionary == [{"user_name": "Chatbot", "text": "What's the capital of France"}] - @pytest.mark.unit def test_run_with_params(self, chat_messages, mock_chat_response): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5} @@ -220,7 +209,6 @@ def test_run_with_params(self, chat_messages, mock_chat_response): assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_run_streaming(self, chat_messages, mock_chat_response): streaming_call_count = 0 diff --git a/integrations/deepeval/tests/test_evaluator.py b/integrations/deepeval/tests/test_evaluator.py index 8534ef687..7d1946185 100644 --- a/integrations/deepeval/tests/test_evaluator.py +++ b/integrations/deepeval/tests/test_evaluator.py @@ -270,6 +270,7 @@ def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params, monk # OpenAI API. It is parameterized by the metric, the inputs to the evalutor # and the metric parameters. @pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") +@pytest.mark.integration @pytest.mark.parametrize( "metric, inputs, metric_params", [ diff --git a/integrations/mongodb_atlas/tests/test_retriever.py b/integrations/mongodb_atlas/tests/test_retriever.py index ec44513e2..4ef5222ce 100644 --- a/integrations/mongodb_atlas/tests/test_retriever.py +++ b/integrations/mongodb_atlas/tests/test_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch import pytest from haystack.dataclasses import Document @@ -10,34 +10,48 @@ from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore -@pytest.fixture -def document_store(): - store = MongoDBAtlasDocumentStore( - database_name="haystack_integration_test", - collection_name="test_embeddings_collection", - vector_search_index="cosine_index", - ) - return store +class TestRetriever: + @pytest.fixture + def mock_client(self): + with patch( + "haystack_integrations.document_stores.mongodb_atlas.document_store.MongoClient" + ) as mock_mongo_client: + mock_connection = MagicMock() + mock_database = MagicMock() + mock_collection_names = MagicMock(return_value=["test_embeddings_collection"]) + mock_database.list_collection_names = mock_collection_names + mock_connection.__getitem__.return_value = mock_database + mock_mongo_client.return_value = mock_connection + yield mock_mongo_client -class TestRetriever: - def test_init_default(self, document_store: MongoDBAtlasDocumentStore): - retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - def 
test_init(self, document_store: MongoDBAtlasDocumentStore): + def test_init(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) retriever = MongoDBAtlasEmbeddingRetriever( - document_store=document_store, + document_store=mock_store, filters={"field": "value"}, top_k=5, ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 - def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): + def test_to_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + + document_store = MongoDBAtlasDocumentStore( + database_name="haystack_integration_test", + collection_name="test_embeddings_collection", + vector_search_index="cosine_index", + ) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store, filters={"field": "value"}, top_k=5) res = retriever.to_dict() assert res == { @@ -61,7 +75,9 @@ def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): }, } - def test_from_dict(self): + def test_from_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + data = { "type": "haystack_integrations.components.retrievers.mongodb_atlas.embedding_retriever.MongoDBAtlasEmbeddingRetriever", # noqa: E501 "init_parameters": { From 30ff8f3bbeca20cce0aaa5f037700da7b31c8c0c Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:22:16 +0100 Subject: [PATCH 02/16] separate unit tests in pgvector --- .../tests/test_document_store.py | 56 +-- .../opensearch/tests/test_document_store.py | 51 +-- integrations/pgvector/tests/conftest.py | 36 ++ .../pgvector/tests/test_document_store.py | 367 +++++++++--------- .../tests/test_embedding_retrieval.py | 1 + integrations/pgvector/tests/test_filters.py | 226 ++++++----- integrations/pgvector/tests/test_retriever.py | 25 +- 7 files changed, 411 insertions(+), 351 deletions(-) diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index e46e76ed2..a1e992a9f 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -15,6 +15,34 @@ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_to_dict(_mock_elasticsearch_client): + document_store = ElasticsearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_from_dict(_mock_elasticsearch_client): + data = { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + document_store = ElasticsearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + assert 
document_store._embedding_similarity_function == "cosine" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ @@ -67,34 +95,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_to_dict(self, _mock_elasticsearch_client): - document_store = ElasticsearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_from_dict(self, _mock_elasticsearch_client): - data = { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - document_store = ElasticsearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - assert document_store._embedding_similarity_function == "cosine" - def test_user_agent_header(self, document_store: ElasticsearchDocumentStore): assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/") diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index e3a314141..765df7afb 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -14,6 +14,32 @@ from opensearchpy.exceptions import RequestError +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_to_dict(_mock_opensearch_client): + document_store = OpenSearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_from_dict(_mock_opensearch_client): + data = { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + document_store = OpenSearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + +@pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ Common test cases will be provided by `DocumentStoreBaseTests` but @@ -87,31 +113,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_to_dict(self, _mock_opensearch_client): - document_store = OpenSearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - - 
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_from_dict(self, _mock_opensearch_client): - data = { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - document_store = OpenSearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - def test_write_documents(self, document_store: OpenSearchDocumentStore): docs = [Document(id="1")] assert document_store.write_documents(docs) == 1 diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py index 068f2ac54..94b35a04d 100644 --- a/integrations/pgvector/tests/conftest.py +++ b/integrations/pgvector/tests/conftest.py @@ -1,4 +1,5 @@ import os +from unittest.mock import patch import pytest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore @@ -24,3 +25,38 @@ def document_store(request): yield store store.delete_table() + + +@pytest.fixture +def patches_for_unit_tests(): + with patch("haystack_integrations.document_stores.pgvector.document_store.connect") as mock_connect, patch( + "haystack_integrations.document_stores.pgvector.document_store.register_vector" + ) as mock_register, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.delete_table" + ) as mock_delete, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._create_table_if_not_exists" + ) as mock_create, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._handle_hnsw" + ) as mock_hnsw: + + yield mock_connect, mock_register, mock_delete, mock_create, mock_hnsw + + +@pytest.fixture +def mock_store(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") + table_name = "haystack" + embedding_dimension = 768 + vector_function = "cosine_similarity" + recreate_table = True + search_strategy = "exact_nearest_neighbor" + + store = PgvectorDocumentStore( + table_name=table_name, + embedding_dimension=embedding_dimension, + vector_function=vector_function, + recreate_table=recreate_table, + search_strategy=search_strategy, + ) + + yield store diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 1e158f134..8ffa2bd31 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -13,6 +13,7 @@ from pandas import DataFrame +@pytest.mark.integration class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): def test_write_documents(self, document_store: PgvectorDocumentStore): docs = [Document(id="1")] @@ -25,7 +26,6 @@ def test_write_blob(self, document_store: PgvectorDocumentStore): docs = [Document(id="1", blob=bytestream)] document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs @@ -35,185 +35,192 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs - def test_init(self): - document_store = PgvectorDocumentStore( - 
table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.table_name == "my_table" - assert document_store.embedding_dimension == 512 - assert document_store.vector_function == "l2_distance" - assert document_store.recreate_table - assert document_store.search_strategy == "hnsw" - assert document_store.hnsw_recreate_index_if_exists - assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} - assert document_store.hnsw_ef_search == 50 - - def test_to_dict(self): - document_store = PgvectorDocumentStore( - table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.to_dict() == { - "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", - "init_parameters": { - "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "my_table", - "embedding_dimension": 512, - "vector_function": "l2_distance", - "recreate_table": True, - "search_strategy": "hnsw", - "hnsw_recreate_index_if_exists": True, - "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, - "hnsw_ef_search": 50, - }, - } - - def test_from_haystack_to_pg_documents(self): - haystack_docs = [ - Document( - id="1", - content="This is a text", - meta={"meta_key": "meta_value"}, - embedding=[0.1, 0.2, 0.3], - score=0.5, - ), - Document( - id="2", - dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), - meta={"meta_key": "meta_value"}, - embedding=[0.4, 0.5, 0.6], - score=0.6, - ), - Document( - id="3", - blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), - meta={"meta_key": "meta_value"}, - embedding=[0.7, 0.8, 0.9], - score=0.7, - ), - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) - - assert pg_docs[0]["id"] == "1" - assert pg_docs[0]["content"] == "This is a text" - assert pg_docs[0]["dataframe"] is None - assert pg_docs[0]["blob_data"] is None - assert pg_docs[0]["blob_meta"] is None - assert pg_docs[0]["blob_mime_type"] is None - assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] - assert "score" not in pg_docs[0] - - assert pg_docs[1]["id"] == "2" - assert pg_docs[1]["content"] is None - assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() - assert pg_docs[1]["blob_data"] is None - assert pg_docs[1]["blob_meta"] is None - assert pg_docs[1]["blob_mime_type"] is None - assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] - assert "score" not in pg_docs[1] - - assert pg_docs[2]["id"] == "3" - assert pg_docs[2]["content"] is None - assert pg_docs[2]["dataframe"] is None - assert pg_docs[2]["blob_data"] == b"test" - assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} - assert pg_docs[2]["blob_mime_type"] == "mime_type" - 
assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] - assert "score" not in pg_docs[2] - - def test_from_pg_to_haystack_documents(self): - pg_docs = [ - { - "id": "1", - "content": "This is a text", - "dataframe": None, - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.1, 0.2, 0.3]", - }, - { - "id": "2", - "content": None, - "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.4, 0.5, 0.6]", - }, - { - "id": "3", - "content": None, - "dataframe": None, - "blob_data": b"test", - "blob_meta": {"blob_meta_key": "blob_meta_value"}, - "blob_mime_type": "mime_type", - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.7, 0.8, 0.9]", - }, - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) - - assert haystack_docs[0].id == "1" - assert haystack_docs[0].content == "This is a text" - assert haystack_docs[0].dataframe is None - assert haystack_docs[0].blob is None - assert haystack_docs[0].meta == {"meta_key": "meta_value"} - assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] - assert haystack_docs[0].score is None - - assert haystack_docs[1].id == "2" - assert haystack_docs[1].content is None - assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) - assert haystack_docs[1].blob is None - assert haystack_docs[1].meta == {"meta_key": "meta_value"} - assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] - assert haystack_docs[1].score is None - - assert haystack_docs[2].id == "3" - assert haystack_docs[2].content is None - assert haystack_docs[2].dataframe is None - assert haystack_docs[2].blob.data == b"test" - assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} - assert haystack_docs[2].blob.mime_type == "mime_type" - assert haystack_docs[2].meta == {"meta_key": "meta_value"} - assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] - assert haystack_docs[2].score is None + +def test_init(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.table_name == "my_table" + assert document_store.embedding_dimension == 512 + assert document_store.vector_function == "l2_distance" + assert document_store.recreate_table + assert document_store.search_strategy == "hnsw" + assert document_store.hnsw_recreate_index_if_exists + assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} + assert document_store.hnsw_ef_search == 50 + + +def test_to_dict(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + 
embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.to_dict() == { + "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", + "init_parameters": { + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, + "table_name": "my_table", + "embedding_dimension": 512, + "vector_function": "l2_distance", + "recreate_table": True, + "search_strategy": "hnsw", + "hnsw_recreate_index_if_exists": True, + "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, + "hnsw_ef_search": 50, + }, + } + + +def test_from_haystack_to_pg_documents(): + haystack_docs = [ + Document( + id="1", + content="This is a text", + meta={"meta_key": "meta_value"}, + embedding=[0.1, 0.2, 0.3], + score=0.5, + ), + Document( + id="2", + dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), + meta={"meta_key": "meta_value"}, + embedding=[0.4, 0.5, 0.6], + score=0.6, + ), + Document( + id="3", + blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), + meta={"meta_key": "meta_value"}, + embedding=[0.7, 0.8, 0.9], + score=0.7, + ), + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) + + assert pg_docs[0]["id"] == "1" + assert pg_docs[0]["content"] == "This is a text" + assert pg_docs[0]["dataframe"] is None + assert pg_docs[0]["blob_data"] is None + assert pg_docs[0]["blob_meta"] is None + assert pg_docs[0]["blob_mime_type"] is None + assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] + assert "score" not in pg_docs[0] + + assert pg_docs[1]["id"] == "2" + assert pg_docs[1]["content"] is None + assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() + assert pg_docs[1]["blob_data"] is None + assert pg_docs[1]["blob_meta"] is None + assert pg_docs[1]["blob_mime_type"] is None + assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] + assert "score" not in pg_docs[1] + + assert pg_docs[2]["id"] == "3" + assert pg_docs[2]["content"] is None + assert pg_docs[2]["dataframe"] is None + assert pg_docs[2]["blob_data"] == b"test" + assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} + assert pg_docs[2]["blob_mime_type"] == "mime_type" + assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] + assert "score" not in pg_docs[2] + + +def test_from_pg_to_haystack_documents(): + pg_docs = [ + { + "id": "1", + "content": "This is a text", + "dataframe": None, + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.1, 0.2, 0.3]", + }, + { + "id": "2", + "content": None, + "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.4, 0.5, 0.6]", + }, + { + "id": "3", + "content": None, + "dataframe": None, + "blob_data": b"test", + "blob_meta": {"blob_meta_key": 
"blob_meta_value"}, + "blob_mime_type": "mime_type", + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.7, 0.8, 0.9]", + }, + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) + + assert haystack_docs[0].id == "1" + assert haystack_docs[0].content == "This is a text" + assert haystack_docs[0].dataframe is None + assert haystack_docs[0].blob is None + assert haystack_docs[0].meta == {"meta_key": "meta_value"} + assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] + assert haystack_docs[0].score is None + + assert haystack_docs[1].id == "2" + assert haystack_docs[1].content is None + assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) + assert haystack_docs[1].blob is None + assert haystack_docs[1].meta == {"meta_key": "meta_value"} + assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] + assert haystack_docs[1].score is None + + assert haystack_docs[2].id == "3" + assert haystack_docs[2].content is None + assert haystack_docs[2].dataframe is None + assert haystack_docs[2].blob.data == b"test" + assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} + assert haystack_docs[2].blob.mime_type == "mime_type" + assert haystack_docs[2].meta == {"meta_key": "meta_value"} + assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] + assert haystack_docs[2].score is None diff --git a/integrations/pgvector/tests/test_embedding_retrieval.py b/integrations/pgvector/tests/test_embedding_retrieval.py index 1d5e8e297..2c384f57c 100644 --- a/integrations/pgvector/tests/test_embedding_retrieval.py +++ b/integrations/pgvector/tests/test_embedding_retrieval.py @@ -10,6 +10,7 @@ from numpy.random import rand +@pytest.mark.integration class TestEmbeddingRetrieval: @pytest.fixture def document_store_w_hnsw_index(self, request): diff --git a/integrations/pgvector/tests/test_filters.py b/integrations/pgvector/tests/test_filters.py index 8b2dc8ec9..bda10e3c0 100644 --- a/integrations/pgvector/tests/test_filters.py +++ b/integrations/pgvector/tests/test_filters.py @@ -15,6 +15,7 @@ from psycopg.types.json import Jsonb +@pytest.mark.integration class TestFilters(FilterDocumentsTest): def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): """ @@ -35,6 +36,9 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do received_doc.embedding, expected_doc.embedding = None, None assert received_doc == expected_doc + @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") + def test_not_operator(self, document_store, filterable_docs): ... + def test_complex_filter(self, document_store, filterable_docs): document_store.write_documents(filterable_docs) filters = { @@ -69,111 +73,119 @@ def test_complex_filter(self, document_store, filterable_docs): ], ) - @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") - def test_not_operator(self, document_store, filterable_docs): ... 
- def test_treat_meta_field(self): - assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" - assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" - assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" - assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" - - # do not cast the field if its value is not one of the known types, an empty list or None - assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" - assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" - assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" - - def test_comparison_condition_dataframe_jsonb_conversion(self): - dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - condition = {"field": "meta.df", "operator": "==", "value": dataframe} - field, values = _parse_comparison_condition(condition) - assert field == "(meta.df)::jsonb = %s" - - # we check each slot of the Jsonb object because it does not implement __eq__ - assert values[0].obj == Jsonb(dataframe.to_json()).obj - assert values[0].dumps == Jsonb(dataframe.to_json()).dumps - - def test_comparison_condition_missing_operator(self): - condition = {"field": "meta.type", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_missing_value(self): - condition = {"field": "meta.type", "operator": "=="} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_unknown_operator(self): - condition = {"field": "meta.type", "operator": "unknown", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_logical_condition_missing_operator(self): - condition = {"conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_missing_conditions(self): - condition = {"operator": "AND"} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_unknown_operator(self): - condition = {"operator": "unknown", "conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_nested(self): - condition = { - "operator": "AND", - "conditions": [ - { - "operator": "OR", - "conditions": [ - {"field": "meta.domain", "operator": "!=", "value": "science"}, - {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, - ], - }, - { - "operator": "OR", - "conditions": [ - {"field": "meta.number", "operator": ">=", "value": 90}, - {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, - ], - }, - ], - } - query, values = _parse_logical_condition(condition) - assert query == ( - "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " - "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" - ) - assert values == ["science", [["intro", "conclusion"]], 90, [["John", 
"Jane"]]] - - def test_convert_filters_to_where_clause_and_params(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": 100}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") - assert params == (100, "intro") - - def test_convert_filters_to_where_clause_and_params_handle_null(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": None}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") - assert params == ("intro",) +def test_treat_meta_field(): + assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" + assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" + assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" + assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" + + # do not cast the field if its value is not one of the known types, an empty list or None + assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" + assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" + assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" + + +def test_comparison_condition_dataframe_jsonb_conversion(): + dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + condition = {"field": "meta.df", "operator": "==", "value": dataframe} + field, values = _parse_comparison_condition(condition) + assert field == "(meta.df)::jsonb = %s" + + # we check each slot of the Jsonb object because it does not implement __eq__ + assert values[0].obj == Jsonb(dataframe.to_json()).obj + assert values[0].dumps == Jsonb(dataframe.to_json()).dumps + + +def test_comparison_condition_missing_operator(): + condition = {"field": "meta.type", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_missing_value(): + condition = {"field": "meta.type", "operator": "=="} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_unknown_operator(): + condition = {"field": "meta.type", "operator": "unknown", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_logical_condition_missing_operator(): + condition = {"conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_missing_conditions(): + condition = {"operator": "AND"} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_unknown_operator(): + condition = {"operator": 
"unknown", "conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_nested(): + condition = { + "operator": "AND", + "conditions": [ + { + "operator": "OR", + "conditions": [ + {"field": "meta.domain", "operator": "!=", "value": "science"}, + {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, + ], + }, + { + "operator": "OR", + "conditions": [ + {"field": "meta.number", "operator": ">=", "value": 90}, + {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, + ], + }, + ], + } + query, values = _parse_logical_condition(condition) + assert query == ( + "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " + "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" + ) + assert values == ["science", [["intro", "conclusion"]], 90, [["John", "Jane"]]] + + +def test_convert_filters_to_where_clause_and_params(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": 100}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") + assert params == (100, "intro") + + +def test_convert_filters_to_where_clause_and_params_handle_null(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": None}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") + assert params == ("intro",) diff --git a/integrations/pgvector/tests/test_retriever.py b/integrations/pgvector/tests/test_retriever.py index 8eab10de5..0dd153fec 100644 --- a/integrations/pgvector/tests/test_retriever.py +++ b/integrations/pgvector/tests/test_retriever.py @@ -10,25 +10,25 @@ class TestRetriever: - def test_init_default(self, document_store: PgvectorDocumentStore): - retriever = PgvectorEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self, mock_store): + retriever = PgvectorEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - assert retriever.vector_function == document_store.vector_function + assert retriever.vector_function == mock_store.vector_function - def test_init(self, document_store: PgvectorDocumentStore): + def test_init(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 assert retriever.vector_function == "l2_distance" - def test_to_dict(self, document_store: PgvectorDocumentStore): + def test_to_dict(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": 
"value"}, top_k=5, vector_function="l2_distance" ) res = retriever.to_dict() t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" @@ -39,7 +39,7 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "haystack_test_to_dict", + "table_name": "haystack", "embedding_dimension": 768, "vector_function": "cosine_similarity", "recreate_table": True, @@ -55,7 +55,10 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): }, } - def test_from_dict(self): + def test_from_dict( + self, patches_for_unit_tests, monkeypatch # noqa:ARG002 patches are not explicitly called but necessary + ): + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" data = { "type": t, From 6e6c6d1e56cb90b97558317976674715ae0a3f0f Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:36:03 +0100 Subject: [PATCH 03/16] small change to weaviate --- integrations/weaviate/tests/test_document_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index a2b32d578..ab2ad0ff0 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -34,7 +34,7 @@ EmbeddedOptions, ) - +@pytest.mark.integration class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture def document_store(self, request) -> WeaviateDocumentStore: From 6809cc1f38f54f24ff66efa83004f1c962a9ccd1 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:38:59 +0100 Subject: [PATCH 04/16] fix format --- integrations/elasticsearch/tests/test_document_store.py | 2 ++ integrations/opensearch/tests/test_document_store.py | 2 ++ integrations/weaviate/tests/test_document_store.py | 1 + 3 files changed, 5 insertions(+) diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index a1e992a9f..308486a78 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -28,6 +28,7 @@ def test_to_dict(_mock_elasticsearch_client): }, } + @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") def test_from_dict(_mock_elasticsearch_client): data = { @@ -43,6 +44,7 @@ def test_from_dict(_mock_elasticsearch_client): assert document_store._index == "default" assert document_store._embedding_similarity_function == "cosine" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index 765df7afb..bc0d1c434 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -26,6 +26,7 @@ def test_to_dict(_mock_opensearch_client): }, } + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(_mock_opensearch_client): data = { @@ -39,6 +40,7 @@ def test_from_dict(_mock_opensearch_client): assert 
document_store._hosts == "some hosts" assert document_store._index == "default" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index ab2ad0ff0..801751483 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -34,6 +34,7 @@ EmbeddedOptions, ) + @pytest.mark.integration class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture From 2cfb58628745ab04be63cb7f6d6e57645c27af11 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 09:22:18 +0100 Subject: [PATCH 05/16] wip --- .github/workflows/fastembed.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index fe736029a..66f69b2cf 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -43,3 +43,34 @@ jobs: - name: Run tests run: hatch run cov + + - name: Calculate alert data + id: calculator + shell: bash + if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + run: | + if [ "${{ job.status }}" = "success" ]; then + echo "alert_type=success" >> "$GITHUB_OUTPUT"; + else + echo "alert_type=error" >> "$GITHUB_OUTPUT"; + fi + + - name: Send event to Datadog + if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + uses: masci/datadog@v1 + with: + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} + api-url: https://api.datadoghq.eu + events: | + - title: "${{ github.workflow }} workflow" + text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" + alert_type: "${{ steps.calculator.outputs.alert_type }}" + source_type_name: "Github" + host: ${{ github.repository_owner }} + tags: + - "project:${{ github.repository }}" + - "job:${{ github.job }}" + - "run_id:${{ github.run_id }}" + - "workflow:${{ github.workflow }}" + - "branch:${{ github.ref_name }}" + - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" From 86edc6ae144f0fa0c1d3dddfcf67ed82d2cda5e2 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 09:33:08 +0100 Subject: [PATCH 06/16] retry --- .github/workflows/fastembed.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 66f69b2cf..74a3311e6 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -47,7 +47,7 @@ jobs: - name: Calculate alert data id: calculator shell: bash - if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + if: (success() || failure()) run: | if [ "${{ job.status }}" = "success" ]; then echo "alert_type=success" >> "$GITHUB_OUTPUT"; @@ -56,7 +56,7 @@ jobs: fi - name: Send event to Datadog - if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + if: (success() || failure()) uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 77c0da6917829c96fdfb442ff8bf0e4b307e7915 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:00:00 +0100 Subject: [PATCH 07/16] try failur --- .github/workflows/fastembed.yml | 19 ++++--------------- .../tests/test_fastembed_text_embedder.py | 2 +- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 
74a3311e6..42d66f7f2 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -44,27 +44,16 @@ jobs: - name: Run tests run: hatch run cov - - name: Calculate alert data - id: calculator - shell: bash - if: (success() || failure()) - run: | - if [ "${{ job.status }}" = "success" ]; then - echo "alert_type=success" >> "$GITHUB_OUTPUT"; - else - echo "alert_type=error" >> "$GITHUB_OUTPUT"; - fi - - - name: Send event to Datadog - if: (success() || failure()) + - name: Send failure event to Datadog + if: failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} api-url: https://api.datadoghq.eu events: | - - title: "${{ github.workflow }} workflow" + - title: "core-integrations nightly failure: ${{ github.workflow }}" text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" - alert_type: "${{ steps.calculator.outputs.alert_type }}" + alert_type: "error" source_type_name: "Github" host: ${{ github.repository_owner }} tags: diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index 402980485..73cc395ab 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -207,5 +207,5 @@ def test_run(self): embedding = result["embedding"] assert isinstance(embedding, list) - assert len(embedding) == 384 + assert len(embedding) == 383 assert all(isinstance(emb, float) for emb in embedding) From b81bf06ae09c90d413bab64b2dcbee16eabff7ea Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:02:49 +0100 Subject: [PATCH 08/16] restrict --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 42d66f7f2..ab85584ae 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,7 +45,7 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: failure() + if: matrix.python-version == '3.9' && runner.os == 'Linux' && failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 0a8f77f14b24631831bc0ae7898bac5f7af2812b Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:22:37 +0100 Subject: [PATCH 09/16] retry --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index ab85584ae..42d66f7f2 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,7 +45,7 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: matrix.python-version == '3.9' && runner.os == 'Linux' && failure() + if: failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From a9431053a676b6549896f6dedf9ebb47055bec67 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:10:51 +0100 Subject: [PATCH 10/16] try using composite action --- .github/workflows/CI_send_failure_event.yml | 25 +++++++++++++++++++++ .github/workflows/fastembed.yml | 22 +++++------------- 2 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/CI_send_failure_event.yml diff --git a/.github/workflows/CI_send_failure_event.yml b/.github/workflows/CI_send_failure_event.yml new file mode 100644 index 000000000..ce5232e9c --- /dev/null +++ b/.github/workflows/CI_send_failure_event.yml @@ -0,0 +1,25 @@ +name: "Send failure 
event to Datadog" +inputs: + api-key: + description: "Datadog API key" + required: true +runs: + using: "composite" + steps: + - uses: masci/datadog@v1 + with: + api-key: ${{ inputs.api-key }} + api-url: https://api.datadoghq.eu + events: | + - title: "core-integrations nightly failure: ${{ github.workflow }}" + text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" + alert_type: "error" + source_type_name: "Github" + host: ${{ github.repository_owner }} + tags: + - "project:${{ github.repository }}" + - "job:${{ github.job }}" + - "run_id:${{ github.run_id }}" + - "workflow:${{ github.workflow }}" + - "branch:${{ github.ref_name }}" + - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ No newline at end of file diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 42d66f7f2..db2309ff5 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,21 +45,9 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: failure() - uses: masci/datadog@v1 + # we want to send a failure event to Datadog and notify the team + # only if the job fails during scheduled nightly runs + if: failure() #github.event_name == 'schedule' && + uses: ./.github/workflows/CI_send_failure_event.yml with: - api-key: ${{ secrets.CORE_DATADOG_API_KEY }} - api-url: https://api.datadoghq.eu - events: | - - title: "core-integrations nightly failure: ${{ github.workflow }}" - text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" - alert_type: "error" - source_type_name: "Github" - host: ${{ github.repository_owner }} - tags: - - "project:${{ github.repository }}" - - "job:${{ github.job }}" - - "run_id:${{ github.run_id }}" - - "workflow:${{ github.workflow }}" - - "branch:${{ github.ref_name }}" - - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + api-key: ${{ secrets.DATADOG_API_KEY }} From 3dfa3535ece017d5ac3f9d5f92f0adeb71ff2bec Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:20:25 +0100 Subject: [PATCH 11/16] retry composite action --- .../send_failure/action.yml} | 0 .github/workflows/fastembed.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{workflows/CI_send_failure_event.yml => actions/send_failure/action.yml} (100%) diff --git a/.github/workflows/CI_send_failure_event.yml b/.github/actions/send_failure/action.yml similarity index 100% rename from .github/workflows/CI_send_failure_event.yml rename to .github/actions/send_failure/action.yml diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index db2309ff5..3d55239f1 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -48,6 +48,6 @@ jobs: # we want to send a failure event to Datadog and notify the team # only if the job fails during scheduled nightly runs if: failure() #github.event_name == 'schedule' && - uses: ./.github/workflows/CI_send_failure_event.yml + uses: ./.github/actions/send_failure with: api-key: ${{ secrets.DATADOG_API_KEY }} From 19bbe73196779842c048985a8c07756f55fc8c9d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:48:17 +0100 Subject: [PATCH 12/16] fix typo --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 3d55239f1..0fcccef26 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -50,4 +50,4 @@ jobs: if: failure() 
#github.event_name == 'schedule' && uses: ./.github/actions/send_failure with: - api-key: ${{ secrets.DATADOG_API_KEY }} + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 3ed6b9be4aec1def50fbfb5e2e029e341b687f5d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:12:10 +0100 Subject: [PATCH 13/16] update all workflows --- .github/actions/send_failure/action.yml | 5 ++++- .github/workflows/astra.yml | 9 ++++++++- .github/workflows/chroma.yml | 7 +++++++ .github/workflows/cohere.yml | 7 +++++++ .github/workflows/deepeval.yml | 7 +++++++ .github/workflows/elasticsearch.yml | 7 +++++++ .github/workflows/fastembed.yml | 7 +++---- .github/workflows/google_ai.yml | 7 +++++++ .github/workflows/google_vertex.yml | 7 +++++++ .github/workflows/gradient.yml | 9 ++++++++- .github/workflows/instructor_embedders.yml | 7 +++++++ .github/workflows/jina.yml | 7 +++++++ .github/workflows/llama_cpp.yml | 7 +++++++ .github/workflows/mistral.yml | 7 +++++++ .github/workflows/mongodb_atlas.yml | 7 +++++++ .github/workflows/nvidia.yml | 7 +++++++ .github/workflows/ollama.yml | 7 +++++++ .github/workflows/opensearch.yml | 7 +++++++ .github/workflows/optimum.yml | 7 +++++++ .github/workflows/pgvector.yml | 7 +++++++ .github/workflows/pinecone.yml | 7 +++++++ .github/workflows/qdrant.yml | 7 +++++++ .github/workflows/ragas.yml | 7 +++++++ .github/workflows/unstructured.yml | 7 +++++++ .github/workflows/uptrain.yml | 7 +++++++ .github/workflows/weaviate.yml | 7 +++++++ 26 files changed, 177 insertions(+), 7 deletions(-) diff --git a/.github/actions/send_failure/action.yml b/.github/actions/send_failure/action.yml index ce5232e9c..4a92890d5 100644 --- a/.github/actions/send_failure/action.yml +++ b/.github/actions/send_failure/action.yml @@ -3,6 +3,9 @@ inputs: api-key: description: "Datadog API key" required: true + title: + description: "Custom title for the event" + required: true runs: using: "composite" steps: @@ -11,7 +14,7 @@ runs: api-key: ${{ inputs.api-key }} api-url: https://api.datadoghq.eu events: | - - title: "core-integrations nightly failure: ${{ github.workflow }}" + - title: ${{ inputs.title }} text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" alert_type: "error" source_type_name: "Github" diff --git a/.github/workflows/astra.yml b/.github/workflows/astra.yml index d859626ff..e90edc2e9 100644 --- a/.github/workflows/astra.yml +++ b/.github/workflows/astra.yml @@ -61,4 +61,11 @@ jobs: env: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_TOKEN }} - run: hatch run cov \ No newline at end of file + run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/chroma.yml b/.github/workflows/chroma.yml index fec309f6b..e6712d807 100644 --- a/.github/workflows/chroma.yml +++ b/.github/workflows/chroma.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/cohere.yml b/.github/workflows/cohere.yml index 
fb6b00680..6f23760a0 100644 --- a/.github/workflows/cohere.yml +++ b/.github/workflows/cohere.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/deepeval.yml b/.github/workflows/deepeval.yml index e2468fa8c..a9efc2f3a 100644 --- a/.github/workflows/deepeval.yml +++ b/.github/workflows/deepeval.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/elasticsearch.yml b/.github/workflows/elasticsearch.yml index 688e5c48f..21efcbc34 100644 --- a/.github/workflows/elasticsearch.yml +++ b/.github/workflows/elasticsearch.yml @@ -56,3 +56,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 0fcccef26..a2b076c1a 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -44,10 +44,9 @@ jobs: - name: Run tests run: hatch run cov - - name: Send failure event to Datadog - # we want to send a failure event to Datadog and notify the team - # only if the job fails during scheduled nightly runs - if: failure() #github.event_name == 'schedule' && + - name: Send event to Datadog for nightly failures + if: failure() #github.event_name == 'schedule' && uses: ./.github/actions/send_failure with: + title: "core-integrations nightly failure: ${{ github.workflow }}" api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/google_ai.yml b/.github/workflows/google_ai.yml index 6093df4a4..9efeb8590 100644 --- a/.github/workflows/google_ai.yml +++ b/.github/workflows/google_ai.yml @@ -59,3 +59,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml index 6f6c6d0d9..03890ed4a 100644 --- a/.github/workflows/google_vertex.yml +++ b/.github/workflows/google_vertex.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/gradient.yml b/.github/workflows/gradient.yml index 61a04be7b..8fbaf6f18 100644 --- a/.github/workflows/gradient.yml +++ b/.github/workflows/gradient.yml @@ -57,4 +57,11 @@ jobs: run: hatch run docs - name: Run tests - run: hatch 
run cov \ No newline at end of file + run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/instructor_embedders.yml b/.github/workflows/instructor_embedders.yml index 09d04e9d3..5282c8e18 100644 --- a/.github/workflows/instructor_embedders.yml +++ b/.github/workflows/instructor_embedders.yml @@ -36,3 +36,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/jina.yml b/.github/workflows/jina.yml index 1f8e83a7d..1ab0e2a2b 100644 --- a/.github/workflows/jina.yml +++ b/.github/workflows/jina.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/llama_cpp.yml b/.github/workflows/llama_cpp.yml index 89c7e5426..712e91fa2 100644 --- a/.github/workflows/llama_cpp.yml +++ b/.github/workflows/llama_cpp.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/mistral.yml b/.github/workflows/mistral.yml index a02b5ad43..029bb974a 100644 --- a/.github/workflows/mistral.yml +++ b/.github/workflows/mistral.yml @@ -59,3 +59,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/mongodb_atlas.yml b/.github/workflows/mongodb_atlas.yml index af19776cd..bf48a75c2 100644 --- a/.github/workflows/mongodb_atlas.yml +++ b/.github/workflows/mongodb_atlas.yml @@ -56,3 +56,10 @@ jobs: - name: Run tests working-directory: integrations/mongodb_atlas run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 6e7562c17..8b6ec030a 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git 
a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml index 28b522890..c977ba116 100644 --- a/.github/workflows/ollama.yml +++ b/.github/workflows/ollama.yml @@ -76,3 +76,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/opensearch.yml b/.github/workflows/opensearch.yml index 72a01d090..da177b83c 100644 --- a/.github/workflows/opensearch.yml +++ b/.github/workflows/opensearch.yml @@ -57,3 +57,10 @@ jobs: - name: Run tests working-directory: integrations/opensearch run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/optimum.yml b/.github/workflows/optimum.yml index 3b0d137da..077413920 100644 --- a/.github/workflows/optimum.yml +++ b/.github/workflows/optimum.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/pgvector.yml b/.github/workflows/pgvector.yml index badb2565b..647f520e1 100644 --- a/.github/workflows/pgvector.yml +++ b/.github/workflows/pgvector.yml @@ -62,3 +62,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/pinecone.yml b/.github/workflows/pinecone.yml index a82fb74de..49d421813 100644 --- a/.github/workflows/pinecone.yml +++ b/.github/workflows/pinecone.yml @@ -57,3 +57,10 @@ jobs: - name: Run tests working-directory: integrations/pinecone run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/qdrant.yml b/.github/workflows/qdrant.yml index 9f031031f..3c72b0f02 100644 --- a/.github/workflows/qdrant.yml +++ b/.github/workflows/qdrant.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/ragas.yml b/.github/workflows/ragas.yml index e2ce46764..d3def92ca 100644 --- a/.github/workflows/ragas.yml +++ b/.github/workflows/ragas.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: 
"core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index 83cad6dfc..b2778431c 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -70,3 +70,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/uptrain.yml b/.github/workflows/uptrain.yml index bacfa27fb..64453b0fd 100644 --- a/.github/workflows/uptrain.yml +++ b/.github/workflows/uptrain.yml @@ -54,3 +54,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 051415336..69447b96b 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 41872384236b8edb833102d16d46e05a5ff04c51 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:17:04 +0100 Subject: [PATCH 14/16] retry --- .github/actions/send_failure/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/send_failure/action.yml b/.github/actions/send_failure/action.yml index 4a92890d5..8481b0b80 100644 --- a/.github/actions/send_failure/action.yml +++ b/.github/actions/send_failure/action.yml @@ -14,7 +14,7 @@ runs: api-key: ${{ inputs.api-key }} api-url: https://api.datadoghq.eu events: | - - title: ${{ inputs.title }} + - title: "${{ inputs.title }}" text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" alert_type: "error" source_type_name: "Github" From 42cff9ddb0b46b9dc01375ec672a00aad75dbd11 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:30:53 +0100 Subject: [PATCH 15/16] fix test --- integrations/fastembed/tests/test_fastembed_text_embedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index 73cc395ab..402980485 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -207,5 +207,5 @@ def test_run(self): embedding = result["embedding"] assert isinstance(embedding, list) - assert len(embedding) == 383 + assert len(embedding) == 384 assert all(isinstance(emb, float) for emb in embedding) From ad55fcc6b39cec807c62afc22ed05d980196739c Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 13:39:58 +0100 Subject: [PATCH 16/16] fix --- integrations/pgvector/tests/test_document_store.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/integrations/pgvector/tests/test_document_store.py 
b/integrations/pgvector/tests/test_document_store.py index bd8d4478d..bf5ccd5d4 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -39,12 +39,8 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): assert retrieved_docs == docs -<<<<<<< HEAD -def test_init(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary -======= @pytest.mark.usefixtures("patches_for_unit_tests") def test_init(monkeypatch): ->>>>>>> main monkeypatch.setenv("PG_CONN_STR", "some_connection_string") document_store = PgvectorDocumentStore(
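The conflict resolution in PATCH 16 keeps the main-branch variant, which is the cleaner pattern: pytest.mark.usefixtures applies a fixture purely for its side effects, so the test signature carries no unused argument and needs no noqa: ARG001 suppression. A minimal sketch of the pattern, with a hypothetical fixture body standing in for whatever patches_for_unit_tests really patches:

import pytest


@pytest.fixture
def patches_for_unit_tests(monkeypatch):
    # Hypothetical stand-in: the real fixture patches out external
    # dependencies so the store can be built without a live database.
    monkeypatch.setenv("UNIT_TEST_MODE", "1")


# The fixture still runs for this test, but its name never appears
# in the signature:
@pytest.mark.usefixtures("patches_for_unit_tests")
def test_init(monkeypatch):
    monkeypatch.setenv("PG_CONN_STR", "some_connection_string")
    # ... construct the PgvectorDocumentStore and assert on its attributes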