From 27212f54a85826dd0dbd329b34d1d67bf1c8f771 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 16:05:33 +0100 Subject: [PATCH 01/16] first refactorings --- .../chroma/tests/test_document_store.py | 30 ----------- .../tests/test_cohere_chat_generator.py | 12 ----- integrations/deepeval/tests/test_evaluator.py | 1 + .../mongodb_atlas/tests/test_retriever.py | 52 ++++++++++++------- 4 files changed, 35 insertions(+), 60 deletions(-) diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 8d61e63ed..5b827a984 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -60,7 +60,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.content == doc_expected.content assert doc_received.meta == doc_expected.meta - @pytest.mark.unit def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ We customize this test because Chroma consider "not equal" true when @@ -72,14 +71,12 @@ def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: L result, [doc for doc in filterable_docs if doc.meta.get("page", "100") != "100"] ) - @pytest.mark.unit def test_delete_empty(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma """ document_store.delete_documents(["test"]) - @pytest.mark.unit def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma @@ -131,144 +128,117 @@ def test_same_collection_name_reinitialization(self): ChromaDocumentStore("test_name") @pytest.mark.skip(reason="Filter on array contents is not supported.") - @pytest.mark.unit def test_filter_document_array(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on dataframe contents is not supported.") - @pytest.mark.unit def test_filter_document_dataframe(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_eq_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_eq_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported. 
Filter on table contents is not supported.") - @pytest.mark.unit def test_in_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_ne_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_ne_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on table contents is not supported.") - @pytest.mark.unit def test_nin_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on embedding value is not supported.") - @pytest.mark.unit def test_nin_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported.") - @pytest.mark.unit def test_nin_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and_with_multi_key_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_multikey_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_list( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_explicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_filter_nested_and_or_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_and_or_implicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or_and(self, document_store: 
ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_multiple_identical_operators_same_level( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_fail(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_skip(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_overwrite(self, document_store: ChromaDocumentStore): pass diff --git a/integrations/cohere/tests/test_cohere_chat_generator.py b/integrations/cohere/tests/test_cohere_chat_generator.py index 7fd588fec..9a822856e 100644 --- a/integrations/cohere/tests/test_cohere_chat_generator.py +++ b/integrations/cohere/tests/test_cohere_chat_generator.py @@ -53,7 +53,6 @@ def chat_messages(): class TestCohereChatGenerator: - @pytest.mark.unit def test_init_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") @@ -64,14 +63,12 @@ def test_init_default(self, monkeypatch): assert component.api_base_url == cohere.COHERE_API_URL assert not component.generation_kwargs - @pytest.mark.unit def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) with pytest.raises(ValueError): CohereChatGenerator() - @pytest.mark.unit def test_init_with_parameters(self): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), @@ -86,7 +83,6 @@ def test_init_with_parameters(self): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_to_dict_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator() @@ -102,7 +98,6 @@ def test_to_dict_default(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_parameters(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -125,7 +120,6 @@ def test_to_dict_with_parameters(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator( @@ -146,7 +140,6 @@ def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): }, } - @pytest.mark.unit def test_from_dict(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "fake-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -166,7 +159,6 @@ def test_from_dict(self, monkeypatch): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_from_dict_fail_wo_env_var(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) @@ -183,7 +175,6 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch): with pytest.raises(ValueError): CohereChatGenerator.from_dict(data) - @pytest.mark.unit def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 component = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) response = 
component.run(chat_messages) @@ -195,13 +186,11 @@ def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_message_to_dict(self, chat_messages): obj = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) dictionary = [obj._message_to_dict(message) for message in chat_messages] assert dictionary == [{"user_name": "Chatbot", "text": "What's the capital of France"}] - @pytest.mark.unit def test_run_with_params(self, chat_messages, mock_chat_response): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5} @@ -220,7 +209,6 @@ def test_run_with_params(self, chat_messages, mock_chat_response): assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_run_streaming(self, chat_messages, mock_chat_response): streaming_call_count = 0 diff --git a/integrations/deepeval/tests/test_evaluator.py b/integrations/deepeval/tests/test_evaluator.py index 8534ef687..7d1946185 100644 --- a/integrations/deepeval/tests/test_evaluator.py +++ b/integrations/deepeval/tests/test_evaluator.py @@ -270,6 +270,7 @@ def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params, monk # OpenAI API. It is parameterized by the metric, the inputs to the evalutor # and the metric parameters. @pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") +@pytest.mark.integration @pytest.mark.parametrize( "metric, inputs, metric_params", [ diff --git a/integrations/mongodb_atlas/tests/test_retriever.py b/integrations/mongodb_atlas/tests/test_retriever.py index ec44513e2..4ef5222ce 100644 --- a/integrations/mongodb_atlas/tests/test_retriever.py +++ b/integrations/mongodb_atlas/tests/test_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch import pytest from haystack.dataclasses import Document @@ -10,34 +10,48 @@ from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore -@pytest.fixture -def document_store(): - store = MongoDBAtlasDocumentStore( - database_name="haystack_integration_test", - collection_name="test_embeddings_collection", - vector_search_index="cosine_index", - ) - return store +class TestRetriever: + @pytest.fixture + def mock_client(self): + with patch( + "haystack_integrations.document_stores.mongodb_atlas.document_store.MongoClient" + ) as mock_mongo_client: + mock_connection = MagicMock() + mock_database = MagicMock() + mock_collection_names = MagicMock(return_value=["test_embeddings_collection"]) + mock_database.list_collection_names = mock_collection_names + mock_connection.__getitem__.return_value = mock_database + mock_mongo_client.return_value = mock_connection + yield mock_mongo_client -class TestRetriever: - def test_init_default(self, document_store: MongoDBAtlasDocumentStore): - retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - def 
test_init(self, document_store: MongoDBAtlasDocumentStore): + def test_init(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) retriever = MongoDBAtlasEmbeddingRetriever( - document_store=document_store, + document_store=mock_store, filters={"field": "value"}, top_k=5, ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 - def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): + def test_to_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + + document_store = MongoDBAtlasDocumentStore( + database_name="haystack_integration_test", + collection_name="test_embeddings_collection", + vector_search_index="cosine_index", + ) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store, filters={"field": "value"}, top_k=5) res = retriever.to_dict() assert res == { @@ -61,7 +75,9 @@ def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): }, } - def test_from_dict(self): + def test_from_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + data = { "type": "haystack_integrations.components.retrievers.mongodb_atlas.embedding_retriever.MongoDBAtlasEmbeddingRetriever", # noqa: E501 "init_parameters": { From 30ff8f3bbeca20cce0aaa5f037700da7b31c8c0c Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:22:16 +0100 Subject: [PATCH 02/16] separate unit tests in pgvector --- .../tests/test_document_store.py | 56 +-- .../opensearch/tests/test_document_store.py | 51 +-- integrations/pgvector/tests/conftest.py | 36 ++ .../pgvector/tests/test_document_store.py | 367 +++++++++--------- .../tests/test_embedding_retrieval.py | 1 + integrations/pgvector/tests/test_filters.py | 226 ++++++----- integrations/pgvector/tests/test_retriever.py | 25 +- 7 files changed, 411 insertions(+), 351 deletions(-) diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index e46e76ed2..a1e992a9f 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -15,6 +15,34 @@ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_to_dict(_mock_elasticsearch_client): + document_store = ElasticsearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_from_dict(_mock_elasticsearch_client): + data = { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + document_store = ElasticsearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + assert 
document_store._embedding_similarity_function == "cosine" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ @@ -67,34 +95,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_to_dict(self, _mock_elasticsearch_client): - document_store = ElasticsearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_from_dict(self, _mock_elasticsearch_client): - data = { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - document_store = ElasticsearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - assert document_store._embedding_similarity_function == "cosine" - def test_user_agent_header(self, document_store: ElasticsearchDocumentStore): assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/") diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index e3a314141..765df7afb 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -14,6 +14,32 @@ from opensearchpy.exceptions import RequestError +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_to_dict(_mock_opensearch_client): + document_store = OpenSearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_from_dict(_mock_opensearch_client): + data = { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + document_store = OpenSearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + +@pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ Common test cases will be provided by `DocumentStoreBaseTests` but @@ -87,31 +113,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_to_dict(self, _mock_opensearch_client): - document_store = OpenSearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - - 
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_from_dict(self, _mock_opensearch_client): - data = { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - document_store = OpenSearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - def test_write_documents(self, document_store: OpenSearchDocumentStore): docs = [Document(id="1")] assert document_store.write_documents(docs) == 1 diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py index 068f2ac54..94b35a04d 100644 --- a/integrations/pgvector/tests/conftest.py +++ b/integrations/pgvector/tests/conftest.py @@ -1,4 +1,5 @@ import os +from unittest.mock import patch import pytest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore @@ -24,3 +25,38 @@ def document_store(request): yield store store.delete_table() + + +@pytest.fixture +def patches_for_unit_tests(): + with patch("haystack_integrations.document_stores.pgvector.document_store.connect") as mock_connect, patch( + "haystack_integrations.document_stores.pgvector.document_store.register_vector" + ) as mock_register, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.delete_table" + ) as mock_delete, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._create_table_if_not_exists" + ) as mock_create, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._handle_hnsw" + ) as mock_hnsw: + + yield mock_connect, mock_register, mock_delete, mock_create, mock_hnsw + + +@pytest.fixture +def mock_store(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") + table_name = "haystack" + embedding_dimension = 768 + vector_function = "cosine_similarity" + recreate_table = True + search_strategy = "exact_nearest_neighbor" + + store = PgvectorDocumentStore( + table_name=table_name, + embedding_dimension=embedding_dimension, + vector_function=vector_function, + recreate_table=recreate_table, + search_strategy=search_strategy, + ) + + yield store diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 1e158f134..8ffa2bd31 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -13,6 +13,7 @@ from pandas import DataFrame +@pytest.mark.integration class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): def test_write_documents(self, document_store: PgvectorDocumentStore): docs = [Document(id="1")] @@ -25,7 +26,6 @@ def test_write_blob(self, document_store: PgvectorDocumentStore): docs = [Document(id="1", blob=bytestream)] document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs @@ -35,185 +35,192 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs - def test_init(self): - document_store = PgvectorDocumentStore( - 
table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.table_name == "my_table" - assert document_store.embedding_dimension == 512 - assert document_store.vector_function == "l2_distance" - assert document_store.recreate_table - assert document_store.search_strategy == "hnsw" - assert document_store.hnsw_recreate_index_if_exists - assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} - assert document_store.hnsw_ef_search == 50 - - def test_to_dict(self): - document_store = PgvectorDocumentStore( - table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.to_dict() == { - "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", - "init_parameters": { - "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "my_table", - "embedding_dimension": 512, - "vector_function": "l2_distance", - "recreate_table": True, - "search_strategy": "hnsw", - "hnsw_recreate_index_if_exists": True, - "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, - "hnsw_ef_search": 50, - }, - } - - def test_from_haystack_to_pg_documents(self): - haystack_docs = [ - Document( - id="1", - content="This is a text", - meta={"meta_key": "meta_value"}, - embedding=[0.1, 0.2, 0.3], - score=0.5, - ), - Document( - id="2", - dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), - meta={"meta_key": "meta_value"}, - embedding=[0.4, 0.5, 0.6], - score=0.6, - ), - Document( - id="3", - blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), - meta={"meta_key": "meta_value"}, - embedding=[0.7, 0.8, 0.9], - score=0.7, - ), - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) - - assert pg_docs[0]["id"] == "1" - assert pg_docs[0]["content"] == "This is a text" - assert pg_docs[0]["dataframe"] is None - assert pg_docs[0]["blob_data"] is None - assert pg_docs[0]["blob_meta"] is None - assert pg_docs[0]["blob_mime_type"] is None - assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] - assert "score" not in pg_docs[0] - - assert pg_docs[1]["id"] == "2" - assert pg_docs[1]["content"] is None - assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() - assert pg_docs[1]["blob_data"] is None - assert pg_docs[1]["blob_meta"] is None - assert pg_docs[1]["blob_mime_type"] is None - assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] - assert "score" not in pg_docs[1] - - assert pg_docs[2]["id"] == "3" - assert pg_docs[2]["content"] is None - assert pg_docs[2]["dataframe"] is None - assert pg_docs[2]["blob_data"] == b"test" - assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} - assert pg_docs[2]["blob_mime_type"] == "mime_type" - 
assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] - assert "score" not in pg_docs[2] - - def test_from_pg_to_haystack_documents(self): - pg_docs = [ - { - "id": "1", - "content": "This is a text", - "dataframe": None, - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.1, 0.2, 0.3]", - }, - { - "id": "2", - "content": None, - "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.4, 0.5, 0.6]", - }, - { - "id": "3", - "content": None, - "dataframe": None, - "blob_data": b"test", - "blob_meta": {"blob_meta_key": "blob_meta_value"}, - "blob_mime_type": "mime_type", - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.7, 0.8, 0.9]", - }, - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) - - assert haystack_docs[0].id == "1" - assert haystack_docs[0].content == "This is a text" - assert haystack_docs[0].dataframe is None - assert haystack_docs[0].blob is None - assert haystack_docs[0].meta == {"meta_key": "meta_value"} - assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] - assert haystack_docs[0].score is None - - assert haystack_docs[1].id == "2" - assert haystack_docs[1].content is None - assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) - assert haystack_docs[1].blob is None - assert haystack_docs[1].meta == {"meta_key": "meta_value"} - assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] - assert haystack_docs[1].score is None - - assert haystack_docs[2].id == "3" - assert haystack_docs[2].content is None - assert haystack_docs[2].dataframe is None - assert haystack_docs[2].blob.data == b"test" - assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} - assert haystack_docs[2].blob.mime_type == "mime_type" - assert haystack_docs[2].meta == {"meta_key": "meta_value"} - assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] - assert haystack_docs[2].score is None + +def test_init(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.table_name == "my_table" + assert document_store.embedding_dimension == 512 + assert document_store.vector_function == "l2_distance" + assert document_store.recreate_table + assert document_store.search_strategy == "hnsw" + assert document_store.hnsw_recreate_index_if_exists + assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} + assert document_store.hnsw_ef_search == 50 + + +def test_to_dict(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + 
embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.to_dict() == { + "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", + "init_parameters": { + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, + "table_name": "my_table", + "embedding_dimension": 512, + "vector_function": "l2_distance", + "recreate_table": True, + "search_strategy": "hnsw", + "hnsw_recreate_index_if_exists": True, + "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, + "hnsw_ef_search": 50, + }, + } + + +def test_from_haystack_to_pg_documents(): + haystack_docs = [ + Document( + id="1", + content="This is a text", + meta={"meta_key": "meta_value"}, + embedding=[0.1, 0.2, 0.3], + score=0.5, + ), + Document( + id="2", + dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), + meta={"meta_key": "meta_value"}, + embedding=[0.4, 0.5, 0.6], + score=0.6, + ), + Document( + id="3", + blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), + meta={"meta_key": "meta_value"}, + embedding=[0.7, 0.8, 0.9], + score=0.7, + ), + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) + + assert pg_docs[0]["id"] == "1" + assert pg_docs[0]["content"] == "This is a text" + assert pg_docs[0]["dataframe"] is None + assert pg_docs[0]["blob_data"] is None + assert pg_docs[0]["blob_meta"] is None + assert pg_docs[0]["blob_mime_type"] is None + assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] + assert "score" not in pg_docs[0] + + assert pg_docs[1]["id"] == "2" + assert pg_docs[1]["content"] is None + assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() + assert pg_docs[1]["blob_data"] is None + assert pg_docs[1]["blob_meta"] is None + assert pg_docs[1]["blob_mime_type"] is None + assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] + assert "score" not in pg_docs[1] + + assert pg_docs[2]["id"] == "3" + assert pg_docs[2]["content"] is None + assert pg_docs[2]["dataframe"] is None + assert pg_docs[2]["blob_data"] == b"test" + assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} + assert pg_docs[2]["blob_mime_type"] == "mime_type" + assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] + assert "score" not in pg_docs[2] + + +def test_from_pg_to_haystack_documents(): + pg_docs = [ + { + "id": "1", + "content": "This is a text", + "dataframe": None, + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.1, 0.2, 0.3]", + }, + { + "id": "2", + "content": None, + "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.4, 0.5, 0.6]", + }, + { + "id": "3", + "content": None, + "dataframe": None, + "blob_data": b"test", + "blob_meta": {"blob_meta_key": 
"blob_meta_value"}, + "blob_mime_type": "mime_type", + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.7, 0.8, 0.9]", + }, + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) + + assert haystack_docs[0].id == "1" + assert haystack_docs[0].content == "This is a text" + assert haystack_docs[0].dataframe is None + assert haystack_docs[0].blob is None + assert haystack_docs[0].meta == {"meta_key": "meta_value"} + assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] + assert haystack_docs[0].score is None + + assert haystack_docs[1].id == "2" + assert haystack_docs[1].content is None + assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) + assert haystack_docs[1].blob is None + assert haystack_docs[1].meta == {"meta_key": "meta_value"} + assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] + assert haystack_docs[1].score is None + + assert haystack_docs[2].id == "3" + assert haystack_docs[2].content is None + assert haystack_docs[2].dataframe is None + assert haystack_docs[2].blob.data == b"test" + assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} + assert haystack_docs[2].blob.mime_type == "mime_type" + assert haystack_docs[2].meta == {"meta_key": "meta_value"} + assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] + assert haystack_docs[2].score is None diff --git a/integrations/pgvector/tests/test_embedding_retrieval.py b/integrations/pgvector/tests/test_embedding_retrieval.py index 1d5e8e297..2c384f57c 100644 --- a/integrations/pgvector/tests/test_embedding_retrieval.py +++ b/integrations/pgvector/tests/test_embedding_retrieval.py @@ -10,6 +10,7 @@ from numpy.random import rand +@pytest.mark.integration class TestEmbeddingRetrieval: @pytest.fixture def document_store_w_hnsw_index(self, request): diff --git a/integrations/pgvector/tests/test_filters.py b/integrations/pgvector/tests/test_filters.py index 8b2dc8ec9..bda10e3c0 100644 --- a/integrations/pgvector/tests/test_filters.py +++ b/integrations/pgvector/tests/test_filters.py @@ -15,6 +15,7 @@ from psycopg.types.json import Jsonb +@pytest.mark.integration class TestFilters(FilterDocumentsTest): def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): """ @@ -35,6 +36,9 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do received_doc.embedding, expected_doc.embedding = None, None assert received_doc == expected_doc + @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") + def test_not_operator(self, document_store, filterable_docs): ... + def test_complex_filter(self, document_store, filterable_docs): document_store.write_documents(filterable_docs) filters = { @@ -69,111 +73,119 @@ def test_complex_filter(self, document_store, filterable_docs): ], ) - @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") - def test_not_operator(self, document_store, filterable_docs): ... 
- def test_treat_meta_field(self): - assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" - assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" - assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" - assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" - - # do not cast the field if its value is not one of the known types, an empty list or None - assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" - assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" - assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" - - def test_comparison_condition_dataframe_jsonb_conversion(self): - dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - condition = {"field": "meta.df", "operator": "==", "value": dataframe} - field, values = _parse_comparison_condition(condition) - assert field == "(meta.df)::jsonb = %s" - - # we check each slot of the Jsonb object because it does not implement __eq__ - assert values[0].obj == Jsonb(dataframe.to_json()).obj - assert values[0].dumps == Jsonb(dataframe.to_json()).dumps - - def test_comparison_condition_missing_operator(self): - condition = {"field": "meta.type", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_missing_value(self): - condition = {"field": "meta.type", "operator": "=="} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_unknown_operator(self): - condition = {"field": "meta.type", "operator": "unknown", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_logical_condition_missing_operator(self): - condition = {"conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_missing_conditions(self): - condition = {"operator": "AND"} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_unknown_operator(self): - condition = {"operator": "unknown", "conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_nested(self): - condition = { - "operator": "AND", - "conditions": [ - { - "operator": "OR", - "conditions": [ - {"field": "meta.domain", "operator": "!=", "value": "science"}, - {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, - ], - }, - { - "operator": "OR", - "conditions": [ - {"field": "meta.number", "operator": ">=", "value": 90}, - {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, - ], - }, - ], - } - query, values = _parse_logical_condition(condition) - assert query == ( - "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " - "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" - ) - assert values == ["science", [["intro", "conclusion"]], 90, [["John", 
"Jane"]]] - - def test_convert_filters_to_where_clause_and_params(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": 100}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") - assert params == (100, "intro") - - def test_convert_filters_to_where_clause_and_params_handle_null(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": None}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") - assert params == ("intro",) +def test_treat_meta_field(): + assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" + assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" + assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" + assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" + + # do not cast the field if its value is not one of the known types, an empty list or None + assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" + assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" + assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" + + +def test_comparison_condition_dataframe_jsonb_conversion(): + dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + condition = {"field": "meta.df", "operator": "==", "value": dataframe} + field, values = _parse_comparison_condition(condition) + assert field == "(meta.df)::jsonb = %s" + + # we check each slot of the Jsonb object because it does not implement __eq__ + assert values[0].obj == Jsonb(dataframe.to_json()).obj + assert values[0].dumps == Jsonb(dataframe.to_json()).dumps + + +def test_comparison_condition_missing_operator(): + condition = {"field": "meta.type", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_missing_value(): + condition = {"field": "meta.type", "operator": "=="} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_unknown_operator(): + condition = {"field": "meta.type", "operator": "unknown", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_logical_condition_missing_operator(): + condition = {"conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_missing_conditions(): + condition = {"operator": "AND"} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_unknown_operator(): + condition = {"operator": 
"unknown", "conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_nested(): + condition = { + "operator": "AND", + "conditions": [ + { + "operator": "OR", + "conditions": [ + {"field": "meta.domain", "operator": "!=", "value": "science"}, + {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, + ], + }, + { + "operator": "OR", + "conditions": [ + {"field": "meta.number", "operator": ">=", "value": 90}, + {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, + ], + }, + ], + } + query, values = _parse_logical_condition(condition) + assert query == ( + "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " + "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" + ) + assert values == ["science", [["intro", "conclusion"]], 90, [["John", "Jane"]]] + + +def test_convert_filters_to_where_clause_and_params(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": 100}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") + assert params == (100, "intro") + + +def test_convert_filters_to_where_clause_and_params_handle_null(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": None}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") + assert params == ("intro",) diff --git a/integrations/pgvector/tests/test_retriever.py b/integrations/pgvector/tests/test_retriever.py index 8eab10de5..0dd153fec 100644 --- a/integrations/pgvector/tests/test_retriever.py +++ b/integrations/pgvector/tests/test_retriever.py @@ -10,25 +10,25 @@ class TestRetriever: - def test_init_default(self, document_store: PgvectorDocumentStore): - retriever = PgvectorEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self, mock_store): + retriever = PgvectorEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - assert retriever.vector_function == document_store.vector_function + assert retriever.vector_function == mock_store.vector_function - def test_init(self, document_store: PgvectorDocumentStore): + def test_init(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 assert retriever.vector_function == "l2_distance" - def test_to_dict(self, document_store: PgvectorDocumentStore): + def test_to_dict(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": 
"value"}, top_k=5, vector_function="l2_distance" ) res = retriever.to_dict() t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" @@ -39,7 +39,7 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "haystack_test_to_dict", + "table_name": "haystack", "embedding_dimension": 768, "vector_function": "cosine_similarity", "recreate_table": True, @@ -55,7 +55,10 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): }, } - def test_from_dict(self): + def test_from_dict( + self, patches_for_unit_tests, monkeypatch # noqa:ARG002 patches are not explicitly called but necessary + ): + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" data = { "type": t, From 6e6c6d1e56cb90b97558317976674715ae0a3f0f Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:36:03 +0100 Subject: [PATCH 03/16] small change to weaviate --- integrations/weaviate/tests/test_document_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index a2b32d578..ab2ad0ff0 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -34,7 +34,7 @@ EmbeddedOptions, ) - +@pytest.mark.integration class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture def document_store(self, request) -> WeaviateDocumentStore: From 6809cc1f38f54f24ff66efa83004f1c962a9ccd1 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Tue, 12 Mar 2024 17:38:59 +0100 Subject: [PATCH 04/16] fix format --- integrations/elasticsearch/tests/test_document_store.py | 2 ++ integrations/opensearch/tests/test_document_store.py | 2 ++ integrations/weaviate/tests/test_document_store.py | 1 + 3 files changed, 5 insertions(+) diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index a1e992a9f..308486a78 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -28,6 +28,7 @@ def test_to_dict(_mock_elasticsearch_client): }, } + @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") def test_from_dict(_mock_elasticsearch_client): data = { @@ -43,6 +44,7 @@ def test_from_dict(_mock_elasticsearch_client): assert document_store._index == "default" assert document_store._embedding_similarity_function == "cosine" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index 765df7afb..bc0d1c434 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -26,6 +26,7 @@ def test_to_dict(_mock_opensearch_client): }, } + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(_mock_opensearch_client): data = { @@ -39,6 +40,7 @@ def test_from_dict(_mock_opensearch_client): assert 
document_store._hosts == "some hosts" assert document_store._index == "default" + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index ab2ad0ff0..801751483 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -34,6 +34,7 @@ EmbeddedOptions, ) + @pytest.mark.integration class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture From 2cfb58628745ab04be63cb7f6d6e57645c27af11 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 09:22:18 +0100 Subject: [PATCH 05/16] wip --- .github/workflows/fastembed.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index fe736029a..66f69b2cf 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -43,3 +43,34 @@ jobs: - name: Run tests run: hatch run cov + + - name: Calculate alert data + id: calculator + shell: bash + if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + run: | + if [ "${{ job.status }}" = "success" ]; then + echo "alert_type=success" >> "$GITHUB_OUTPUT"; + else + echo "alert_type=error" >> "$GITHUB_OUTPUT"; + fi + + - name: Send event to Datadog + if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + uses: masci/datadog@v1 + with: + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} + api-url: https://api.datadoghq.eu + events: | + - title: "${{ github.workflow }} workflow" + text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" + alert_type: "${{ steps.calculator.outputs.alert_type }}" + source_type_name: "Github" + host: ${{ github.repository_owner }} + tags: + - "project:${{ github.repository }}" + - "job:${{ github.job }}" + - "run_id:${{ github.run_id }}" + - "workflow:${{ github.workflow }}" + - "branch:${{ github.ref_name }}" + - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" From 86edc6ae144f0fa0c1d3dddfcf67ed82d2cda5e2 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 09:33:08 +0100 Subject: [PATCH 06/16] retry --- .github/workflows/fastembed.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 66f69b2cf..74a3311e6 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -47,7 +47,7 @@ jobs: - name: Calculate alert data id: calculator shell: bash - if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + if: (success() || failure()) run: | if [ "${{ job.status }}" = "success" ]; then echo "alert_type=success" >> "$GITHUB_OUTPUT"; @@ -56,7 +56,7 @@ jobs: fi - name: Send event to Datadog - if: (success() || failure()) && github.ref_name == 'notify-nightly-failures' + if: (success() || failure()) uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 77c0da6917829c96fdfb442ff8bf0e4b307e7915 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:00:00 +0100 Subject: [PATCH 07/16] try failur --- .github/workflows/fastembed.yml | 19 ++++--------------- .../tests/test_fastembed_text_embedder.py | 2 +- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 
74a3311e6..42d66f7f2 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -44,27 +44,16 @@ jobs: - name: Run tests run: hatch run cov - - name: Calculate alert data - id: calculator - shell: bash - if: (success() || failure()) - run: | - if [ "${{ job.status }}" = "success" ]; then - echo "alert_type=success" >> "$GITHUB_OUTPUT"; - else - echo "alert_type=error" >> "$GITHUB_OUTPUT"; - fi - - - name: Send event to Datadog - if: (success() || failure()) + - name: Send failure event to Datadog + if: failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} api-url: https://api.datadoghq.eu events: | - - title: "${{ github.workflow }} workflow" + - title: "core-integrations nightly failure: ${{ github.workflow }}" text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" - alert_type: "${{ steps.calculator.outputs.alert_type }}" + alert_type: "error" source_type_name: "Github" host: ${{ github.repository_owner }} tags: diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index 402980485..73cc395ab 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -207,5 +207,5 @@ def test_run(self): embedding = result["embedding"] assert isinstance(embedding, list) - assert len(embedding) == 384 + assert len(embedding) == 383 assert all(isinstance(emb, float) for emb in embedding) From b81bf06ae09c90d413bab64b2dcbee16eabff7ea Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:02:49 +0100 Subject: [PATCH 08/16] restrict --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 42d66f7f2..ab85584ae 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,7 +45,7 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: failure() + if: matrix.python-version == '3.9' && runner.os == 'Linux' && failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 0a8f77f14b24631831bc0ae7898bac5f7af2812b Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 10:22:37 +0100 Subject: [PATCH 09/16] retry --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index ab85584ae..42d66f7f2 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,7 +45,7 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: matrix.python-version == '3.9' && runner.os == 'Linux' && failure() + if: failure() uses: masci/datadog@v1 with: api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From a9431053a676b6549896f6dedf9ebb47055bec67 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:10:51 +0100 Subject: [PATCH 10/16] try using composite action --- .github/workflows/CI_send_failure_event.yml | 25 +++++++++++++++++++++ .github/workflows/fastembed.yml | 22 +++++------------- 2 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/CI_send_failure_event.yml diff --git a/.github/workflows/CI_send_failure_event.yml b/.github/workflows/CI_send_failure_event.yml new file mode 100644 index 000000000..ce5232e9c --- /dev/null +++ b/.github/workflows/CI_send_failure_event.yml @@ -0,0 +1,25 @@ +name: "Send failure 
event to Datadog" +inputs: + api-key: + description: "Datadog API key" + required: true +runs: + using: "composite" + steps: + - uses: masci/datadog@v1 + with: + api-key: ${{ inputs.api-key }} + api-url: https://api.datadoghq.eu + events: | + - title: "core-integrations nightly failure: ${{ github.workflow }}" + text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" + alert_type: "error" + source_type_name: "Github" + host: ${{ github.repository_owner }} + tags: + - "project:${{ github.repository }}" + - "job:${{ github.job }}" + - "run_id:${{ github.run_id }}" + - "workflow:${{ github.workflow }}" + - "branch:${{ github.ref_name }}" + - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ No newline at end of file diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 42d66f7f2..db2309ff5 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -45,21 +45,9 @@ jobs: run: hatch run cov - name: Send failure event to Datadog - if: failure() - uses: masci/datadog@v1 + # we want to send a failure event to Datadog and notify the team + # only if the job fails during scheduled nightly runs + if: failure() #github.event_name == 'schedule' && + uses: ./.github/workflows/CI_send_failure_event.yml with: - api-key: ${{ secrets.CORE_DATADOG_API_KEY }} - api-url: https://api.datadoghq.eu - events: | - - title: "core-integrations nightly failure: ${{ github.workflow }}" - text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" - alert_type: "error" - source_type_name: "Github" - host: ${{ github.repository_owner }} - tags: - - "project:${{ github.repository }}" - - "job:${{ github.job }}" - - "run_id:${{ github.run_id }}" - - "workflow:${{ github.workflow }}" - - "branch:${{ github.ref_name }}" - - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + api-key: ${{ secrets.DATADOG_API_KEY }} From 3dfa3535ece017d5ac3f9d5f92f0adeb71ff2bec Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:20:25 +0100 Subject: [PATCH 11/16] retry composite action --- .../send_failure/action.yml} | 0 .github/workflows/fastembed.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{workflows/CI_send_failure_event.yml => actions/send_failure/action.yml} (100%) diff --git a/.github/workflows/CI_send_failure_event.yml b/.github/actions/send_failure/action.yml similarity index 100% rename from .github/workflows/CI_send_failure_event.yml rename to .github/actions/send_failure/action.yml diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index db2309ff5..3d55239f1 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -48,6 +48,6 @@ jobs: # we want to send a failure event to Datadog and notify the team # only if the job fails during scheduled nightly runs if: failure() #github.event_name == 'schedule' && - uses: ./.github/workflows/CI_send_failure_event.yml + uses: ./.github/actions/send_failure with: api-key: ${{ secrets.DATADOG_API_KEY }} From 19bbe73196779842c048985a8c07756f55fc8c9d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 11:48:17 +0100 Subject: [PATCH 12/16] fix typo --- .github/workflows/fastembed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 3d55239f1..0fcccef26 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -50,4 +50,4 @@ jobs: if: failure() 
#github.event_name == 'schedule' && uses: ./.github/actions/send_failure with: - api-key: ${{ secrets.DATADOG_API_KEY }} + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 3ed6b9be4aec1def50fbfb5e2e029e341b687f5d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:12:10 +0100 Subject: [PATCH 13/16] update all workflows --- .github/actions/send_failure/action.yml | 5 ++++- .github/workflows/astra.yml | 9 ++++++++- .github/workflows/chroma.yml | 7 +++++++ .github/workflows/cohere.yml | 7 +++++++ .github/workflows/deepeval.yml | 7 +++++++ .github/workflows/elasticsearch.yml | 7 +++++++ .github/workflows/fastembed.yml | 7 +++---- .github/workflows/google_ai.yml | 7 +++++++ .github/workflows/google_vertex.yml | 7 +++++++ .github/workflows/gradient.yml | 9 ++++++++- .github/workflows/instructor_embedders.yml | 7 +++++++ .github/workflows/jina.yml | 7 +++++++ .github/workflows/llama_cpp.yml | 7 +++++++ .github/workflows/mistral.yml | 7 +++++++ .github/workflows/mongodb_atlas.yml | 7 +++++++ .github/workflows/nvidia.yml | 7 +++++++ .github/workflows/ollama.yml | 7 +++++++ .github/workflows/opensearch.yml | 7 +++++++ .github/workflows/optimum.yml | 7 +++++++ .github/workflows/pgvector.yml | 7 +++++++ .github/workflows/pinecone.yml | 7 +++++++ .github/workflows/qdrant.yml | 7 +++++++ .github/workflows/ragas.yml | 7 +++++++ .github/workflows/unstructured.yml | 7 +++++++ .github/workflows/uptrain.yml | 7 +++++++ .github/workflows/weaviate.yml | 7 +++++++ 26 files changed, 177 insertions(+), 7 deletions(-) diff --git a/.github/actions/send_failure/action.yml b/.github/actions/send_failure/action.yml index ce5232e9c..4a92890d5 100644 --- a/.github/actions/send_failure/action.yml +++ b/.github/actions/send_failure/action.yml @@ -3,6 +3,9 @@ inputs: api-key: description: "Datadog API key" required: true + title: + description: "Custom title for the event" + required: true runs: using: "composite" steps: @@ -11,7 +14,7 @@ runs: api-key: ${{ inputs.api-key }} api-url: https://api.datadoghq.eu events: | - - title: "core-integrations nightly failure: ${{ github.workflow }}" + - title: ${{ inputs.title }} text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" alert_type: "error" source_type_name: "Github" diff --git a/.github/workflows/astra.yml b/.github/workflows/astra.yml index d859626ff..e90edc2e9 100644 --- a/.github/workflows/astra.yml +++ b/.github/workflows/astra.yml @@ -61,4 +61,11 @@ jobs: env: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_TOKEN }} - run: hatch run cov \ No newline at end of file + run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/chroma.yml b/.github/workflows/chroma.yml index fec309f6b..e6712d807 100644 --- a/.github/workflows/chroma.yml +++ b/.github/workflows/chroma.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/cohere.yml b/.github/workflows/cohere.yml index 
fb6b00680..6f23760a0 100644 --- a/.github/workflows/cohere.yml +++ b/.github/workflows/cohere.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/deepeval.yml b/.github/workflows/deepeval.yml index e2468fa8c..a9efc2f3a 100644 --- a/.github/workflows/deepeval.yml +++ b/.github/workflows/deepeval.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/elasticsearch.yml b/.github/workflows/elasticsearch.yml index 688e5c48f..21efcbc34 100644 --- a/.github/workflows/elasticsearch.yml +++ b/.github/workflows/elasticsearch.yml @@ -56,3 +56,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 0fcccef26..a2b076c1a 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -44,10 +44,9 @@ jobs: - name: Run tests run: hatch run cov - - name: Send failure event to Datadog - # we want to send a failure event to Datadog and notify the team - # only if the job fails during scheduled nightly runs - if: failure() #github.event_name == 'schedule' && + - name: Send event to Datadog for nightly failures + if: failure() #github.event_name == 'schedule' && uses: ./.github/actions/send_failure with: + title: "core-integrations nightly failure: ${{ github.workflow }}" api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/google_ai.yml b/.github/workflows/google_ai.yml index 6093df4a4..9efeb8590 100644 --- a/.github/workflows/google_ai.yml +++ b/.github/workflows/google_ai.yml @@ -59,3 +59,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml index 6f6c6d0d9..03890ed4a 100644 --- a/.github/workflows/google_vertex.yml +++ b/.github/workflows/google_vertex.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/gradient.yml b/.github/workflows/gradient.yml index 61a04be7b..8fbaf6f18 100644 --- a/.github/workflows/gradient.yml +++ b/.github/workflows/gradient.yml @@ -57,4 +57,11 @@ jobs: run: hatch run docs - name: Run tests - run: hatch 
run cov \ No newline at end of file + run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/.github/workflows/instructor_embedders.yml b/.github/workflows/instructor_embedders.yml index 09d04e9d3..5282c8e18 100644 --- a/.github/workflows/instructor_embedders.yml +++ b/.github/workflows/instructor_embedders.yml @@ -36,3 +36,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/jina.yml b/.github/workflows/jina.yml index 1f8e83a7d..1ab0e2a2b 100644 --- a/.github/workflows/jina.yml +++ b/.github/workflows/jina.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/llama_cpp.yml b/.github/workflows/llama_cpp.yml index 89c7e5426..712e91fa2 100644 --- a/.github/workflows/llama_cpp.yml +++ b/.github/workflows/llama_cpp.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/mistral.yml b/.github/workflows/mistral.yml index a02b5ad43..029bb974a 100644 --- a/.github/workflows/mistral.yml +++ b/.github/workflows/mistral.yml @@ -59,3 +59,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/mongodb_atlas.yml b/.github/workflows/mongodb_atlas.yml index af19776cd..bf48a75c2 100644 --- a/.github/workflows/mongodb_atlas.yml +++ b/.github/workflows/mongodb_atlas.yml @@ -56,3 +56,10 @@ jobs: - name: Run tests working-directory: integrations/mongodb_atlas run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 6e7562c17..8b6ec030a 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git 
a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml index 28b522890..c977ba116 100644 --- a/.github/workflows/ollama.yml +++ b/.github/workflows/ollama.yml @@ -76,3 +76,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/opensearch.yml b/.github/workflows/opensearch.yml index 72a01d090..da177b83c 100644 --- a/.github/workflows/opensearch.yml +++ b/.github/workflows/opensearch.yml @@ -57,3 +57,10 @@ jobs: - name: Run tests working-directory: integrations/opensearch run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/optimum.yml b/.github/workflows/optimum.yml index 3b0d137da..077413920 100644 --- a/.github/workflows/optimum.yml +++ b/.github/workflows/optimum.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/pgvector.yml b/.github/workflows/pgvector.yml index badb2565b..647f520e1 100644 --- a/.github/workflows/pgvector.yml +++ b/.github/workflows/pgvector.yml @@ -62,3 +62,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/pinecone.yml b/.github/workflows/pinecone.yml index a82fb74de..49d421813 100644 --- a/.github/workflows/pinecone.yml +++ b/.github/workflows/pinecone.yml @@ -57,3 +57,10 @@ jobs: - name: Run tests working-directory: integrations/pinecone run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/qdrant.yml b/.github/workflows/qdrant.yml index 9f031031f..3c72b0f02 100644 --- a/.github/workflows/qdrant.yml +++ b/.github/workflows/qdrant.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/ragas.yml b/.github/workflows/ragas.yml index e2ce46764..d3def92ca 100644 --- a/.github/workflows/ragas.yml +++ b/.github/workflows/ragas.yml @@ -58,3 +58,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: 
"core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index 83cad6dfc..b2778431c 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -70,3 +70,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/uptrain.yml b/.github/workflows/uptrain.yml index bacfa27fb..64453b0fd 100644 --- a/.github/workflows/uptrain.yml +++ b/.github/workflows/uptrain.yml @@ -54,3 +54,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 051415336..69447b96b 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -55,3 +55,10 @@ jobs: - name: Run tests run: hatch run cov + + - name: Send event to Datadog for nightly failures + if: github.event_name == 'schedule' && failure() + uses: ./.github/actions/send_failure + with: + title: "core-integrations nightly failure: ${{ github.workflow }}" + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From 41872384236b8edb833102d16d46e05a5ff04c51 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:17:04 +0100 Subject: [PATCH 14/16] retry --- .github/actions/send_failure/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/send_failure/action.yml b/.github/actions/send_failure/action.yml index 4a92890d5..8481b0b80 100644 --- a/.github/actions/send_failure/action.yml +++ b/.github/actions/send_failure/action.yml @@ -14,7 +14,7 @@ runs: api-key: ${{ inputs.api-key }} api-url: https://api.datadoghq.eu events: | - - title: ${{ inputs.title }} + - title: "${{ inputs.title }}" text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" alert_type: "error" source_type_name: "Github" From 42cff9ddb0b46b9dc01375ec672a00aad75dbd11 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 12:30:53 +0100 Subject: [PATCH 15/16] fix test --- integrations/fastembed/tests/test_fastembed_text_embedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index 73cc395ab..402980485 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -207,5 +207,5 @@ def test_run(self): embedding = result["embedding"] assert isinstance(embedding, list) - assert len(embedding) == 383 + assert len(embedding) == 384 assert all(isinstance(emb, float) for emb in embedding) From ad55fcc6b39cec807c62afc22ed05d980196739c Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Mar 2024 13:39:58 +0100 Subject: [PATCH 16/16] fix --- integrations/pgvector/tests/test_document_store.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/integrations/pgvector/tests/test_document_store.py 
b/integrations/pgvector/tests/test_document_store.py index bd8d4478d..bf5ccd5d4 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -39,12 +39,8 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): assert retrieved_docs == docs -<<<<<<< HEAD -def test_init(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary -======= @pytest.mark.usefixtures("patches_for_unit_tests") def test_init(monkeypatch): ->>>>>>> main monkeypatch.setenv("PG_CONN_STR", "some_connection_string") document_store = PgvectorDocumentStore(
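The conflict resolution in PATCH 16 keeps the main-branch variant, which is the cleaner pattern: pytest.mark.usefixtures applies a fixture purely for its side effects, so the test signature carries no unused argument and needs no noqa: ARG001 suppression. A minimal sketch of the pattern, with a hypothetical fixture body standing in for whatever patches_for_unit_tests really patches:

import pytest


@pytest.fixture
def patches_for_unit_tests(monkeypatch):
    # Hypothetical stand-in: the real fixture patches out external
    # dependencies so the store can be built without a live database.
    monkeypatch.setenv("UNIT_TEST_MODE", "1")


# The fixture still runs for this test, but its name never appears
# in the signature:
@pytest.mark.usefixtures("patches_for_unit_tests")
def test_init(monkeypatch):
    monkeypatch.setenv("PG_CONN_STR", "some_connection_string")
    # ... construct the PgvectorDocumentStore and assert on its attributes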