Skip to content

Commit

Permalink
refact!: Remove symbols under the haystack.document_stores namespace (
Browse files Browse the repository at this point in the history
#6714)

* remove symbols under the haystack.document_stores namespace

* Update haystack/document_stores/types/protocol.py

Co-authored-by: Silvano Cerza <[email protected]>

* fix

* same for retrievers

* leftovers

* more leftovers

* add relnote

* leftovers

* one more

* fix examples

---------

Co-authored-by: Silvano Cerza <[email protected]>
  • Loading branch information
masci and silvanocerza authored Jan 10, 2024
1 parent 374a937 commit e1ec4e5
Show file tree
Hide file tree
Showing 42 changed files with 102 additions and 88 deletions.
4 changes: 2 additions & 2 deletions docs/pydoc/config/retriever.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/components/retrievers]
modules: ["in_memory_bm25_retriever", "in_memory_embedding_retriever"]
search_path: [../../../haystack/components/retrievers/in_memory]
modules: ["bm25_retriever", "embedding_retriever"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_dense_doc_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever


def test_dense_doc_search_pipeline(tmp_path, samples_path):
Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_eval_dense_doc_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval


Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_eval_extractive_qa_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from haystack import Pipeline
from haystack.components.readers import ExtractiveReader
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.dataclasses import Document, ExtractedAnswer
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval
from haystack.evaluation.metrics import Metric

Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from haystack import Document, Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval


Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_eval_rag_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.evaluation.eval import eval
from haystack.evaluation.metrics import Metric

Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_extractive_qa_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json

from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.readers import ExtractiveReader


Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_hybrid_doc_search_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.rankers import TransformersSimilarityRanker
from haystack.components.joiners.document_joiner import DocumentJoiner
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever


def test_hybrid_doc_search_pipeline(tmp_path):
Expand Down
2 changes: 1 addition & 1 deletion e2e/pipelines/test_preprocessing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from haystack.components.classifiers import DocumentLanguageClassifier
from haystack.components.routers import FileTypeRouter, MetadataRouter
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


def test_preprocessing_pipeline(tmp_path):
Expand Down
4 changes: 2 additions & 2 deletions e2e/pipelines/test_rag_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pytest

from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
Expand Down
2 changes: 1 addition & 1 deletion examples/getting_started/rag.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from haystack import Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline

API_KEY = "SET YOUR OPENAI API KEY HERE"
Expand Down
2 changes: 1 addition & 1 deletion examples/getting_started/rag_custom_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline_utils import build_rag_pipeline, build_indexing_pipeline
from haystack.pipeline_utils.indexing import download_files

Expand Down
2 changes: 1 addition & 1 deletion examples/pipelines/indexing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


# Create components and an indexing pipeline that converts txt and pdf files to documents, cleans and splits them, and
Expand Down
5 changes: 3 additions & 2 deletions examples/pipelines/indexing_pipeline_with_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from haystack.components.others import Multiplexer
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter, DocumentJoiner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


document_store = InMemoryDocumentStore()
Expand Down
4 changes: 2 additions & 2 deletions examples/pipelines/rag_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from haystack import Pipeline, Document
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
Expand Down
8 changes: 4 additions & 4 deletions examples/rag/rag_self_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import logging
from pprint import pprint

from canals.component.types import Variadic
from haystack import Pipeline, Document, component, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.others import Multiplexer
from haystack.components.routers.conditional_router import ConditionalRouter
from haystack.core.component.types import Variadic


logging.getLogger().setLevel(logging.DEBUG)
Expand Down Expand Up @@ -64,7 +64,7 @@ def run(

if self.retrieved_documents is None:
self.retrieved_documents = self.retriever.run(
query=query[0], filters=filters, top_k=top_k, scale_score=scale_score
query=query[0], filters=filters, top_k=top_k, scale_score=scale_score # type: ignore
)["documents"]

if not self.retrieved_documents:
Expand Down
4 changes: 2 additions & 2 deletions examples/retrievers/in_memory_bm25_documentsearch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from haystack import Document
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.pipeline import Pipeline

# Create components and a query pipeline
Expand Down
4 changes: 2 additions & 2 deletions examples/retrievers/in_memory_bm25_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

# Create a RAG query pipeline
prompt_template = """
Expand Down
2 changes: 1 addition & 1 deletion haystack/components/caching/cache_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging

from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError
from haystack.document_stores import DocumentStore
from haystack.document_stores.types import DocumentStore


logger = logging.getLogger(__name__)
Expand Down
4 changes: 0 additions & 4 deletions haystack/components/retrievers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
from haystack.components.retrievers.in_memory_bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory_embedding_retriever import InMemoryEmbeddingRetriever

__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"]
4 changes: 4 additions & 0 deletions haystack/components/retrievers/in_memory/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever

__all__ = ["InMemoryBM25Retriever", "InMemoryEmbeddingRetriever"]
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Dict, List, Any, Optional

from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


@component
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Dict, List, Any, Optional

from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError
from haystack.document_stores import InMemoryDocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


@component
Expand Down
2 changes: 1 addition & 1 deletion haystack/components/writers/document_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging

from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy

logger = logging.getLogger(__name__)

Expand Down
12 changes: 0 additions & 12 deletions haystack/document_stores/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +0,0 @@
from haystack.document_stores.protocol import DocumentStore, DuplicatePolicy
from haystack.document_stores.in_memory.document_store import InMemoryDocumentStore
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError

__all__ = [
"DocumentStore",
"DuplicatePolicy",
"InMemoryDocumentStore",
"DocumentStoreError",
"DuplicateDocumentError",
"MissingDocumentError",
]
3 changes: 3 additions & 0 deletions haystack/document_stores/errors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError

__all__ = ["DocumentStoreError", "DuplicateDocumentError", "MissingDocumentError"]
File renamed without changes.
2 changes: 1 addition & 1 deletion haystack/document_stores/in_memory/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from haystack import default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.protocol import DuplicatePolicy
from haystack.document_stores.types import DuplicatePolicy
from haystack.utils.filters import document_matches_filter, convert
from haystack.document_stores.errors import DuplicateDocumentError, DocumentStoreError
from haystack.utils import expit
Expand Down
4 changes: 4 additions & 0 deletions haystack/document_stores/types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .protocol import DocumentStore
from .policy import DuplicatePolicy

__all__ = ["DocumentStore", "DuplicatePolicy"]
8 changes: 8 additions & 0 deletions haystack/document_stores/types/policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class DuplicatePolicy(Enum):
NONE = "none"
SKIP = "skip"
OVERWRITE = "overwrite"
FAIL = "fail"
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
from typing import Protocol, Optional, Dict, Any, List
import logging
from enum import Enum

from haystack.dataclasses import Document

from haystack.document_stores.types.policy import DuplicatePolicy

# Ellipsis are needed for the type checker, it's safe to disable module-wide
# pylint: disable=unnecessary-ellipsis

logger = logging.getLogger(__name__)


class DuplicatePolicy(Enum):
NONE = "none"
SKIP = "skip"
OVERWRITE = "overwrite"
FAIL = "fail"


class DocumentStore(Protocol):
"""
Stores Documents to be used by the components of a Pipeline.
Expand Down
2 changes: 1 addition & 1 deletion haystack/pipeline_utils/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.document_stores.protocol import DocumentStore
from haystack.document_stores.types import DocumentStore


def download_files(sources: List[str]) -> List[str]:
Expand Down
5 changes: 3 additions & 2 deletions haystack/pipeline_utils/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator, HuggingFaceTGIGenerator
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.dataclasses import Answer
from haystack.document_stores import InMemoryDocumentStore, DocumentStore
from haystack.document_stores.types import DocumentStore
from haystack.document_stores.in_memory import InMemoryDocumentStore


def build_rag_pipeline(
Expand Down
2 changes: 1 addition & 1 deletion haystack/testing/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd

from haystack.dataclasses import Document
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.document_stores.errors import DuplicateDocumentError
from haystack.errors import FilterError

Expand Down
2 changes: 1 addition & 1 deletion haystack/testing/factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Any, Dict, Optional, Tuple, Type, List, Union

from haystack.dataclasses import Document
from haystack.document_stores import DocumentStore, DuplicatePolicy
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.core.component import component, Component
from haystack.core.serialization import default_to_dict, default_from_dict

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
upgrade:
- |
Change the imports for in_memory document store and retrievers from:
from haystack.document_stores import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever
to:
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
enhancements:
- |
Stop exposing `in_memory` package symbols in the `haystack.document_stores` and
`haystack.components.retrievers` root namespaces.
Loading

0 comments on commit e1ec4e5

Please sign in to comment.