Skip to content

Commit

Permalink
Rename file_converters to converters (#6390)
Browse files (browse the repository at this point in the history)
  • Loading branch information
vblagoje authored Nov 23, 2023
1 parent b557f30 commit cfff0d5
Show file tree
Hide file tree
Showing 17 changed files with 33 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
loaders:
- type: loaders.CustomPythonLoader
search_path: [../../../haystack/preview/components/file_converters]
search_path: [../../../haystack/preview/components/converters]
modules: ["azure", "html", "markdown", "pypdf", "tika", "txt"]
ignore_when_discovered: ["__init__"]
processors:
Expand All @@ -15,12 +15,12 @@ renderer:
type: renderers.ReadmePreviewRenderer
excerpt: Extracts text from files in different formats and converts it into the unified Document format.
category_slug: haystack-classes
title: File Converter API
slug: file-converter-api
title: Converter API
slug: converter-api
order: 50
markdown:
descriptive_class_title: false
descriptive_module_title: true
add_method_class_prefix: true
add_member_class_prefix: false
filename: file_converter_api.md
filename: converter_api.md
2 changes: 1 addition & 1 deletion e2e/preview/pipelines/test_preprocessing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from haystack.preview import Pipeline
from haystack.preview.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.preview.components.file_converters import TextFileToDocument
from haystack.preview.components.converters import TextFileToDocument
from haystack.preview.components.preprocessors import DocumentSplitter, DocumentCleaner
from haystack.preview.components.classifiers import DocumentLanguageClassifier
from haystack.preview.components.routers import FileTypeRouter, MetadataRouter
Expand Down
15 changes: 15 additions & 0 deletions haystack/preview/components/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from haystack.preview.components.converters.txt import TextFileToDocument
from haystack.preview.components.converters.tika import TikaDocumentConverter
from haystack.preview.components.converters.azure import AzureOCRDocumentConverter
from haystack.preview.components.converters.pypdf import PyPDFToDocument
from haystack.preview.components.converters.html import HTMLToDocument
from haystack.preview.components.converters.markdown import MarkdownToDocument

__all__ = [
"TextFileToDocument",
"TikaDocumentConverter",
"AzureOCRDocumentConverter",
"PyPDFToDocument",
"HTMLToDocument",
"MarkdownToDocument",
]
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class HTMLToDocument:
Usage example:
```python
from haystack.preview.components.file_converters.html import HTMLToDocument
from haystack.preview.components.converters.html import HTMLToDocument
converter = HTMLToDocument()
results = converter.run(sources=["sample.html"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class MarkdownToDocument:
Usage example:
```python
from haystack.preview.components.file_converters.markdown import MarkdownToDocument
from haystack.preview.components.converters.markdown import MarkdownToDocument
converter = MarkdownToDocument()
results = converter.run(sources=["sample.md"])
Expand Down
15 changes: 0 additions & 15 deletions haystack/preview/components/file_converters/__init__.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from haystack.preview.components.file_converters.azure import AzureOCRDocumentConverter
from haystack.preview.components.converters.azure import AzureOCRDocumentConverter


class TestAzureOCRDocumentConverter:
Expand All @@ -18,13 +18,13 @@ def test_to_dict(self):
component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key")
data = component.to_dict()
assert data == {
"type": "haystack.preview.components.file_converters.azure.AzureOCRDocumentConverter",
"type": "haystack.preview.components.converters.azure.AzureOCRDocumentConverter",
"init_parameters": {"endpoint": "test_endpoint", "model_id": "prebuilt-read"},
}

@pytest.mark.unit
def test_run(self, preview_samples_path):
with patch("haystack.preview.components.file_converters.azure.DocumentAnalysisClient") as mock_azure_client:
with patch("haystack.preview.components.converters.azure.DocumentAnalysisClient") as mock_azure_client:
mock_result = Mock(pages=[Mock(lines=[Mock(content="mocked line 1"), Mock(content="mocked line 2")])])
mock_result.to_dict.return_value = {
"api_version": "2023-02-28-preview",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from haystack.preview.components.file_converters import HTMLToDocument
from haystack.preview.components.converters import HTMLToDocument
from haystack.preview.dataclasses import ByteStream


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from haystack.preview.components.file_converters.markdown import MarkdownToDocument
from haystack.preview.components.converters.markdown import MarkdownToDocument
from haystack.preview.dataclasses import ByteStream


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pypdf import PdfReader

from haystack.preview import Document
from haystack.preview.components.file_converters.pypdf import PyPDFToDocument
from haystack.preview.components.converters.pypdf import PyPDFToDocument
from haystack.preview.dataclasses import ByteStream


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from haystack.preview.dataclasses import ByteStream
from haystack.preview.components.file_converters.txt import TextFileToDocument
from haystack.preview.components.converters.txt import TextFileToDocument


class TestTextfileToDocument:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

import pytest

from haystack.preview.components.file_converters.tika import TikaDocumentConverter
from haystack.preview.components.converters.tika import TikaDocumentConverter


class TestTikaDocumentConverter:
@pytest.mark.unit
def test_run(self):
component = TikaDocumentConverter()
with patch("haystack.preview.components.file_converters.tika.tika_parser.from_file") as mock_tika_parser:
with patch("haystack.preview.components.converters.tika.tika_parser.from_file") as mock_tika_parser:
mock_tika_parser.return_value = {"content": "Content of mock_file.pdf"}
documents = component.run(paths=["mock_file.pdf"])["documents"]

Expand All @@ -19,7 +19,7 @@ def test_run(self):
@pytest.mark.unit
def test_run_logs_warning_if_content_empty(self, caplog):
component = TikaDocumentConverter()
with patch("haystack.preview.components.file_converters.tika.tika_parser.from_file") as mock_tika_parser:
with patch("haystack.preview.components.converters.tika.tika_parser.from_file") as mock_tika_parser:
mock_tika_parser.return_value = {"content": ""}
with caplog.at_level("WARNING"):
component.run(paths=["mock_file.pdf"])
Expand All @@ -28,7 +28,7 @@ def test_run_logs_warning_if_content_empty(self, caplog):
@pytest.mark.unit
def test_run_logs_error(self, caplog):
component = TikaDocumentConverter()
with patch("haystack.preview.components.file_converters.tika.tika_parser.from_file") as mock_tika_parser:
with patch("haystack.preview.components.converters.tika.tika_parser.from_file") as mock_tika_parser:
mock_tika_parser.side_effect = Exception("Some error")
with caplog.at_level("ERROR"):
component.run(paths=["mock_file.pdf"])
Expand Down

0 comments on commit cfff0d5

Please sign in to comment.