diff --git a/haystack/__init__.py b/haystack/__init__.py index c62e89a20b..68f8e11047 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -1,9 +1,9 @@ +import haystack.logging from haystack.core.component import component from haystack.core.errors import ComponentError, DeserializationError from haystack.core.pipeline import Pipeline, PredefinedPipeline from haystack.core.serialization import default_from_dict, default_to_dict from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer -import haystack.logging # Initialize the logging configuration # This is a no-op unless `structlog` is installed diff --git a/haystack/components/audio/whisper_local.py b/haystack/components/audio/whisper_local.py index b364eebbf1..bbbc864cdf 100644 --- a/haystack/components/audio/whisper_local.py +++ b/haystack/components/audio/whisper_local.py @@ -1,10 +1,8 @@ -from typing import List, Optional, Dict, Any, Union, Literal, get_args - -import logging import tempfile from pathlib import Path +from typing import Any, Dict, List, Literal, Optional, Union, get_args -from haystack import component, Document, default_to_dict, ComponentError, default_from_dict +from haystack import ComponentError, Document, component, default_from_dict, default_to_dict, logging from haystack.dataclasses import ByteStream from haystack.lazy_imports import LazyImport from haystack.utils import ComponentDevice diff --git a/haystack/components/audio/whisper_remote.py b/haystack/components/audio/whisper_remote.py index e8a5bb625c..b2f3a9d7a0 100644 --- a/haystack/components/audio/whisper_remote.py +++ b/haystack/components/audio/whisper_remote.py @@ -1,11 +1,10 @@ import io -import logging from pathlib import Path from typing import Any, Dict, List, Optional, Union from openai import OpenAI -from haystack import Document, component, default_from_dict, default_to_dict +from haystack import Document, component, default_from_dict, default_to_dict, logging from 
haystack.dataclasses import ByteStream from haystack.utils import Secret, deserialize_secrets_inplace diff --git a/haystack/components/builders/__init__.py b/haystack/components/builders/__init__.py index 47a6bf388a..ef48d60ce8 100644 --- a/haystack/components/builders/__init__.py +++ b/haystack/components/builders/__init__.py @@ -1,6 +1,6 @@ from haystack.components.builders.answer_builder import AnswerBuilder -from haystack.components.builders.prompt_builder import PromptBuilder -from haystack.components.builders.dynamic_prompt_builder import DynamicPromptBuilder from haystack.components.builders.dynamic_chat_prompt_builder import DynamicChatPromptBuilder +from haystack.components.builders.dynamic_prompt_builder import DynamicPromptBuilder +from haystack.components.builders.prompt_builder import PromptBuilder __all__ = ["AnswerBuilder", "PromptBuilder", "DynamicPromptBuilder", "DynamicChatPromptBuilder"] diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py index 9be95cb1a2..b28e99e948 100644 --- a/haystack/components/builders/answer_builder.py +++ b/haystack/components/builders/answer_builder.py @@ -1,9 +1,7 @@ -import logging import re -from typing import List, Dict, Any, Optional - -from haystack import component, GeneratedAnswer, Document +from typing import Any, Dict, List, Optional +from haystack import Document, GeneratedAnswer, component, logging logger = logging.getLogger(__name__) @@ -114,7 +112,9 @@ def run( try: referenced_docs.append(documents[idx]) except IndexError: - logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1) + logger.warning( + "Document index '{index}' referenced in Generator output is out of range. 
", index=idx + 1 + ) answer_string = AnswerBuilder._extract_answer_string(reply, pattern) answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata) diff --git a/haystack/components/builders/dynamic_chat_prompt_builder.py b/haystack/components/builders/dynamic_chat_prompt_builder.py index 12f6d0e5fb..b798ebc67d 100644 --- a/haystack/components/builders/dynamic_chat_prompt_builder.py +++ b/haystack/components/builders/dynamic_chat_prompt_builder.py @@ -1,9 +1,8 @@ -import logging -from typing import Dict, Any, Optional, List, Set +from typing import Any, Dict, List, Optional, Set from jinja2 import Template, meta -from haystack import component +from haystack import component, logging from haystack.dataclasses.chat_message import ChatMessage, ChatRole logger = logging.getLogger(__name__) diff --git a/haystack/components/builders/dynamic_prompt_builder.py b/haystack/components/builders/dynamic_prompt_builder.py index 7434de0c70..3580ecd8b0 100644 --- a/haystack/components/builders/dynamic_prompt_builder.py +++ b/haystack/components/builders/dynamic_prompt_builder.py @@ -1,9 +1,8 @@ -import logging -from typing import Dict, Any, Optional, List, Set +from typing import Any, Dict, List, Optional, Set from jinja2 import Template, meta -from haystack import component +from haystack import component, logging logger = logging.getLogger(__name__) diff --git a/haystack/components/caching/cache_checker.py b/haystack/components/caching/cache_checker.py index 77fe2599d8..2f3c8f9c31 100644 --- a/haystack/components/caching/cache_checker.py +++ b/haystack/components/caching/cache_checker.py @@ -1,13 +1,9 @@ -from typing import List, Dict, Any - import importlib +from typing import Any, Dict, List -import logging - -from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError +from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict, logging from 
haystack.document_stores.types import DocumentStore - logger = logging.getLogger(__name__) @@ -80,7 +76,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker": try: module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1) - logger.debug("Trying to import %s", module_name) + logger.debug("Trying to import module '{module}'", module=module_name) module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: raise DeserializationError( diff --git a/haystack/components/classifiers/document_language_classifier.py b/haystack/components/classifiers/document_language_classifier.py index 784ebef41a..6f0108589f 100644 --- a/haystack/components/classifiers/document_language_classifier.py +++ b/haystack/components/classifiers/document_language_classifier.py @@ -1,7 +1,6 @@ -import logging -from typing import List, Dict, Optional +from typing import Dict, List, Optional -from haystack import component, Document +from haystack import Document, component, logging from haystack.lazy_imports import LazyImport logger = logging.getLogger(__name__) @@ -96,6 +95,8 @@ def _detect_language(self, document: Document) -> Optional[str]: try: language = langdetect.detect(document.content) except langdetect.LangDetectException: - logger.warning("Langdetect cannot detect the language of Document with id: %s", document.id) + logger.warning( + "Langdetect cannot detect the language of Document with id: {document_id}", document_id=document.id + ) language = None return language diff --git a/haystack/components/connectors/openapi_service.py b/haystack/components/connectors/openapi_service.py index d10ed7ad83..9149be064e 100644 --- a/haystack/components/connectors/openapi_service.py +++ b/haystack/components/connectors/openapi_service.py @@ -1,10 +1,9 @@ import json -import logging from collections import defaultdict from copy import copy -from typing import List, Dict, Any, Optional, Union +from typing import Any, Dict, List, Optional, Union 
-from haystack import component +from haystack import component, logging from haystack.dataclasses import ChatMessage, ChatRole from haystack.lazy_imports import LazyImport @@ -179,7 +178,7 @@ def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: :rtype: Any :raises RuntimeError: If the method is not found or invocation fails. """ - name = method_invocation_descriptor.get("name", None) + name = method_invocation_descriptor.get("name") invocation_arguments = copy(method_invocation_descriptor.get("arguments", {})) if not name or not invocation_arguments: raise ValueError( diff --git a/haystack/components/converters/__init__.py b/haystack/components/converters/__init__.py index 61697488ae..5ff6e6554f 100644 --- a/haystack/components/converters/__init__.py +++ b/haystack/components/converters/__init__.py @@ -1,11 +1,11 @@ -from haystack.components.converters.txt import TextFileToDocument -from haystack.components.converters.tika import TikaDocumentConverter from haystack.components.converters.azure import AzureOCRDocumentConverter -from haystack.components.converters.pypdf import PyPDFToDocument from haystack.components.converters.html import HTMLToDocument from haystack.components.converters.markdown import MarkdownToDocument from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions from haystack.components.converters.output_adapter import OutputAdapter +from haystack.components.converters.pypdf import PyPDFToDocument +from haystack.components.converters.tika import TikaDocumentConverter +from haystack.components.converters.txt import TextFileToDocument __all__ = [ "TextFileToDocument", diff --git a/haystack/components/converters/azure.py b/haystack/components/converters/azure.py index c12ac44604..edc028e439 100644 --- a/haystack/components/converters/azure.py +++ b/haystack/components/converters/azure.py @@ -1,17 +1,16 @@ from pathlib import Path -from typing import List, Union, Dict, Any, Optional -import 
logging +from typing import Any, Dict, List, Optional, Union -from haystack.lazy_imports import LazyImport -from haystack import component, Document, default_to_dict, default_from_dict -from haystack.dataclasses import ByteStream +from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata +from haystack.dataclasses import ByteStream +from haystack.lazy_imports import LazyImport from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) with LazyImport(message="Run 'pip install \"azure-ai-formrecognizer>=3.2.0b2\"'") as azure_import: - from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult + from azure.ai.formrecognizer import AnalyzeResult, DocumentAnalysisClient from azure.core.credentials import AzureKeyCredential @@ -83,7 +82,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D try: bytestream = get_bytestream_from_source(source=source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. 
Error: {error}", source=source, error=e) continue poller = self.document_analysis_client.begin_analyze_document( diff --git a/haystack/components/converters/html.py b/haystack/components/converters/html.py index dea38dbd1a..fa9135b51d 100644 --- a/haystack/components/converters/html.py +++ b/haystack/components/converters/html.py @@ -1,11 +1,11 @@ -import logging from pathlib import Path -from typing import Any, Dict, List, Optional, Union, Literal +from typing import Any, Dict, List, Literal, Optional, Union + from boilerpy3 import extractors -from haystack import Document, component, default_from_dict, default_to_dict -from haystack.dataclasses import ByteStream +from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata +from haystack.dataclasses import ByteStream logger = logging.getLogger(__name__) @@ -84,13 +84,17 @@ def run( try: bytestream = get_bytestream_from_source(source=source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: file_content = bytestream.data.decode("utf-8") text = extractor.get_content(file_content) except Exception as conversion_e: - logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) + logger.warning( + "Failed to extract text from {source}. Skipping it. 
Error: {error}", + source=source, + error=conversion_e, + ) continue merged_metadata = {**bytestream.meta, **metadata} diff --git a/haystack/components/converters/markdown.py b/haystack/components/converters/markdown.py index 9ba9756cdb..12a44dec84 100644 --- a/haystack/components/converters/markdown.py +++ b/haystack/components/converters/markdown.py @@ -1,13 +1,12 @@ -import logging from pathlib import Path from typing import Any, Dict, List, Optional, Union from tqdm import tqdm -from haystack import Document, component +from haystack import Document, component, logging +from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata from haystack.dataclasses import ByteStream from haystack.lazy_imports import LazyImport -from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata with LazyImport("Run 'pip install markdown-it-py mdit_plain'") as markdown_conversion_imports: from markdown_it import MarkdownIt @@ -77,13 +76,17 @@ def run( try: bytestream = get_bytestream_from_source(source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: file_content = bytestream.data.decode("utf-8") text = parser.render(file_content) except Exception as conversion_e: - logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) + logger.warning( + "Failed to extract text from {source}. Skipping it. 
Error: {error}", + source=source, + error=conversion_e, + ) continue merged_metadata = {**bytestream.meta, **metadata} diff --git a/haystack/components/converters/openapi_functions.py b/haystack/components/converters/openapi_functions.py index 311ae0ffb9..8d4e31dc92 100644 --- a/haystack/components/converters/openapi_functions.py +++ b/haystack/components/converters/openapi_functions.py @@ -1,14 +1,13 @@ import json -import logging import os from pathlib import Path -from typing import List, Dict, Any, Union, Optional +from typing import Any, Dict, List, Optional, Union import requests import yaml from requests import RequestException -from haystack import component, Document +from haystack import Document, component, logging from haystack.dataclasses.byte_stream import ByteStream from haystack.lazy_imports import LazyImport @@ -80,7 +79,9 @@ def run( elif isinstance(source, ByteStream): openapi_spec_content = source.data.decode("utf-8") else: - logger.warning("Invalid source type %s. Only str, Path, and ByteStream are supported.", type(source)) + logger.warning( + "Invalid source type {source}. Only str, Path, and ByteStream are supported.", source=type(source) + ) continue if openapi_spec_content: @@ -94,7 +95,9 @@ def run( doc = Document(content=json.dumps(function), meta=meta) documents.append(doc) except Exception as e: - logger.error("Error processing OpenAPI specification from source %s: %s", source, e) + logger.error( + "Error processing OpenAPI specification from source {source}: {error}", source=source, error=e + ) return {"documents": documents} @@ -167,7 +170,9 @@ def _parse_endpoint_spec(self, resolved_spec: Dict[str, Any]) -> Optional[Dict[s if function_name and description and schema["properties"]: return {"name": function_name, "description": description, "parameters": schema} else: - logger.warning("Invalid OpenAPI spec format provided. Could not extract function from %s", resolved_spec) + logger.warning( + "Invalid OpenAPI spec format provided. 
Could not extract function from {spec}", spec=resolved_spec + ) return {} def _parse_property_attributes( @@ -246,7 +251,7 @@ def _read_from_file(self, path: Union[str, Path]) -> Optional[str]: with open(path, "r") as f: return f.read() except IOError as e: - logger.warning("IO error reading file: %s. Error: %s", path, e) + logger.warning("IO error reading file: {path}. Error: {error}", path=path, error=e) return None def _read_from_url(self, url: str) -> Optional[str]: @@ -261,5 +266,5 @@ def _read_from_url(self, url: str) -> Optional[str]: response.raise_for_status() return response.text except RequestException as e: - logger.warning("Error fetching URL: %s. Error: %s", url, e) + logger.warning("Error fetching URL: {url}. Error: {error}", url=url, error=e) return None diff --git a/haystack/components/converters/output_adapter.py b/haystack/components/converters/output_adapter.py index 848e17712b..21d9ef4288 100644 --- a/haystack/components/converters/output_adapter.py +++ b/haystack/components/converters/output_adapter.py @@ -1,12 +1,12 @@ -from typing import Optional, Dict, Any, Set, Callable +from typing import Any, Callable, Dict, Optional, Set import jinja2.runtime from jinja2 import TemplateSyntaxError, meta from jinja2.nativetypes import NativeEnvironment from typing_extensions import TypeAlias -from haystack import component, default_to_dict, default_from_dict -from haystack.utils import serialize_callable, deserialize_callable, serialize_type, deserialize_type +from haystack import component, default_from_dict, default_to_dict +from haystack.utils import deserialize_callable, deserialize_type, serialize_callable, serialize_type class OutputAdaptationException(Exception): diff --git a/haystack/components/converters/pypdf.py b/haystack/components/converters/pypdf.py index 0f0adff9e5..fb325970f5 100644 --- a/haystack/components/converters/pypdf.py +++ b/haystack/components/converters/pypdf.py @@ -1,12 +1,11 @@ import io -import logging -from typing import 
List, Union, Protocol, Dict, Any, Optional from pathlib import Path +from typing import Any, Dict, List, Optional, Protocol, Union +from haystack import Document, component, default_to_dict, logging +from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata from haystack.dataclasses import ByteStream from haystack.lazy_imports import LazyImport -from haystack import Document, component, default_to_dict -from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata with LazyImport("Run 'pip install pypdf'") as pypdf_import: from pypdf import PdfReader @@ -105,13 +104,15 @@ def run( try: bytestream = get_bytestream_from_source(source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: pdf_reader = PdfReader(io.BytesIO(bytestream.data)) document = self._converter.convert(pdf_reader) except Exception as e: - logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e) + logger.warning( + "Could not read {source} and convert it to Document, skipping. 
{error}", source=source, error=e + ) continue merged_metadata = {**bytestream.meta, **metadata} diff --git a/haystack/components/converters/tika.py b/haystack/components/converters/tika.py index 51d4a59b95..909945b57b 100644 --- a/haystack/components/converters/tika.py +++ b/haystack/components/converters/tika.py @@ -1,13 +1,11 @@ -import logging -from pathlib import Path -from typing import List, Union, Dict, Any, Optional import io +from pathlib import Path +from typing import Any, Dict, List, Optional, Union -from haystack.lazy_imports import LazyImport -from haystack import component, Document -from haystack.dataclasses import ByteStream +from haystack import Document, component, logging from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata - +from haystack.dataclasses import ByteStream +from haystack.lazy_imports import LazyImport with LazyImport("Run 'pip install tika'") as tika_import: from tika import parser as tika_parser @@ -74,12 +72,16 @@ def run( try: bytestream = get_bytestream_from_source(source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: text = tika_parser.from_buffer(io.BytesIO(bytestream.data), serverEndpoint=self.tika_url)["content"] except Exception as conversion_e: - logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) + logger.warning( + "Failed to extract text from {source}. Skipping it. 
Error: {error}", + source=source, + error=conversion_e, + ) continue merged_metadata = {**bytestream.meta, **metadata} diff --git a/haystack/components/converters/txt.py b/haystack/components/converters/txt.py index 1af25cfe1e..bb986fab28 100644 --- a/haystack/components/converters/txt.py +++ b/haystack/components/converters/txt.py @@ -1,11 +1,9 @@ -import logging from pathlib import Path -from typing import List, Union, Dict, Any, Optional +from typing import Any, Dict, List, Optional, Union -from haystack import Document, component -from haystack.dataclasses import ByteStream +from haystack import Document, component, logging from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata - +from haystack.dataclasses import ByteStream logger = logging.getLogger(__name__) @@ -64,13 +62,15 @@ def run( try: bytestream = get_bytestream_from_source(source) except Exception as e: - logger.warning("Could not read %s. Skipping it. Error: %s", source, e) + logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: encoding = bytestream.meta.get("encoding", self.encoding) text = bytestream.data.decode(encoding) except Exception as e: - logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e) + logger.warning( + "Could not convert file {source}. Skipping it. 
Error message: {error}", source=source, error=e + ) continue merged_metadata = {**bytestream.meta, **metadata} diff --git a/haystack/components/converters/utils.py b/haystack/components/converters/utils.py index 128e4a134f..8666722a7f 100644 --- a/haystack/components/converters/utils.py +++ b/haystack/components/converters/utils.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Union, Dict, Any, Optional +from typing import Any, Dict, List, Optional, Union from haystack.dataclasses import ByteStream diff --git a/haystack/components/embedders/__init__.py b/haystack/components/embedders/__init__.py index 5a6730e5be..6ff4e339a7 100644 --- a/haystack/components/embedders/__init__.py +++ b/haystack/components/embedders/__init__.py @@ -1,11 +1,11 @@ -from haystack.components.embedders.hugging_face_tei_text_embedder import HuggingFaceTEITextEmbedder +from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder +from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder from haystack.components.embedders.hugging_face_tei_document_embedder import HuggingFaceTEIDocumentEmbedder -from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder -from haystack.components.embedders.sentence_transformers_document_embedder import SentenceTransformersDocumentEmbedder +from haystack.components.embedders.hugging_face_tei_text_embedder import HuggingFaceTEITextEmbedder from haystack.components.embedders.openai_document_embedder import OpenAIDocumentEmbedder from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder -from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder -from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder +from haystack.components.embedders.sentence_transformers_document_embedder import SentenceTransformersDocumentEmbedder +from 
haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder __all__ = [ "HuggingFaceTEITextEmbedder", diff --git a/haystack/components/embedders/azure_document_embedder.py b/haystack/components/embedders/azure_document_embedder.py index 0a7a7f364f..2cdde26f85 100644 --- a/haystack/components/embedders/azure_document_embedder.py +++ b/haystack/components/embedders/azure_document_embedder.py @@ -1,10 +1,10 @@ import os -from typing import List, Optional, Dict, Any, Tuple +from typing import Any, Dict, List, Optional, Tuple from openai.lib.azure import AzureOpenAI from tqdm import tqdm -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace diff --git a/haystack/components/embedders/azure_text_embedder.py b/haystack/components/embedders/azure_text_embedder.py index bfbec34ea6..c79abf32f0 100644 --- a/haystack/components/embedders/azure_text_embedder.py +++ b/haystack/components/embedders/azure_text_embedder.py @@ -1,9 +1,9 @@ import os -from typing import List, Optional, Dict, Any +from typing import Any, Dict, List, Optional from openai.lib.azure import AzureOpenAI -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace diff --git a/haystack/components/embedders/backends/sentence_transformers_backend.py b/haystack/components/embedders/backends/sentence_transformers_backend.py index b604aa4904..c0fdfac094 100644 --- a/haystack/components/embedders/backends/sentence_transformers_backend.py +++ b/haystack/components/embedders/backends/sentence_transformers_backend.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Dict +from typing import Dict, List, Optional from haystack.lazy_imports import 
LazyImport from haystack.utils.auth import Secret diff --git a/haystack/components/embedders/hugging_face_tei_document_embedder.py b/haystack/components/embedders/hugging_face_tei_document_embedder.py index 38123f0480..e6b019d237 100644 --- a/haystack/components/embedders/hugging_face_tei_document_embedder.py +++ b/haystack/components/embedders/hugging_face_tei_document_embedder.py @@ -1,14 +1,13 @@ -import logging from typing import Any, Dict, List, Optional from urllib.parse import urlparse from tqdm import tqdm +from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import Document from haystack.lazy_imports import LazyImport from haystack.utils import Secret, deserialize_secrets_inplace -from haystack.utils.hf import check_valid_model, HFModelType -from haystack import component, default_to_dict, default_from_dict +from haystack.utils.hf import HFModelType, check_valid_model with LazyImport(message="Run 'pip install transformers'") as transformers_import: from huggingface_hub import InferenceClient diff --git a/haystack/components/embedders/hugging_face_tei_text_embedder.py b/haystack/components/embedders/hugging_face_tei_text_embedder.py index 339454b1de..844e666695 100644 --- a/haystack/components/embedders/hugging_face_tei_text_embedder.py +++ b/haystack/components/embedders/hugging_face_tei_text_embedder.py @@ -1,11 +1,10 @@ -import logging from typing import Any, Dict, List, Optional from urllib.parse import urlparse -from haystack import component, default_to_dict, default_from_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.lazy_imports import LazyImport from haystack.utils import Secret, deserialize_secrets_inplace -from haystack.utils.hf import check_valid_model, HFModelType +from haystack.utils.hf import HFModelType, check_valid_model with LazyImport(message="Run 'pip install transformers'") as transformers_import: from huggingface_hub import InferenceClient 
diff --git a/haystack/components/embedders/openai_document_embedder.py b/haystack/components/embedders/openai_document_embedder.py index 7f409178e0..1147e8ff54 100644 --- a/haystack/components/embedders/openai_document_embedder.py +++ b/haystack/components/embedders/openai_document_embedder.py @@ -1,9 +1,9 @@ -from typing import List, Optional, Dict, Any, Tuple +from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI from tqdm import tqdm -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace diff --git a/haystack/components/embedders/sentence_transformers_document_embedder.py b/haystack/components/embedders/sentence_transformers_document_embedder.py index 10ffc8e67c..c45bb56e52 100644 --- a/haystack/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_document_embedder.py @@ -1,10 +1,10 @@ -from typing import List, Optional, Dict, Any +from typing import Any, Dict, List, Optional -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import Document, component, default_from_dict, default_to_dict from haystack.components.embedders.backends.sentence_transformers_backend import ( _SentenceTransformersEmbeddingBackendFactory, ) -from haystack.utils import Secret, deserialize_secrets_inplace, ComponentDevice +from haystack.utils import ComponentDevice, Secret, deserialize_secrets_inplace @component diff --git a/haystack/components/embedders/sentence_transformers_text_embedder.py b/haystack/components/embedders/sentence_transformers_text_embedder.py index ae5395a030..0e5dc6fea2 100644 --- a/haystack/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_text_embedder.py @@ -1,10 +1,10 @@ -from typing import List, 
Optional, Dict, Any +from typing import Any, Dict, List, Optional -from haystack import component, default_to_dict, default_from_dict +from haystack import component, default_from_dict, default_to_dict from haystack.components.embedders.backends.sentence_transformers_backend import ( _SentenceTransformersEmbeddingBackendFactory, ) -from haystack.utils import Secret, deserialize_secrets_inplace, ComponentDevice +from haystack.utils import ComponentDevice, Secret, deserialize_secrets_inplace @component diff --git a/haystack/components/extractors/named_entity_extractor.py b/haystack/components/extractors/named_entity_extractor.py index f8d8d717cc..f4c43fe2f9 100644 --- a/haystack/components/extractors/named_entity_extractor.py +++ b/haystack/components/extractors/named_entity_extractor.py @@ -9,9 +9,8 @@ from haystack.utils.device import ComponentDevice with LazyImport(message="Run 'pip install transformers[torch]'") as transformers_import: - from transformers import AutoModelForTokenClassification, AutoTokenizer + from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline from transformers import Pipeline as HfPipeline - from transformers import pipeline with LazyImport(message="Run 'pip install spacy'") as spacy_import: import spacy diff --git a/haystack/components/fetchers/link_content.py b/haystack/components/fetchers/link_content.py index 3f437b84f3..2ccf81e6b6 100644 --- a/haystack/components/fetchers/link_content.py +++ b/haystack/components/fetchers/link_content.py @@ -1,4 +1,3 @@ -import logging from collections import defaultdict from concurrent.futures import ThreadPoolExecutor from typing import Callable, Dict, List, Optional, Tuple @@ -8,7 +7,7 @@ from requests.exceptions import HTTPError from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential -from haystack import component +from haystack import component, logging from haystack.dataclasses import ByteStream from haystack.version 
import __version__ @@ -173,7 +172,7 @@ def _fetch(self, url: str) -> Tuple[Dict[str, str], ByteStream]: if self.raise_on_failure: raise e # less verbose log as this is expected to happen often (requests failing, blocked, etc.) - logger.debug("Couldn't retrieve content from %s because %s", url, str(e)) + logger.debug("Couldn't retrieve content from {url} because {error}", url=url, error=str(e)) finally: self.current_user_agent_idx = 0 @@ -194,7 +193,7 @@ def _fetch_with_exception_suppression(self, url: str) -> Tuple[Optional[Dict[str try: return self._fetch(url) except Exception as e: - logger.warning("Error fetching %s: %s", url, str(e)) + logger.warning("Error fetching {url}: {error}", url=url, error=str(e)) return {"content_type": "Unknown", "url": url}, None else: return self._fetch(url) @@ -217,4 +216,4 @@ def _switch_user_agent(self, retry_state: RetryCallState) -> None: :param retry_state: The retry state (unused, required by tenacity). """ self.current_user_agent_idx = (self.current_user_agent_idx + 1) % len(self.user_agents) - logger.debug("Switched user agent to %s", self.user_agents[self.current_user_agent_idx]) + logger.debug("Switched user agent to {user_agent}", user_agent=self.user_agents[self.current_user_agent_idx]) diff --git a/haystack/components/generators/__init__.py b/haystack/components/generators/__init__.py index 30407bba86..3578ba63e5 100644 --- a/haystack/components/generators/__init__.py +++ b/haystack/components/generators/__init__.py @@ -1,6 +1,8 @@ +from haystack.components.generators.openai import ( # noqa: I001 (otherwise we end up with partial imports) + OpenAIGenerator, +) +from haystack.components.generators.azure import AzureOpenAIGenerator from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator from haystack.components.generators.hugging_face_tgi import HuggingFaceTGIGenerator -from haystack.components.generators.openai import OpenAIGenerator -from haystack.components.generators.azure import 
AzureOpenAIGenerator __all__ = ["HuggingFaceLocalGenerator", "HuggingFaceTGIGenerator", "OpenAIGenerator", "AzureOpenAIGenerator"] diff --git a/haystack/components/generators/azure.py b/haystack/components/generators/azure.py index 28039fdbd2..ae43abaef6 100644 --- a/haystack/components/generators/azure.py +++ b/haystack/components/generators/azure.py @@ -1,14 +1,13 @@ -import logging import os -from typing import Optional, Callable, Dict, Any +from typing import Any, Callable, Dict, Optional # pylint: disable=import-error from openai.lib.azure import AzureOpenAI -from haystack import default_to_dict, default_from_dict +from haystack import default_from_dict, default_to_dict, logging from haystack.components.generators import OpenAIGenerator from haystack.dataclasses import StreamingChunk -from haystack.utils import Secret, deserialize_secrets_inplace, serialize_callable, deserialize_callable +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable logger = logging.getLogger(__name__) diff --git a/haystack/components/generators/chat/__init__.py b/haystack/components/generators/chat/__init__.py index c8f908e157..225fc10f08 100644 --- a/haystack/components/generators/chat/__init__.py +++ b/haystack/components/generators/chat/__init__.py @@ -1,8 +1,9 @@ +from haystack.components.generators.chat.openai import ( # noqa: I001 (otherwise we end up with partial imports) + OpenAIChatGenerator, +) +from haystack.components.generators.chat.azure import AzureOpenAIChatGenerator from haystack.components.generators.chat.hugging_face_local import HuggingFaceLocalChatGenerator from haystack.components.generators.chat.hugging_face_tgi import HuggingFaceTGIChatGenerator -from haystack.components.generators.chat.openai import OpenAIChatGenerator -from haystack.components.generators.chat.azure import AzureOpenAIChatGenerator - __all__ = [ "HuggingFaceLocalChatGenerator", diff --git a/haystack/components/generators/chat/azure.py 
b/haystack/components/generators/chat/azure.py index 0a2f9fa0a7..6a1e8fb648 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -1,14 +1,13 @@ -import logging import os -from typing import Optional, Callable, Dict, Any +from typing import Any, Callable, Dict, Optional # pylint: disable=import-error from openai.lib.azure import AzureOpenAI -from haystack import default_to_dict, default_from_dict +from haystack import default_from_dict, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import StreamingChunk -from haystack.utils import Secret, deserialize_secrets_inplace, serialize_callable, deserialize_callable +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable logger = logging.getLogger(__name__) diff --git a/haystack/components/generators/chat/hugging_face_local.py b/haystack/components/generators/chat/hugging_face_local.py index 6464f30e77..dfdb087d16 100644 --- a/haystack/components/generators/chat/hugging_face_local.py +++ b/haystack/components/generators/chat/hugging_face_local.py @@ -1,23 +1,28 @@ -import logging import sys -from typing import Any, Dict, List, Literal, Optional, Union, Callable +from typing import Any, Callable, Dict, List, Literal, Optional, Union -from haystack import component, default_to_dict, default_from_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import ChatMessage, StreamingChunk from haystack.lazy_imports import LazyImport -from haystack.utils import ComponentDevice -from haystack.utils import Secret, deserialize_secrets_inplace, serialize_callable, deserialize_callable +from haystack.utils import ( + ComponentDevice, + Secret, + deserialize_callable, + deserialize_secrets_inplace, + serialize_callable, +) logger = logging.getLogger(__name__) with LazyImport(message="Run 'pip install 
transformers[torch]'") as torch_and_transformers_import: from huggingface_hub import model_info - from transformers import StoppingCriteriaList, pipeline, PreTrainedTokenizer, PreTrainedTokenizerFast + from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast, StoppingCriteriaList, pipeline + from haystack.utils.hf import ( # pylint: disable=ungrouped-imports - StopWordsCriteria, HFTokenStreamingHandler, - serialize_hf_model_kwargs, + StopWordsCriteria, deserialize_hf_model_kwargs, + serialize_hf_model_kwargs, ) @@ -338,8 +343,8 @@ def _validate_stop_words(self, stop_words: Optional[List[str]]) -> Optional[List if stop_words and not all(isinstance(word, str) for word in stop_words): logger.warning( "Invalid stop words provided. Stop words must be specified as a list of strings. " - "Ignoring stop words: %s", - stop_words, + "Ignoring stop words: {stop_words}", + stop_words=stop_words, ) return None diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py index bbbd4c5ff5..4fbed43eb1 100644 --- a/haystack/components/generators/chat/hugging_face_tgi.py +++ b/haystack/components/generators/chat/hugging_face_tgi.py @@ -1,18 +1,16 @@ -import logging from dataclasses import asdict -from typing import Any, Dict, List, Optional, Iterable, Callable +from typing import Any, Callable, Dict, Iterable, List, Optional from urllib.parse import urlparse -from haystack import component, default_to_dict, default_from_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import ChatMessage, StreamingChunk from haystack.lazy_imports import LazyImport -from haystack.utils import Secret, deserialize_secrets_inplace -from haystack.utils import serialize_callable, deserialize_callable -from haystack.utils.hf import check_valid_model, HFModelType, check_generation_params, list_inference_deployed_models +from haystack.utils import Secret, 
deserialize_callable, deserialize_secrets_inplace, serialize_callable +from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models with LazyImport(message="Run 'pip install transformers'") as transformers_import: from huggingface_hub import InferenceClient - from huggingface_hub.inference._text_generation import TextGenerationStreamResponse, TextGenerationResponse, Token + from huggingface_hub.inference._text_generation import TextGenerationResponse, TextGenerationStreamResponse, Token from transformers import AutoTokenizer logger = logging.getLogger(__name__) @@ -160,10 +158,10 @@ def warm_up(self) -> None: chat_template = getattr(self.tokenizer, "chat_template", None) if not chat_template and not self.chat_template: logger.warning( - "The model '%s' doesn't have a default chat_template, and no chat_template was supplied during " + "The model '{model}' doesn't have a default chat_template, and no chat_template was supplied during " "this component's initialization. 
It’s possible that the model doesn't support ChatML inference " "format, potentially leading to unexpected behavior.", - self.model, + model=self.model, ) def to_dict(self) -> Dict[str, Any]: diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index d5f05d50e3..e3ce9e1c03 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -1,17 +1,16 @@ import copy import dataclasses import json -import logging -from typing import Optional, List, Callable, Dict, Any, Union +from typing import Any, Callable, Dict, List, Optional, Union from openai import OpenAI, Stream # type: ignore -from openai.types.chat import ChatCompletionChunk, ChatCompletion, ChatCompletionMessage +from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice -from haystack import component, default_from_dict, default_to_dict -from haystack.dataclasses import StreamingChunk, ChatMessage -from haystack.utils import Secret, deserialize_secrets_inplace, serialize_callable, deserialize_callable +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.dataclasses import ChatMessage, StreamingChunk +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable logger = logging.getLogger(__name__) @@ -332,11 +331,14 @@ def _check_finish_reason(self, message: ChatMessage) -> None: """ if message.meta["finish_reason"] == "length": logger.warning( - "The completion for index %s has been truncated before reaching a natural stopping point. " + "The completion for index {index} has been truncated before reaching a natural stopping point. 
" "Increase the max_tokens parameter to allow for longer completions.", - message.meta["index"], + index=message.meta["index"], + finish_reason=message.meta["finish_reason"], ) if message.meta["finish_reason"] == "content_filter": logger.warning( - "The completion for index %s has been truncated due to the content filter.", message.meta["index"] + "The completion for index {index} has been truncated due to the content filter.", + index=message.meta["index"], + finish_reason=message.meta["finish_reason"], ) diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py index 2760ce7bef..77c4506984 100644 --- a/haystack/components/generators/hugging_face_local.py +++ b/haystack/components/generators/hugging_face_local.py @@ -1,12 +1,9 @@ -import logging from typing import Any, Dict, List, Literal, Optional -from haystack import component, default_from_dict, default_to_dict - +from haystack import component, default_from_dict, default_to_dict, logging from haystack.lazy_imports import LazyImport -from haystack.utils import ComponentDevice +from haystack.utils import ComponentDevice, Secret, deserialize_secrets_inplace from haystack.utils.hf import deserialize_hf_model_kwargs, serialize_hf_model_kwargs -from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) @@ -15,6 +12,7 @@ with LazyImport(message="Run 'pip install transformers[torch]'") as transformers_import: from huggingface_hub import model_info from transformers import StoppingCriteriaList, pipeline + from haystack.utils.hf import StopWordsCriteria # pylint: disable=ungrouped-imports diff --git a/haystack/components/generators/hugging_face_tgi.py b/haystack/components/generators/hugging_face_tgi.py index 45c50ac527..7da5d10434 100644 --- a/haystack/components/generators/hugging_face_tgi.py +++ b/haystack/components/generators/hugging_face_tgi.py @@ -1,17 +1,16 @@ -import logging from dataclasses import asdict 
-from typing import Any, Dict, List, Optional, Iterable, Callable +from typing import Any, Callable, Dict, Iterable, List, Optional from urllib.parse import urlparse -from haystack import component, default_to_dict, default_from_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import StreamingChunk from haystack.lazy_imports import LazyImport -from haystack.utils import Secret, deserialize_secrets_inplace, serialize_callable, deserialize_callable -from haystack.utils.hf import check_valid_model, HFModelType, check_generation_params, list_inference_deployed_models +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable +from haystack.utils.hf import HFModelType, check_generation_params, check_valid_model, list_inference_deployed_models with LazyImport(message="Run 'pip install transformers'") as transformers_import: from huggingface_hub import InferenceClient - from huggingface_hub.inference._text_generation import TextGenerationStreamResponse, TextGenerationResponse, Token + from huggingface_hub.inference._text_generation import TextGenerationResponse, TextGenerationStreamResponse, Token from transformers import AutoTokenizer diff --git a/haystack/components/generators/openai.py b/haystack/components/generators/openai.py index fd402efeae..48701ffff8 100644 --- a/haystack/components/generators/openai.py +++ b/haystack/components/generators/openai.py @@ -1,14 +1,12 @@ import dataclasses -import logging -from typing import Optional, List, Callable, Dict, Any, Union +from typing import Any, Callable, Dict, List, Optional, Union from openai import OpenAI, Stream -from openai.types.chat import ChatCompletionChunk, ChatCompletion +from openai.types.chat import ChatCompletion, ChatCompletionChunk -from haystack import component, default_from_dict, default_to_dict -from haystack.dataclasses import StreamingChunk, ChatMessage -from haystack.utils import Secret, 
deserialize_secrets_inplace -from haystack.utils import serialize_callable, deserialize_callable +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.dataclasses import ChatMessage, StreamingChunk +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable logger = logging.getLogger(__name__) @@ -286,11 +284,14 @@ def _check_finish_reason(self, message: ChatMessage) -> None: """ if message.meta["finish_reason"] == "length": logger.warning( - "The completion for index %s has been truncated before reaching a natural stopping point. " + "The completion for index {index} has been truncated before reaching a natural stopping point. " "Increase the max_tokens parameter to allow for longer completions.", - message.meta["index"], + index=message.meta["index"], + finish_reason=message.meta["finish_reason"], ) if message.meta["finish_reason"] == "content_filter": logger.warning( - "The completion for index %s has been truncated due to the content filter.", message.meta["index"] + "The completion for index {index} has been truncated due to the content filter.", + index=message.meta["index"], + finish_reason=message.meta["finish_reason"], ) diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index 3e5fefc415..17e225f1dc 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -1,7 +1,7 @@ -from typing import Optional, Callable +from typing import Callable, Optional from haystack.dataclasses import StreamingChunk -from haystack.utils import serialize_callable, deserialize_callable +from haystack.utils import deserialize_callable, serialize_callable def print_streaming_chunk(chunk: StreamingChunk) -> None: diff --git a/haystack/components/joiners/document_joiner.py b/haystack/components/joiners/document_joiner.py index 87e616bb6a..a18bbc4df8 100644 --- a/haystack/components/joiners/document_joiner.py +++ 
b/haystack/components/joiners/document_joiner.py @@ -1,12 +1,10 @@ import itertools -import logging from collections import defaultdict from math import inf from typing import List, Optional -from haystack.core.component.types import Variadic - -from haystack import component, Document +from haystack import Document, component, logging +from haystack.core.component.types import Variadic logger = logging.getLogger(__name__) diff --git a/haystack/components/others/multiplexer.py b/haystack/components/others/multiplexer.py index 03cf9910bb..9c630bf7eb 100644 --- a/haystack/components/others/multiplexer.py +++ b/haystack/components/others/multiplexer.py @@ -1,8 +1,7 @@ -import logging import sys from typing import Any, Dict -from haystack import component, default_from_dict, default_to_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.core.component.types import Variadic from haystack.utils import deserialize_type, serialize_type diff --git a/haystack/components/preprocessors/document_cleaner.py b/haystack/components/preprocessors/document_cleaner.py index ed149a56e9..ab3cbe9c50 100644 --- a/haystack/components/preprocessors/document_cleaner.py +++ b/haystack/components/preprocessors/document_cleaner.py @@ -1,11 +1,10 @@ -import logging import re from copy import deepcopy from functools import partial, reduce from itertools import chain from typing import Generator, List, Optional, Set -from haystack import Document, component +from haystack import Document, component, logging logger = logging.getLogger(__name__) @@ -73,8 +72,8 @@ def run(self, documents: List[Document]): for doc in documents: if doc.content is None: logger.warning( - "DocumentCleaner only cleans text documents but document.content for document ID %s is None.", - doc.id, + "DocumentCleaner only cleans text documents but document.content for document ID {document_id} is None.", + document_id=doc.id, ) cleaned_docs.append(doc) continue @@ -174,7 +173,9 @@ 
def _find_and_remove_header_footer( if found_footer: pages = [page.replace(found_footer, "") for page in pages] - logger.debug("Removed header '%s' and footer '%s' in document", found_header, found_footer) + logger.debug( + "Removed header '{header}' and footer '{footer}' in document", header=found_header, footer=found_footer + ) text = "\f".join(pages) return text diff --git a/haystack/components/preprocessors/document_splitter.py b/haystack/components/preprocessors/document_splitter.py index fabbee11c2..b76bdde66f 100644 --- a/haystack/components/preprocessors/document_splitter.py +++ b/haystack/components/preprocessors/document_splitter.py @@ -3,7 +3,7 @@ from more_itertools import windowed -from haystack import component, Document +from haystack import Document, component @component diff --git a/haystack/components/rankers/meta_field.py b/haystack/components/rankers/meta_field.py index 4f59123b62..2ce4876c32 100644 --- a/haystack/components/rankers/meta_field.py +++ b/haystack/components/rankers/meta_field.py @@ -1,10 +1,9 @@ -import logging from collections import defaultdict -from typing import List, Dict, Any, Optional, Literal, Callable -from dateutil.parser import parse as date_parse +from typing import Any, Callable, Dict, List, Literal, Optional +from dateutil.parser import parse as date_parse -from haystack import Document, component, default_to_dict +from haystack import Document, component, default_to_dict, logging logger = logging.getLogger(__name__) @@ -203,20 +202,20 @@ def run( # If all docs are missing self.meta_field return original documents if len(docs_with_meta_field) == 0: logger.warning( - "The parameter is currently set to '%s', but none of the provided Documents with IDs %s have this meta key.\n" + "The parameter is currently set to '{meta_field}', but none of the provided Documents with IDs {document_ids} have this meta key.\n" "Set to the name of a field that is present within the provided Documents.\n" "Returning the of the original 
Documents since there are no values to rank.", - self.meta_field, - ",".join([doc.id for doc in documents]), + meta_field=self.meta_field, + document_ids=",".join([doc.id for doc in documents]), ) return {"documents": documents[:top_k]} if len(docs_missing_meta_field) > 0: logger.warning( - "The parameter is currently set to '%s' but the Documents with IDs %s don't have this meta key.\n" + "The parameter is currently set to '{meta_field}' but the Documents with IDs {document_ids} don't have this meta key.\n" "These Documents will be placed at the end of the sorting order.", - self.meta_field, - ",".join([doc.id for doc in docs_missing_meta_field]), + meta_field=self.meta_field, + document_ids=",".join([doc.id for doc in docs_missing_meta_field]), ) # If meta_value_type is provided try to parse the meta values @@ -230,10 +229,10 @@ def run( except TypeError as error: # Return original documents if mixed types that are not comparable are returned (e.g. int and list) logger.warning( - "Tried to sort Documents with IDs %s, but got TypeError with the message: %s\n" + "Tried to sort Documents with IDs {document_ids}, but got TypeError with the message: {error}\n" "Returning the of the original Documents since meta field ranking is not possible.", - ",".join([doc.id for doc in docs_with_meta_field]), - error, + document_ids=",".join([doc.id for doc in docs_with_meta_field]), + error=error, ) return {"documents": documents[:top_k]} @@ -255,12 +254,12 @@ def _parse_meta( unique_meta_values = {doc.meta[self.meta_field] for doc in docs_with_meta_field} if not all(isinstance(meta_value, str) for meta_value in unique_meta_values): logger.warning( - "The parameter is currently set to '%s', but not all of meta values in the " - "provided Documents with IDs %s are strings.\n" + "The parameter is currently set to '{meta_field}', but not all of meta values in the " + "provided Documents with IDs {document_ids} are strings.\n" "Skipping parsing of the meta values.\n" "Set all meta 
values found under the parameter to strings to use .", - meta_value_type, - ",".join([doc.id for doc in docs_with_meta_field]), + meta_field=meta_value_type, + document_ids=",".join([doc.id for doc in docs_with_meta_field]), ) return [d.meta[self.meta_field] for d in docs_with_meta_field] @@ -276,10 +275,10 @@ def _parse_meta( meta_values = [parse_fn(d.meta[self.meta_field]) for d in docs_with_meta_field] except ValueError as error: logger.warning( - "Tried to parse the meta values of Documents with IDs %s, but got ValueError with the message: %s\n" + "Tried to parse the meta values of Documents with IDs {document_ids}, but got ValueError with the message: {error}\n" "Skipping parsing of the meta values.", - ",".join([doc.id for doc in docs_with_meta_field]), - error, + document_ids=",".join([doc.id for doc in docs_with_meta_field]), + error=error, ) meta_values = [d.meta[self.meta_field] for d in docs_with_meta_field] @@ -302,9 +301,9 @@ def _merge_rankings(self, documents: List[Document], sorted_documents: List[Docu logger.warning("The score wasn't provided; defaulting to 0.") elif document.score < 0 or document.score > 1: logger.warning( - "The score %s for Document %s is outside the [0,1] range; defaulting to 0", - document.score, - document.id, + "The score {score} for Document {document_id} is outside the [0,1] range; defaulting to 0", + score=document.score, + document_id=document.id, ) else: score = document.score diff --git a/haystack/components/rankers/transformers_similarity.py b/haystack/components/rankers/transformers_similarity.py index 203fc5b7ca..f69a1af83b 100644 --- a/haystack/components/rankers/transformers_similarity.py +++ b/haystack/components/rankers/transformers_similarity.py @@ -1,20 +1,18 @@ -import logging from pathlib import Path from typing import Any, Dict, List, Optional, Union -from haystack import ComponentError, Document, component, default_from_dict, default_to_dict +from haystack import ComponentError, Document, component, 
default_from_dict, default_to_dict, logging from haystack.lazy_imports import LazyImport -from haystack.utils import ComponentDevice, DeviceMap -from haystack.utils.hf import deserialize_hf_model_kwargs, serialize_hf_model_kwargs, resolve_hf_device_map -from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.utils import ComponentDevice, DeviceMap, Secret, deserialize_secrets_inplace +from haystack.utils.hf import deserialize_hf_model_kwargs, resolve_hf_device_map, serialize_hf_model_kwargs logger = logging.getLogger(__name__) with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'") as torch_and_transformers_import: + import accelerate # pylint: disable=unused-import # the library is used but not directly referenced import torch from transformers import AutoModelForSequenceClassification, AutoTokenizer - import accelerate # pylint: disable=unused-import # the library is used but not directly referenced @component diff --git a/haystack/components/readers/extractive.py b/haystack/components/readers/extractive.py index 17970c2006..724c6b8133 100644 --- a/haystack/components/readers/extractive.py +++ b/haystack/components/readers/extractive.py @@ -1,19 +1,18 @@ -import logging import math import warnings from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union -from haystack import ComponentError, Document, ExtractedAnswer, component, default_from_dict, default_to_dict +from haystack import ComponentError, Document, ExtractedAnswer, component, default_from_dict, default_to_dict, logging from haystack.lazy_imports import LazyImport from haystack.utils import ComponentDevice, DeviceMap -from haystack.utils.hf import deserialize_hf_model_kwargs, serialize_hf_model_kwargs, resolve_hf_device_map +from haystack.utils.hf import deserialize_hf_model_kwargs, resolve_hf_device_map, serialize_hf_model_kwargs with LazyImport("Run 'pip install transformers[torch,sentencepiece]'") as 
torch_and_transformers_import: + import accelerate # pylint: disable=unused-import # the library is used but not directly referenced import torch from tokenizers import Encoding from transformers import AutoModelForQuestionAnswering, AutoTokenizer - import accelerate # pylint: disable=unused-import # the library is used but not directly referenced logger = logging.getLogger(__name__) diff --git a/haystack/components/retrievers/filter_retriever.py b/haystack/components/retrievers/filter_retriever.py index 5456ad47d7..d17bffb788 100644 --- a/haystack/components/retrievers/filter_retriever.py +++ b/haystack/components/retrievers/filter_retriever.py @@ -1,12 +1,9 @@ import importlib -import logging +from typing import Any, Dict, List, Optional -from typing import Dict, List, Any, Optional - -from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError +from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict, logging from haystack.document_stores.types import DocumentStore - logger = logging.getLogger(__name__) @@ -74,7 +71,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "FilterRetriever": raise DeserializationError("Missing 'type' in document store's serialization data") try: module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1) - logger.debug("Trying to import %s", module_name) + logger.debug("Trying to import module '{module}'", module=module_name) module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: raise DeserializationError( diff --git a/haystack/components/retrievers/in_memory/bm25_retriever.py b/haystack/components/retrievers/in_memory/bm25_retriever.py index 653ad10035..3a6e0318d7 100644 --- a/haystack/components/retrievers/in_memory/bm25_retriever.py +++ b/haystack/components/retrievers/in_memory/bm25_retriever.py @@ -1,6 +1,6 @@ -from typing import Dict, List, Any, Optional +from typing import Any, Dict, List, 
Optional -from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError +from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict from haystack.document_stores.in_memory import InMemoryDocumentStore diff --git a/haystack/components/retrievers/in_memory/embedding_retriever.py b/haystack/components/retrievers/in_memory/embedding_retriever.py index e8b3ff7205..2d51f4a60b 100644 --- a/haystack/components/retrievers/in_memory/embedding_retriever.py +++ b/haystack/components/retrievers/in_memory/embedding_retriever.py @@ -1,6 +1,6 @@ -from typing import Dict, List, Any, Optional +from typing import Any, Dict, List, Optional -from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError +from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict from haystack.document_stores.in_memory import InMemoryDocumentStore diff --git a/haystack/components/routers/__init__.py b/haystack/components/routers/__init__.py index 3eaeff616c..68aaeb3f53 100644 --- a/haystack/components/routers/__init__.py +++ b/haystack/components/routers/__init__.py @@ -1,6 +1,6 @@ +from haystack.components.routers.conditional_router import ConditionalRouter from haystack.components.routers.file_type_router import FileTypeRouter from haystack.components.routers.metadata_router import MetadataRouter -from haystack.components.routers.conditional_router import ConditionalRouter from haystack.components.routers.text_language_router import TextLanguageRouter __all__ = ["FileTypeRouter", "MetadataRouter", "TextLanguageRouter", "ConditionalRouter"] diff --git a/haystack/components/routers/conditional_router.py b/haystack/components/routers/conditional_router.py index 042ba7b395..9760eed9ae 100644 --- a/haystack/components/routers/conditional_router.py +++ b/haystack/components/routers/conditional_router.py @@ -1,11 +1,10 @@ -import logging -from typing import 
List, Dict, Any, Set +from typing import Any, Dict, List, Set -from jinja2 import meta, Environment, TemplateSyntaxError +from jinja2 import Environment, TemplateSyntaxError, meta from jinja2.nativetypes import NativeEnvironment -from haystack import component, default_from_dict, default_to_dict -from haystack.utils import serialize_type, deserialize_type +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.utils import deserialize_type, serialize_type logger = logging.getLogger(__name__) diff --git a/haystack/components/routers/file_type_router.py b/haystack/components/routers/file_type_router.py index 14d8f254b0..82b4a613db 100644 --- a/haystack/components/routers/file_type_router.py +++ b/haystack/components/routers/file_type_router.py @@ -1,10 +1,9 @@ -import logging import mimetypes from collections import defaultdict from pathlib import Path -from typing import List, Union, Optional, Dict +from typing import Dict, List, Optional, Union -from haystack import component +from haystack import component, logging from haystack.dataclasses import ByteStream logger = logging.getLogger(__name__) diff --git a/haystack/components/routers/metadata_router.py b/haystack/components/routers/metadata_router.py index ced8f39bfe..e4a0bdab28 100644 --- a/haystack/components/routers/metadata_router.py +++ b/haystack/components/routers/metadata_router.py @@ -1,7 +1,7 @@ from typing import Dict, List -from haystack import component, Document -from haystack.utils.filters import document_matches_filter, convert +from haystack import Document, component +from haystack.utils.filters import convert, document_matches_filter @component diff --git a/haystack/components/routers/text_language_router.py b/haystack/components/routers/text_language_router.py index 2a89db23ec..67bcb932eb 100644 --- a/haystack/components/routers/text_language_router.py +++ b/haystack/components/routers/text_language_router.py @@ -1,7 +1,6 @@ -import logging -from typing 
import List, Dict, Optional +from typing import Dict, List, Optional -from haystack import component +from haystack import component, logging from haystack.lazy_imports import LazyImport logger = logging.getLogger(__name__) @@ -87,8 +86,8 @@ def _detect_language(self, text: str) -> Optional[str]: try: language = langdetect.detect(text) except langdetect.LangDetectException as exception: - logger.warning("Langdetect cannot detect the language of text. Error: %s", exception) + logger.warning("Langdetect cannot detect the language of text. Error: {error}", error=exception) # Only log the text in debug mode, as it might contain sensitive information - logger.debug("Langdetect cannot detect the language of text: %s", text) + logger.debug("Langdetect cannot detect the language of text: {text}", text=text) language = None return language diff --git a/haystack/components/samplers/top_p.py b/haystack/components/samplers/top_p.py index 28aee9f4c7..c1cb5b8cee 100644 --- a/haystack/components/samplers/top_p.py +++ b/haystack/components/samplers/top_p.py @@ -1,7 +1,6 @@ -import logging from typing import List, Optional -from haystack import ComponentError, Document, component +from haystack import ComponentError, Document, component, logging from haystack.lazy_imports import LazyImport logger = logging.getLogger(__name__) @@ -102,9 +101,9 @@ def run(self, documents: List[Document], top_p: Optional[float] = None): # return at least one document if not selected_docs: logger.warning( - "Top-p sampling with p=%s resulted in no documents being selected. " + "Top-p sampling with p={top_p} resulted in no documents being selected. 
" "Returning the document with the highest similarity score.", - top_p, + top_p=top_p, ) highest_prob_indices = torch.argsort(probs, descending=True) selected_docs = [documents[int(highest_prob_indices[0].item())]] diff --git a/haystack/components/validators/json_schema.py b/haystack/components/validators/json_schema.py index 4a776f2fb9..1fc1d06c66 100644 --- a/haystack/components/validators/json_schema.py +++ b/haystack/components/validators/json_schema.py @@ -1,12 +1,12 @@ import json -from typing import List, Any, Dict, Optional +from typing import Any, Dict, List, Optional from haystack import component from haystack.dataclasses import ChatMessage from haystack.lazy_imports import LazyImport with LazyImport(message="Run 'pip install jsonschema'") as jsonschema_import: - from jsonschema import validate, ValidationError + from jsonschema import ValidationError, validate @component diff --git a/haystack/components/websearch/__init__.py b/haystack/components/websearch/__init__.py index 6d0e930bab..8b9217bf5f 100644 --- a/haystack/components/websearch/__init__.py +++ b/haystack/components/websearch/__init__.py @@ -1,4 +1,4 @@ -from haystack.components.websearch.serper_dev import SerperDevWebSearch from haystack.components.websearch.searchapi import SearchApiWebSearch +from haystack.components.websearch.serper_dev import SerperDevWebSearch __all__ = ["SerperDevWebSearch", "SearchApiWebSearch"] diff --git a/haystack/components/websearch/searchapi.py b/haystack/components/websearch/searchapi.py index fcc6a6be00..383e17d3ba 100644 --- a/haystack/components/websearch/searchapi.py +++ b/haystack/components/websearch/searchapi.py @@ -1,10 +1,9 @@ import json -import logging -from typing import Dict, List, Optional, Any, Union +from typing import Any, Dict, List, Optional, Union import requests -from haystack import Document, component, default_to_dict, ComponentError, default_from_dict +from haystack import ComponentError, Document, component, default_from_dict, 
default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) @@ -167,5 +166,9 @@ def run(self, query: str) -> Dict[str, Union[List[Document], List[str]]]: links = [result["link"] for result in json_result["organic_results"]] - logger.debug("SearchApi returned %s documents for the query '%s'", len(documents), query) + logger.debug( + "SearchApi returned {number_documents} documents for the query '{query}'", + number_documents=len(documents), + query=query, + ) return {"documents": documents[: self.top_k], "links": links[: self.top_k]} diff --git a/haystack/components/websearch/serper_dev.py b/haystack/components/websearch/serper_dev.py index 87ee740655..4fcd73fdb5 100644 --- a/haystack/components/websearch/serper_dev.py +++ b/haystack/components/websearch/serper_dev.py @@ -1,10 +1,9 @@ import json -import logging -from typing import Dict, List, Optional, Any, Union +from typing import Any, Dict, List, Optional, Union import requests -from haystack import Document, component, default_to_dict, ComponentError, default_from_dict +from haystack import ComponentError, Document, component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) @@ -163,5 +162,9 @@ def run(self, query: str) -> Dict[str, Union[List[Document], List[str]]]: links = [result["link"] for result in json_result["organic"]] - logger.debug("Serper Dev returned %s documents for the query '%s'", len(documents), query) + logger.debug( + "Serper Dev returned {number_documents} documents for the query '{query}'", + number_documents=len(documents), + query=query, + ) return {"documents": documents[: self.top_k], "links": links[: self.top_k]} diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index 2703bedc18..437f3c81e5 100644 --- a/haystack/components/writers/document_writer.py +++ 
b/haystack/components/writers/document_writer.py @@ -1,9 +1,7 @@ -from typing import List, Optional, Dict, Any - import importlib -import logging +from typing import Any, Dict, List, Optional -from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError +from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict, logging from haystack.document_stores.types import DocumentStore, DuplicatePolicy logger = logging.getLogger(__name__) @@ -53,7 +51,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "DocumentWriter": try: module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1) - logger.debug("Trying to import %s", module_name) + logger.debug("Trying to import module '{module}'", module=module_name) module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: raise DeserializationError( diff --git a/haystack/core/component/component.py b/haystack/core/component/component.py index 2046aea2d7..e18e8ef418 100644 --- a/haystack/core/component/component.py +++ b/haystack/core/component/component.py @@ -69,11 +69,11 @@ """ import inspect -import logging from copy import deepcopy from types import new_class from typing import Any, Optional, Protocol, runtime_checkable +from haystack import logging from haystack.core.errors import ComponentError from .sockets import Sockets @@ -162,10 +162,10 @@ def __call__(cls, *args, **kwargs): # We can have this information only at instance creation time, so we do it here. is_variadic = any(socket.is_variadic for socket in instance.__haystack_input__._sockets_dict.values()) if not is_variadic and cls.__haystack_is_greedy__: - logging.warning( - "Component '%s' has no variadic input, but it's marked as greedy. " + logger.warning( + "Component '{component}' has no variadic input, but it's marked as greedy. 
" "This is not supported and can lead to unexpected behavior.", - cls.__name__, + component=cls.__name__, ) return instance @@ -322,7 +322,7 @@ def _component(self, cls, is_greedy: bool = False): """ Decorator validating the structure of the component and registering it in the components registry. """ - logger.debug("Registering %s as a component", cls) + logger.debug("Registering {component} as a component", component=cls) # Check for required methods and fail as soon as possible if not hasattr(cls, "run"): @@ -351,13 +351,13 @@ def copy_class_namespace(namespace): if class_path in self.registry: # Corner case, but it may occur easily in notebooks when re-running cells. logger.debug( - "Component %s is already registered. Previous imported from '%s', new imported from '%s'", - class_path, - self.registry[class_path], - cls, + "Component {component} is already registered. Previous imported from '{module}', new imported from '{new_module}'", + component=class_path, + module=self.registry[class_path], + new_module=cls, ) self.registry[class_path] = cls - logger.debug("Registered Component %s", cls) + logger.debug("Registered Component {component}", component=cls) # Override the __repr__ method with a default one cls.__repr__ = _component_repr diff --git a/haystack/core/component/sockets.py b/haystack/core/component/sockets.py index 374ae63032..3da03e4eac 100644 --- a/haystack/core/component/sockets.py +++ b/haystack/core/component/sockets.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -import logging from typing import Dict, Type, Union +from haystack import logging from haystack.core.type_utils import _type_name from .types import InputSocket, OutputSocket diff --git a/haystack/core/pipeline/descriptions.py b/haystack/core/pipeline/descriptions.py index 0e7c042091..518e399d42 100644 --- a/haystack/core/pipeline/descriptions.py +++ b/haystack/core/pipeline/descriptions.py @@ -1,11 +1,11 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # 
SPDX-License-Identifier: Apache-2.0 -import logging from typing import Dict, List import networkx # type:ignore +from haystack import logging from haystack.core.component.types import InputSocket, OutputSocket from haystack.core.type_utils import _type_name diff --git a/haystack/core/pipeline/draw.py b/haystack/core/pipeline/draw.py index e7d22e441d..9da9c69f6a 100644 --- a/haystack/core/pipeline/draw.py +++ b/haystack/core/pipeline/draw.py @@ -2,11 +2,11 @@ # # SPDX-License-Identifier: Apache-2.0 import base64 -import logging import networkx # type:ignore import requests +from haystack import logging from haystack.core.errors import PipelineDrawingError from haystack.core.pipeline.descriptions import find_pipeline_inputs, find_pipeline_outputs from haystack.core.type_utils import _type_name @@ -72,18 +72,23 @@ def _to_mermaid_image(graph: networkx.MultiDiGraph): base64_string = base64_bytes.decode("ascii") url = f"https://mermaid.ink/img/{base64_string}?type=png" - logging.debug("Rendeding graph at %s", url) + logger.debug("Rendering graph at {url}", url=url) try: resp = requests.get(url, timeout=10) if resp.status_code >= 400: - logger.warning("Failed to draw the pipeline: https://mermaid.ink/img/ returned status %s", resp.status_code) - logger.info("Exact URL requested: %s", url) + logger.warning( + "Failed to draw the pipeline: https://mermaid.ink/img/ returned status {status_code}", + status_code=resp.status_code, + ) + logger.info("Exact URL requested: {url}", url=url) logger.warning("No pipeline diagram will be saved.") resp.raise_for_status() except Exception as exc: # pylint: disable=broad-except - logger.warning("Failed to draw the pipeline: could not connect to https://mermaid.ink/img/ (%s)", exc) - logger.info("Exact URL requested: %s", url) + logger.warning( + "Failed to draw the pipeline: could not connect to https://mermaid.ink/img/ ({error})", error=exc + ) + logger.info("Exact URL requested: {url}", url=url) logger.warning("No pipeline diagram will 
be saved.") raise PipelineDrawingError( "There was an issue with https://mermaid.ink/, see the stacktrace for details." @@ -140,6 +145,6 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str: connections = "\n".join(connections_list + input_connections + output_connections) graph_styled = MERMAID_STYLED_TEMPLATE.format(connections=connections) - logger.debug("Mermaid diagram:\n%s", graph_styled) + logger.debug("Mermaid diagram:\n{diagram}", diagram=graph_styled) return graph_styled diff --git a/haystack/core/pipeline/pipeline.py b/haystack/core/pipeline/pipeline.py index 8201fc96cb..e5585b361b 100644 --- a/haystack/core/pipeline/pipeline.py +++ b/haystack/core/pipeline/pipeline.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 import importlib import itertools -import logging from collections import defaultdict from copy import copy, deepcopy from datetime import datetime @@ -12,6 +11,7 @@ import networkx # type:ignore +from haystack import logging, tracing from haystack.core.component import Component, InputSocket, OutputSocket, component from haystack.core.errors import ( PipelineConnectError, @@ -19,15 +19,14 @@ PipelineError, PipelineMaxLoops, PipelineRuntimeError, - PipelineValidationError, PipelineUnmarshalError, + PipelineValidationError, ) from haystack.core.serialization import component_from_dict, component_to_dict from haystack.core.type_utils import _type_name, _types_are_compatible from haystack.marshal import Marshaller, YamlMarshaller from haystack.telemetry import pipeline_running from haystack.utils import is_in_jupyter -from haystack import tracing from .descriptions import find_pipeline_inputs, find_pipeline_outputs from .draw import _to_mermaid_image @@ -180,7 +179,7 @@ def from_dict(cls: Type[T], data: Dict[str, Any], **kwargs) -> T: try: # Import the module first... 
module, _ = component_data["type"].rsplit(".", 1) - logger.debug("Trying to import %s", module) + logger.debug("Trying to import {module}", module=module) importlib.import_module(module) # ...then try again if component_data["type"] not in component.registry: @@ -298,7 +297,7 @@ def add_component(self, name: str, instance: Component) -> None: setattr(instance, "__haystack_added_to_pipeline__", self) # Add component to the graph, disconnected - logger.debug("Adding component '%s' (%s)", name, instance) + logger.debug("Adding component '{component_name}' ({component})", component_name=name, component=instance) # We're completely sure the fields exist so we ignore the type error self.graph.add_node( name, @@ -437,11 +436,11 @@ def connect(self, sender: str, receiver: str) -> "Pipeline": raise PipelineConnectError(msg) logger.debug( - "Connecting '%s.%s' to '%s.%s'", - sender_component_name, - sender_socket.name, - receiver_component_name, - receiver_socket.name, + "Connecting '{sender_component}.{sender_socket_name}' to '{receiver_component}.{receiver_socket_name}'", + sender_component=sender_component_name, + sender_socket_name=sender_socket.name, + receiver_component=receiver_component_name, + receiver_socket_name=receiver_socket.name, ) if receiver_component_name in sender_socket.receivers and sender_component_name in receiver_socket.senders: @@ -572,7 +571,7 @@ def warm_up(self): """ for node in self.graph.nodes: if hasattr(self.graph.nodes[node]["instance"], "warm_up"): - logger.info("Warming up component %s...", node) + logger.info("Warming up component {node}...", node=node) self.graph.nodes[node]["instance"].warm_up() def _validate_input(self, data: Dict[str, Any]): @@ -683,8 +682,8 @@ def run(self, word: str): data, unresolved_inputs = self._prepare_component_input_data(data) if unresolved_inputs: logger.warning( - "Inputs %s were not matched to any component inputs, please check your run parameters.", - list(unresolved_inputs.keys()), + "Inputs {input_keys} 
were not matched to any component inputs, please check your run parameters.", + input_keys=list(unresolved_inputs.keys()), ) # Raise if input is malformed in some way diff --git a/haystack/core/pipeline/template.py b/haystack/core/pipeline/template.py index 72b08bdaa1..eb5e2007e4 100644 --- a/haystack/core/pipeline/template.py +++ b/haystack/core/pipeline/template.py @@ -1,9 +1,8 @@ from enum import Enum from pathlib import Path -from typing import Dict, Any, Optional, Union - -from jinja2 import meta, TemplateSyntaxError, Environment, PackageLoader +from typing import Any, Dict, Optional, Union +from jinja2 import Environment, PackageLoader, TemplateSyntaxError, meta TEMPLATE_FILE_EXTENSION = ".yaml.jinja2" TEMPLATE_HOME_DIR = Path(__file__).resolve().parent / "predefined" diff --git a/haystack/core/serialization.py b/haystack/core/serialization.py index 1020375e10..dbe5400bb7 100644 --- a/haystack/core/serialization.py +++ b/haystack/core/serialization.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import inspect -from typing import Type, Dict, Any +from typing import Any, Dict, Type from haystack.core.errors import DeserializationError, SerializationError diff --git a/haystack/core/type_utils.py b/haystack/core/type_utils.py index 47a10810a0..8f4f36c1bf 100644 --- a/haystack/core/type_utils.py +++ b/haystack/core/type_utils.py @@ -1,10 +1,9 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Union, get_args, get_origin, Any - -import logging +from typing import Any, Union, get_args, get_origin +from haystack import logging logger = logging.getLogger(__name__) diff --git a/haystack/dataclasses/__init__.py b/haystack/dataclasses/__init__.py index ddf491fd3d..695f8c47fd 100644 --- a/haystack/dataclasses/__init__.py +++ b/haystack/dataclasses/__init__.py @@ -1,8 +1,7 @@ -from haystack.dataclasses.document import Document -from haystack.dataclasses.answer import ExtractedAnswer, 
GeneratedAnswer, Answer +from haystack.dataclasses.answer import Answer, ExtractedAnswer, GeneratedAnswer from haystack.dataclasses.byte_stream import ByteStream -from haystack.dataclasses.chat_message import ChatMessage -from haystack.dataclasses.chat_message import ChatRole +from haystack.dataclasses.chat_message import ChatMessage, ChatRole +from haystack.dataclasses.document import Document from haystack.dataclasses.streaming_chunk import StreamingChunk __all__ = [ diff --git a/haystack/dataclasses/answer.py b/haystack/dataclasses/answer.py index 1d197a74e3..61fa23f872 100644 --- a/haystack/dataclasses/answer.py +++ b/haystack/dataclasses/answer.py @@ -1,6 +1,6 @@ import io +from dataclasses import asdict, dataclass, field from typing import Any, Dict, List, Optional, Protocol, runtime_checkable -from dataclasses import dataclass, field, asdict from pandas import DataFrame, read_json diff --git a/haystack/dataclasses/byte_stream.py b/haystack/dataclasses/byte_stream.py index 9c228d5f2c..fdb7c979d6 100644 --- a/haystack/dataclasses/byte_stream.py +++ b/haystack/dataclasses/byte_stream.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional @dataclass diff --git a/haystack/dataclasses/chat_message.py b/haystack/dataclasses/chat_message.py index 550214efae..e9a9b0a15b 100644 --- a/haystack/dataclasses/chat_message.py +++ b/haystack/dataclasses/chat_message.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum -from typing import Dict, Any, Optional +from typing import Any, Dict, Optional class ChatRole(str, Enum): diff --git a/haystack/dataclasses/document.py b/haystack/dataclasses/document.py index 9cddb5ce37..bfdf69dde1 100644 --- a/haystack/dataclasses/document.py +++ b/haystack/dataclasses/document.py @@ -1,12 +1,12 @@ import hashlib import io -import logging from dataclasses import asdict, dataclass, field, fields from 
typing import Any, Dict, List, Optional from numpy import ndarray from pandas import DataFrame, read_json +from haystack import logging from haystack.dataclasses.byte_stream import ByteStream logger = logging.getLogger(__name__) diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index b88a0b57f5..90adc48940 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Dict, Any +from typing import Any, Dict @dataclass diff --git a/haystack/document_stores/in_memory/document_store.py b/haystack/document_stores/in_memory/document_store.py index 3968c08726..4046af966e 100644 --- a/haystack/document_stores/in_memory/document_store.py +++ b/haystack/document_stores/in_memory/document_store.py @@ -1,18 +1,16 @@ import re -from typing import Literal, Any, Dict, List, Optional, Iterable - -import logging +from typing import Any, Dict, Iterable, List, Literal, Optional import numpy as np from haystack_bm25 import rank_bm25 from tqdm.auto import tqdm -from haystack import default_from_dict, default_to_dict +from haystack import default_from_dict, default_to_dict, logging from haystack.dataclasses import Document +from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy -from haystack.utils.filters import document_matches_filter, convert -from haystack.document_stores.errors import DuplicateDocumentError, DocumentStoreError from haystack.utils import expit +from haystack.utils.filters import convert, document_matches_filter logger = logging.getLogger(__name__) @@ -131,7 +129,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D if policy == DuplicatePolicy.FAIL: raise DuplicateDocumentError(f"ID '{document.id}' already exists.") if policy == DuplicatePolicy.SKIP: - logger.warning("ID '%s' already 
exists", document.id) + logger.warning("ID '{document_id}' already exists", document_id=document.id) written_documents -= 1 continue self.storage[document.id] = document @@ -181,15 +179,17 @@ def bm25_retrieval( lower_case_documents = [] for doc in all_documents: if doc.content is None and doc.dataframe is None: - logger.info("Document '%s' has no text or dataframe content. Skipping it.", doc.id) + logger.info( + "Document '{document_id}' has no text or dataframe content. Skipping it.", document_id=doc.id + ) else: if doc.content is not None: lower_case_documents.append(doc.content.lower()) if doc.dataframe is not None: logger.warning( - "Document '%s' has both text and dataframe content. " + "Document '{document_id}' has both text and dataframe content. " "Using text content and skipping dataframe content.", - doc.id, + document_id=doc.id, ) continue if doc.dataframe is not None: diff --git a/haystack/document_stores/types/__init__.py b/haystack/document_stores/types/__init__.py index 1033ccd63f..1713bdbede 100644 --- a/haystack/document_stores/types/__init__.py +++ b/haystack/document_stores/types/__init__.py @@ -1,4 +1,4 @@ -from .protocol import DocumentStore from .policy import DuplicatePolicy +from .protocol import DocumentStore __all__ = ["DocumentStore", "DuplicatePolicy"] diff --git a/haystack/document_stores/types/protocol.py b/haystack/document_stores/types/protocol.py index 31eae12d5a..3a0654e4b5 100644 --- a/haystack/document_stores/types/protocol.py +++ b/haystack/document_stores/types/protocol.py @@ -1,8 +1,7 @@ -from typing import Protocol, Optional, Dict, Any, List -import logging +from typing import Any, Dict, List, Optional, Protocol +from haystack import logging from haystack.dataclasses import Document - from haystack.document_stores.types.policy import DuplicatePolicy # Ellipsis are needed for the type checker, it's safe to disable module-wide diff --git a/haystack/lazy_imports.py b/haystack/lazy_imports.py index 5f474beef9..98b71ccc87 100644 
--- a/haystack/lazy_imports.py +++ b/haystack/lazy_imports.py @@ -1,7 +1,7 @@ -from typing import Optional, Type from types import TracebackType -from lazy_imports.try_import import _DeferredImportExceptionContextManager +from typing import Optional, Type +from lazy_imports.try_import import _DeferredImportExceptionContextManager DEFAULT_IMPORT_ERROR_MSG = "Try 'pip install {}'" diff --git a/haystack/logging.py b/haystack/logging.py index 94489ed0a7..6cd314d6f0 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -1,25 +1,266 @@ import builtins +import functools import logging import os import sys import typing -from typing import List, Optional - -import haystack.tracing.tracer -import haystack.utils.jupyter +from typing import Any, List, Optional if typing.TYPE_CHECKING: - from structlog.typing import Processor, WrappedLogger, EventDict + from structlog.typing import EventDict, Processor, WrappedLogger HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" +class PatchedLogger(typing.Protocol): + """Class which enables using type checkers to find wrong logger usage.""" + + def debug( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def info( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def warn( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def warning( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... 
+ + def error( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def critical( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def exception( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def fatal( + self, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def log( + self, + level: int, + msg: str, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, + ) -> None: + ... + + def setLevel(self, level: int) -> None: + ... + + +def patch_log_method_to_kwargs_only(func: typing.Callable) -> typing.Callable: + """A decorator to make sure that a function is only called with keyword arguments.""" + + @functools.wraps(func) + def log_only_with_kwargs( + msg, *, _: Any = None, exc_info: Any = None, stack_info: Any = False, stacklevel: int = 1, **kwargs: Any + ) -> Any: # we need the `_` to avoid a syntax error + existing_extra = kwargs.pop("extra", {}) + return func( + # we need to increase the stacklevel by 1 to point to the correct caller + # (otherwise it points to this function) + msg, + exc_info=exc_info, + stack_info=stack_info, + stacklevel=stacklevel + 1, + extra={**existing_extra, **kwargs}, + ) + + return log_only_with_kwargs + + +def patch_log_with_level_method_to_kwargs_only(func: typing.Callable) -> typing.Callable: + """A decorator to make sure that a function is only called with keyword arguments.""" + + @functools.wraps(func) + def log_only_with_kwargs( + level, + msg, + *, + _: Any = None, + exc_info: Any = None, + stack_info: Any = False, + stacklevel: int = 1, + **kwargs: Any, # we 
need the `_` to avoid a syntax error + ) -> Any: + existing_extra = kwargs.pop("extra", {}) + + return func( + level, + msg, + exc_info=exc_info, + stack_info=stack_info, + # we need to increase the stacklevel by 1 to point to the correct caller + # (otherwise it points to this function) + stacklevel=stacklevel + 1, + extra={**existing_extra, **kwargs}, + ) + + return log_only_with_kwargs + + +def patch_make_records_to_use_kwarg_string_interpolation(original_make_records: typing.Callable) -> typing.Callable: + @functools.wraps(original_make_records) + def wrapper(name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None) -> Any: + safe_extra = extra or {} + interpolated_msg = msg.format(**safe_extra) + return original_make_records(name, level, fn, lno, interpolated_msg, (), exc_info, func, extra, sinfo) + + return wrapper + + +def _patch_structlog_call_information(logger: logging.Logger) -> None: + # structlog patches the findCaller to hide itself from the traceback. + # We need to patch their patch to hide `haystack.logging` from the traceback. + try: + from structlog._frames import _find_first_app_frame_and_name, _format_stack + from structlog.stdlib import _FixedFindCallerLogger + + if not isinstance(logger, _FixedFindCallerLogger): + return + + # completely copied from structlog. 
We only add `haystack.logging` to the list of ignored frames + # pylint: disable=unused-variable + def findCaller(stack_info: bool = False, stacklevel: int = 1) -> typing.Tuple[str, int, str, Optional[str]]: + try: + sinfo: Optional[str] + # we need to exclude `haystack.logging` from the stack + f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) + sinfo = _format_stack(f) if stack_info else None + except Exception as error: + print(f"Error in findCaller: {error}") + + return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo + + logger.findCaller = findCaller # type: ignore + except ImportError: + pass + + +def getLogger(name: str) -> PatchedLogger: + logger = logging.getLogger(name) + # We patch the default logger methods to make sure that they are only called with keyword arguments. + # We enforce keyword-arguments because + # - it brings in consistency + # - it makes structure logging effective, not just an available feature + logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore + logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore + logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore + logger.warning = patch_log_method_to_kwargs_only(logger.warning) # type: ignore + logger.error = patch_log_method_to_kwargs_only(logger.error) # type: ignore + logger.critical = patch_log_method_to_kwargs_only(logger.critical) # type: ignore + logger.exception = patch_log_method_to_kwargs_only(logger.exception) # type: ignore + logger.fatal = patch_log_method_to_kwargs_only(logger.fatal) # type: ignore + logger.log = patch_log_with_level_method_to_kwargs_only(logger.log) # type: ignore + + _patch_structlog_call_information(logger) + + # We also patch the `makeRecord` method to use keyword string interpolation + logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore + + return typing.cast(PatchedLogger, logger) + + +def 
add_line_and_file(_: "WrappedLogger", __: str, event_dict: "EventDict") -> "EventDict": + """Add line and file to log entries.""" + stdlib_record = event_dict.get("_record") + if not stdlib_record: + return event_dict + + event_dict["lineno"] = stdlib_record.lineno + event_dict["module"] = stdlib_record.name + + return event_dict + + def correlate_logs_with_traces(_: "WrappedLogger", __: str, event_dict: "EventDict") -> "EventDict": """Add correlation data for logs. This is useful if you want to correlate logs with traces. """ + import haystack.tracing.tracer # to avoid circular imports + if not haystack.tracing.is_tracing_enabled(): return event_dict @@ -41,6 +282,8 @@ def configure_logging(use_json: Optional[bool] = None) -> None: - setting the `use_json` parameter to `True` when calling this function - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` """ + import haystack.utils.jupyter # to avoid circular imports + try: import structlog from structlog.processors import ExceptionRenderer @@ -75,6 +318,7 @@ def configure_logging(use_json: Optional[bool] = None) -> None: structlog.stdlib.add_log_level, # Adds the current timestamp in ISO format to logs structlog.processors.TimeStamper(fmt="iso"), + add_line_and_file, ] if use_json: @@ -83,7 +327,7 @@ def configure_logging(use_json: Optional[bool] = None) -> None: structlog.configure( processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], - logger_factory=structlog.stdlib.LoggerFactory(), + logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), cache_logger_on_first_use=True, # This is a filter that will filter out log entries that are below the log level of the root logger. 
wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), diff --git a/haystack/marshal/protocol.py b/haystack/marshal/protocol.py index 06663b7534..ee3cd5c52e 100644 --- a/haystack/marshal/protocol.py +++ b/haystack/marshal/protocol.py @@ -1,4 +1,4 @@ -from typing import Protocol, Dict, Any, Union +from typing import Any, Dict, Protocol, Union class Marshaller(Protocol): diff --git a/haystack/marshal/yaml.py b/haystack/marshal/yaml.py index 5fca27fb6f..5966e29b0a 100644 --- a/haystack/marshal/yaml.py +++ b/haystack/marshal/yaml.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, Union +from typing import Any, Dict, Union import yaml diff --git a/haystack/telemetry/_environment.py b/haystack/telemetry/_environment.py index 0b15a9ab60..afbd252ece 100644 --- a/haystack/telemetry/_environment.py +++ b/haystack/telemetry/_environment.py @@ -1,10 +1,10 @@ # pylint: disable=global-statement -import logging import os import platform import sys -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional +from haystack import logging from haystack.version import __version__ logger = logging.getLogger(__name__) diff --git a/haystack/telemetry/_telemetry.py b/haystack/telemetry/_telemetry.py index a0b9be8ff5..cb41cfac15 100644 --- a/haystack/telemetry/_telemetry.py +++ b/haystack/telemetry/_telemetry.py @@ -9,6 +9,7 @@ import posthog import yaml +from haystack import logging as haystack_logging from haystack.telemetry._environment import collect_system_specs if TYPE_CHECKING: @@ -22,7 +23,7 @@ MIN_SECONDS_BETWEEN_EVENTS = 60 -logger = logging.getLogger(__name__) +logger = haystack_logging.getLogger(__name__) class Telemetry: @@ -64,7 +65,9 @@ def __init__(self): if "user_id" in config: self.user_id = config["user_id"] except Exception as e: - logger.debug("Telemetry could not read the config file %s", CONFIG_PATH, exc_info=e) + logger.debug( + "Telemetry could not read the config file {config_path}", 
config_path=CONFIG_PATH, exc_info=e + ) else: # Create the config file logger.info( @@ -80,7 +83,9 @@ def __init__(self): with open(CONFIG_PATH, "w") as outfile: yaml.dump({"user_id": self.user_id}, outfile, default_flow_style=False) except Exception as e: - logger.debug("Telemetry could not write config file to %s", CONFIG_PATH, exc_info=e) + logger.debug( + "Telemetry could not write config file to {config_path}", config_path=CONFIG_PATH, exc_info=e + ) self.event_properties = collect_system_specs() diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py index c757415aa5..77b1659015 100644 --- a/haystack/testing/document_store.py +++ b/haystack/testing/document_store.py @@ -1,14 +1,14 @@ # pylint: disable=too-many-public-methods -from typing import List import random from datetime import datetime +from typing import List -import pytest import pandas as pd +import pytest from haystack.dataclasses import Document -from haystack.document_stores.types import DocumentStore, DuplicatePolicy from haystack.document_stores.errors import DuplicateDocumentError +from haystack.document_stores.types import DocumentStore, DuplicatePolicy from haystack.errors import FilterError diff --git a/haystack/testing/factory.py b/haystack/testing/factory.py index aeb719a634..b1b72477fb 100644 --- a/haystack/testing/factory.py +++ b/haystack/testing/factory.py @@ -1,9 +1,9 @@ -from typing import Any, Dict, Optional, Tuple, Type, List, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union +from haystack.core.component import Component, component +from haystack.core.serialization import default_from_dict, default_to_dict from haystack.dataclasses import Document from haystack.document_stores.types import DocumentStore, DuplicatePolicy -from haystack.core.component import component, Component -from haystack.core.serialization import default_to_dict, default_from_dict def document_store_class( diff --git 
a/haystack/testing/sample_components/accumulate.py b/haystack/testing/sample_components/accumulate.py index 50e7383b17..c288fa4961 100644 --- a/haystack/testing/sample_components/accumulate.py +++ b/haystack/testing/sample_components/accumulate.py @@ -1,14 +1,14 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Callable, Optional, Dict, Any -import sys import builtins +import sys from importlib import import_module +from typing import Any, Callable, Dict, Optional -from haystack.core.serialization import default_to_dict from haystack.core.component import component from haystack.core.errors import ComponentDeserializationError +from haystack.core.serialization import default_to_dict def _default_function(first: int, second: int) -> int: diff --git a/haystack/testing/sample_components/concatenate.py b/haystack/testing/sample_components/concatenate.py index d6b921b640..79315bb5c8 100644 --- a/haystack/testing/sample_components/concatenate.py +++ b/haystack/testing/sample_components/concatenate.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Union, List +from typing import List, Union from haystack.core.component import component diff --git a/haystack/testing/sample_components/fstring.py b/haystack/testing/sample_components/fstring.py index 65fdd4b341..f4fed972c9 100644 --- a/haystack/testing/sample_components/fstring.py +++ b/haystack/testing/sample_components/fstring.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import List, Any, Optional +from typing import Any, List, Optional from haystack.core.component import component diff --git a/haystack/testing/sample_components/greet.py b/haystack/testing/sample_components/greet.py index c284361e6d..1eb844ec10 100644 --- a/haystack/testing/sample_components/greet.py +++ 
b/haystack/testing/sample_components/greet.py @@ -1,13 +1,13 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Optional import logging +from typing import Optional +import haystack.logging as haystack_logging from haystack.core.component import component - -logger = logging.getLogger(__name__) +logger = haystack_logging.getLogger(__name__) @component diff --git a/haystack/testing/test_utils.py b/haystack/testing/test_utils.py index 596feb7001..467c0b9417 100644 --- a/haystack/testing/test_utils.py +++ b/haystack/testing/test_utils.py @@ -1,8 +1,9 @@ import os import random -import logging + import numpy as np +from haystack import logging logger = logging.getLogger(__name__) @@ -31,4 +32,4 @@ def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None: torch.backends.cudnn.benchmark = False except (ImportError, ModuleNotFoundError) as exc: - logger.info("Could not set PyTorch seed because torch is not installed. Exception: %s", exc) + logger.info("Could not set PyTorch seed because torch is not installed. 
Exception: {exception}", exception=exc) diff --git a/haystack/tracing/__init__.py b/haystack/tracing/__init__.py index 8d522f286b..46af7f5a52 100644 --- a/haystack/tracing/__init__.py +++ b/haystack/tracing/__init__.py @@ -1,2 +1,10 @@ -from .tracer import Tracer, Span, enable_tracing, disable_tracing, is_tracing_enabled, auto_enable_tracing, tracer -from .opentelemetry import OpenTelemetryTracer +from haystack.tracing.tracer import ( # noqa: I001 (otherwise we end up with partial imports) + Span, + Tracer, + auto_enable_tracing, + disable_tracing, + enable_tracing, + is_tracing_enabled, + tracer, +) +from haystack.tracing.opentelemetry import OpenTelemetryTracer diff --git a/haystack/tracing/datadog.py b/haystack/tracing/datadog.py index e94dbddce1..6badbb1070 100644 --- a/haystack/tracing/datadog.py +++ b/haystack/tracing/datadog.py @@ -1,8 +1,8 @@ import contextlib -from typing import Optional, Dict, Any, Iterator +from typing import Any, Dict, Iterator, Optional from haystack.lazy_imports import LazyImport -from haystack.tracing import Tracer, Span +from haystack.tracing import Span, Tracer from haystack.tracing import utils as tracing_utils with LazyImport("Run 'pip install ddtrace'") as ddtrace_import: diff --git a/haystack/tracing/opentelemetry.py b/haystack/tracing/opentelemetry.py index 4e0a46ccd5..78cafb6995 100644 --- a/haystack/tracing/opentelemetry.py +++ b/haystack/tracing/opentelemetry.py @@ -1,11 +1,10 @@ import contextlib -from typing import Optional, Dict, Any, Iterator +from typing import Any, Dict, Iterator, Optional from haystack.lazy_imports import LazyImport -from haystack.tracing import Tracer, Span +from haystack.tracing import Span, Tracer from haystack.tracing import utils as tracing_utils - with LazyImport("Run 'pip install opentelemetry-sdk'") as opentelemetry_import: import opentelemetry import opentelemetry.trace diff --git a/haystack/tracing/tracer.py b/haystack/tracing/tracer.py index 756f13b815..f42443694b 100644 --- 
a/haystack/tracing/tracer.py +++ b/haystack/tracing/tracer.py @@ -1,8 +1,9 @@ import abc import contextlib -import logging import os -from typing import Dict, Any, Optional, Iterator +from typing import Any, Dict, Iterator, Optional + +from haystack import logging HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR = "HAYSTACK_AUTO_TRACE_ENABLED" HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR = "HAYSTACK_CONTENT_TRACING_ENABLED" @@ -157,7 +158,9 @@ def auto_enable_tracing() -> None: Note that it will only work correctly if tracing was configured _before_ Haystack is imported. """ if os.getenv(HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR, "true").lower() == "false": - logger.info("Tracing disabled via '%s'", HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR) + logger.info( + "Tracing disabled via environment variable '{env_key}'", env_key=HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR + ) return if is_tracing_enabled(): @@ -166,7 +169,7 @@ def auto_enable_tracing() -> None: tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer() if tracer: enable_tracing(tracer) - logger.info("Tracing enabled via '%s'", tracer.__class__.__name__) + logger.info("Auto-enabled tracing for '{tracer}'", tracer=tracer.__class__.__name__) def _auto_configured_opentelemetry_tracer() -> Optional[Tracer]: @@ -195,6 +198,7 @@ def _auto_configured_datadog_tracer() -> Optional[Tracer]: # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed try: from ddtrace import tracer + from haystack.tracing.datadog import DatadogTracer if tracer.enabled: diff --git a/haystack/tracing/utils.py b/haystack/tracing/utils.py index f380f5c975..532a66ab4f 100644 --- a/haystack/tracing/utils.py +++ b/haystack/tracing/utils.py @@ -1,7 +1,7 @@ import json -import logging from typing import Any, Union +from haystack import logging logger = logging.getLogger(__name__) @@ -28,7 +28,7 @@ def coerce_tag_value(value: Any) -> Union[bool, str, int, float]: serializable = 
_serializable_value(value) return json.dumps(serializable) except Exception as error: - logger.debug("Failed to coerce tag value to string: %s", error, exc_info=True) + logger.debug("Failed to coerce tag value to string: {error}", error=error) # Our last resort is to convert the value to a string return str(value) diff --git a/haystack/utils/__init__.py b/haystack/utils/__init__.py index 5e372021c4..8c44cf6480 100644 --- a/haystack/utils/__init__.py +++ b/haystack/utils/__init__.py @@ -1,11 +1,11 @@ from .auth import Secret, deserialize_secrets_inplace +from .callable_serialization import deserialize_callable, serialize_callable from .device import ComponentDevice, Device, DeviceMap, DeviceType from .expit import expit from .filters import document_matches_filter from .jupyter import is_in_jupyter from .requests_utils import request_with_retry -from .callable_serialization import serialize_callable, deserialize_callable -from .type_serialization import serialize_type, deserialize_type +from .type_serialization import deserialize_type, serialize_type __all__ = [ "Secret", diff --git a/haystack/utils/auth.py b/haystack/utils/auth.py index 88d70dd488..75c348c7a6 100644 --- a/haystack/utils/auth.py +++ b/haystack/utils/auth.py @@ -1,8 +1,8 @@ -from enum import Enum import os -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union -from dataclasses import dataclass from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import Enum +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union class SecretType(Enum): diff --git a/haystack/utils/callable_serialization.py b/haystack/utils/callable_serialization.py index e8758abb53..e919f61499 100644 --- a/haystack/utils/callable_serialization.py +++ b/haystack/utils/callable_serialization.py @@ -1,6 +1,6 @@ import inspect import sys -from typing import Optional, Callable +from typing import Callable, Optional from haystack import DeserializationError diff --git 
a/haystack/utils/device.py b/haystack/utils/device.py index 0126066e51..64593b3009 100644 --- a/haystack/utils/device.py +++ b/haystack/utils/device.py @@ -1,9 +1,9 @@ -import logging import os from dataclasses import dataclass, field from enum import Enum from typing import Any, Dict, Optional, Tuple, Union +from haystack import logging from haystack.lazy_imports import LazyImport logger = logging.getLogger(__name__) diff --git a/haystack/utils/filters.py b/haystack/utils/filters.py index 7551594e11..13ee16db80 100644 --- a/haystack/utils/filters.py +++ b/haystack/utils/filters.py @@ -1,6 +1,6 @@ -from typing import List, Any, Union, Dict from dataclasses import fields from datetime import datetime +from typing import Any, Dict, List, Union import pandas as pd diff --git a/haystack/utils/hf.py b/haystack/utils/hf.py index bc8a5e721d..ff4e65b33f 100644 --- a/haystack/utils/hf.py +++ b/haystack/utils/hf.py @@ -1,11 +1,11 @@ import copy import inspect -import logging from enum import Enum -from typing import Any, Dict, Optional, List, Union, Callable +from typing import Any, Callable, Dict, List, Optional, Union import requests +from haystack import logging from haystack.dataclasses import StreamingChunk from haystack.lazy_imports import LazyImport from haystack.utils.auth import Secret @@ -15,8 +15,8 @@ import torch with LazyImport(message="Run 'pip install transformers'") as transformers_import: + from huggingface_hub import HfApi, InferenceClient from huggingface_hub.utils import RepositoryNotFoundError - from huggingface_hub import InferenceClient, HfApi logger = logging.getLogger(__name__) @@ -173,7 +173,7 @@ def check_generation_params(kwargs: Optional[Dict[str, Any]], additional_accepte with LazyImport(message="Run 'pip install transformers[torch]'") as torch_and_transformers_import: - from transformers import StoppingCriteria, PreTrainedTokenizer, PreTrainedTokenizerFast, TextStreamer + from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast, 
StoppingCriteria, TextStreamer transformers_import.check() torch_import.check() diff --git a/haystack/utils/requests_utils.py b/haystack/utils/requests_utils.py index 245d7737fb..6d207f45b0 100644 --- a/haystack/utils/requests_utils.py +++ b/haystack/utils/requests_utils.py @@ -1,9 +1,8 @@ -from typing import Optional, List - import logging +from typing import List, Optional -from tenacity import retry, wait_exponential, retry_if_exception_type, stop_after_attempt, before_log, after_log import requests +from tenacity import after_log, before_log, retry, retry_if_exception_type, stop_after_attempt, wait_exponential logger = logging.getLogger(__file__) diff --git a/pyproject.toml b/pyproject.toml index ddb0fc9ae8..dfc34c8835 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -297,6 +297,7 @@ select = [ "T10", # flake8-debugger "W", # pycodestyle "YTT", # flake8-2020 + "I" # isort # "E", # pycodestyle # "NPY", # NumPy-specific rules # "PD", # pandas-vet diff --git a/releasenotes/notes/internal-logging-changes-a96533da55e309c3.yaml b/releasenotes/notes/internal-logging-changes-a96533da55e309c3.yaml new file mode 100644 index 0000000000..2bcf0d6626 --- /dev/null +++ b/releasenotes/notes/internal-logging-changes-a96533da55e309c3.yaml @@ -0,0 +1,5 @@ +--- +enhancements: + - | + Added a new `Logger` implementation which eases and enforces logging via key-word arguments. + This is an internal change only. The behavior of instances created via `logging.getLogger` is not affected. diff --git a/test/core/component/test_component.py b/test/core/component/test_component.py index 1bd9711148..8e43823a32 100644 --- a/test/core/component/test_component.py +++ b/test/core/component/test_component.py @@ -269,6 +269,6 @@ def run(self, value: int): assert MockComponent().__haystack_is_greedy__ assert ( caplog.text - == "WARNING root:component.py:165 Component 'MockComponent' has no variadic input, but it's marked as greedy." 
+ == "WARNING haystack.core.component.component:component.py:165 Component 'MockComponent' has no variadic input, but it's marked as greedy." " This is not supported and can lead to unexpected behavior.\n" ) diff --git a/test/test_logging.py b/test/test_logging.py index d2fc495e55..8bb93642ed 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -1,19 +1,20 @@ import builtins import json import logging +import os import sys from datetime import datetime, timezone from pathlib import Path -from unittest.mock import ANY, Mock +from test.tracing.utils import SpyingTracer +from unittest.mock import ANY import pytest from _pytest.capture import CaptureFixture from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch -from haystack import logging as haystack_logging -from test.tracing.utils import SpyingTracer import haystack.utils.jupyter +from haystack import logging as haystack_logging @pytest.fixture(autouse=True) @@ -40,23 +41,6 @@ def test_skip_logging_configuration( # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" - def test_skip_logging_if_structlog_not_installed( - self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture - ) -> None: - monkeypatch.delitem(sys.modules, "structlog", raising=False) - monkeypatch.setattr(builtins, "__import__", Mock(side_effect=ImportError)) - - haystack_logging.configure_logging() - - logger = logging.getLogger(__name__) - logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) - - # the pytest fixture caplog only captures logs being rendered from the stdlib logging module - assert caplog.messages == ["Hello, structured logging!"] - - # Nothing should be captured by capfd since structlog is not configured - assert capfd.readouterr().err == "" - class TestStructuredLoggingConsoleRendering: def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: @@ -204,6 
+188,8 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M "key2": "value2", "level": "warning", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", } def test_logging_as_json(self, capfd: CaptureFixture) -> None: @@ -222,6 +208,8 @@ def test_logging_as_json(self, capfd: CaptureFixture) -> None: "key2": "value2", "level": "warning", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", } def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: @@ -241,6 +229,8 @@ def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypat "key2": "value2", "level": "warning", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", } def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: @@ -264,6 +254,8 @@ def function_that_raises_and_adds_to_stack_trace(): "event": "An error happened ", "level": "error", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", "exception": [ { "exc_type": "ValueError", @@ -322,6 +314,8 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca "timestamp": ANY, "trace_id": span.trace_id, "span_id": span.span_id, + "lineno": ANY, + "module": "test.test_logging", } def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: @@ -340,6 +334,8 @@ def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: "key2": "value2", "level": "warning", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", } def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: @@ -358,4 +354,179 @@ def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: "key2": "value2", "level": "warning", "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", + } + + +class TestCompositeLogger: + def test_correct_stack_level_with_stdlib_rendering( + self, 
monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture + ) -> None: + monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") + haystack_logging.configure_logging() + + logger = logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + # the pytest fixture caplog only captures logs being rendered from the stdlib logging module + assert caplog.messages == ["Hello, structured logging!"] + assert caplog.records[0].name == "test.test_logging" + assert caplog.records[0].lineno == 370 + + # Nothing should be captured by capfd since structlog is not configured + assert capfd.readouterr().err == "" + + def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=False) + + logger = haystack_logging.getLogger(__name__) + logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) + + output = capfd.readouterr().err + assert "test.test_logging" in output + assert "384" in output + + @pytest.mark.parametrize( + "method, expected_level", + [ + ("debug", "debug"), + ("info", "info"), + ("warning", "warning"), + ("error", "error"), + ("fatal", "critical"), + ("exception", "error"), + ("critical", "critical"), + ], + ) + def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = haystack_logging.getLogger(__name__) + + logger.setLevel(logging.DEBUG) + + getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2") + + output = capfd.readouterr().err + parsed_output = json.loads(output) # should not raise an error + + assert parsed_output == { + "event": "Hello, structured logging!", + "key": "logging", + "key1": "value1", + "key2": "value2", + "level": expected_level, + "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", + } + + def 
test_log(self, capfd: LogCaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = haystack_logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") + + output = capfd.readouterr().err + parsed_output = json.loads(output) + + assert parsed_output == { + "event": "Hello, structured 'logging'!", + "key": "logging", + "key1": "value1", + "key2": "value2", + "level": "debug", + "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", + } + + def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = haystack_logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") + + output = capfd.readouterr().err + parsed_output = json.loads(output) + + assert parsed_output == { + "event": "Hello, structured ''!", + "key": "", + "key1": "value1", + "key2": "value2", + "level": "debug", + "timestamp": ANY, + "lineno": ANY, + "module": "test.test_logging", } + + @pytest.mark.parametrize( + "method, expected_level", + [ + ("debug", "debug"), + ("info", "info"), + ("warning", "warning"), + ("error", "error"), + ("fatal", "critical"), + ("exception", "exception"), + ("critical", "critical"), + ], + ) + def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = haystack_logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + with pytest.raises(TypeError): + getattr(logger, method)("Hello, structured logging %s!", "logging") + + @pytest.mark.parametrize( + "method, expected_level", + [ + ("debug", "debug"), + ("info", "info"), + ("warning", "warning"), + ("error", "error"), + ("fatal", "critical"), + ("exception", "exception"), + ("critical", 
"critical"), + ], + ) + def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: + haystack_logging.configure_logging(use_json=True) + + logger = haystack_logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + # does not raise - hence we need to check this separately + getattr(logger, method)("Hello, structured logging %s!", key="logging") + + def test_that_haystack_logger_is_used(self) -> None: + """Forces the usage of the Haystack logger instead of the standard library logger.""" + allowed_list = [Path("haystack") / "logging.py"] + for root, dirs, files in os.walk("haystack"): + for file in files: + path = Path(root) / file + + if not path.suffix.endswith(".py"): + continue + + if path in allowed_list: + continue + + content = path.read_text(encoding="utf-8") + + # that looks like somebody is using our standard logger + if " logging.getLogger" in content: + haystack_logger_in_content = " haystack import logging" in content or ", logging" in content + assert haystack_logger_in_content, ( + f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " + f"standard library logger and add plenty of keyword arguments." + )