Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: migrate to canals==0.7.0 #5647

Merged
merged 22 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8c10b39
add default_to_dict and default_from_dict placeholders to ease migrat…
ZanSara Aug 28, 2023
788f451
canals==0.7.0
ZanSara Aug 28, 2023
268bd0e
whisper components
ZanSara Aug 28, 2023
ce22408
add to_dict/from_dict stubs
ZanSara Aug 28, 2023
66e06d9
import serialization methods in init to hide canals imports
ZanSara Aug 28, 2023
da7d463
reno
ZanSara Aug 28, 2023
fd98c4e
export deserializationerror too
ZanSara Aug 29, 2023
7e637a2
Update haystack/preview/__init__.py
ZanSara Aug 29, 2023
c9b9df1
Merge branch 'main' into default-to-from-dict
ZanSara Aug 29, 2023
7f20eaa
serialization methods for LocalWhisperTranscriber (#5648)
ZanSara Aug 29, 2023
3476ac1
chore: serialization methods for `FileExtensionClassifier` (#5651)
ZanSara Aug 29, 2023
78f79a3
chore: serialization methods for `SentenceTransformersDocumentEmbedde…
ZanSara Aug 29, 2023
5420e6f
serialization methods for SentenceTransformersTextEmbedder (#5653)
ZanSara Aug 29, 2023
fbfd129
serialization methods for TextFileToDocument (#5654)
ZanSara Aug 29, 2023
731b0ff
chore: serialization methods for `RemoteWhisperTranscriber` (#5650)
ZanSara Aug 29, 2023
7067f67
Add default to_dict and from_dict in document stores built with facto…
silvanocerza Aug 29, 2023
a8e5df1
fix tests (#5671)
ZanSara Aug 29, 2023
ec65f7e
chore: simplify serialization methods for `MemoryDocumentStore` (#5667)
ZanSara Aug 29, 2023
fa9e3f0
chore: serialization methods for `MemoryRetriever` (#5663)
ZanSara Aug 29, 2023
156d44c
chore: serialization methods for `DocumentWriter` (#5661)
ZanSara Aug 29, 2023
80affa3
Merge branch 'main' into default-to-from-dict
ZanSara Aug 29, 2023
358d696
black
ZanSara Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions haystack/preview/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from canals import component, Pipeline
from canals.serialization import default_from_dict, default_to_dict, DeserializationError
ZanSara marked this conversation as resolved.
Show resolved Hide resolved
from haystack.preview.dataclasses import *
13 changes: 13 additions & 0 deletions haystack/preview/components/audio/whisper_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,19 @@ def warm_up(self) -> None:
if not self._model:
self._model = whisper.load_model(self.model_name, device=self.device)

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: enable once serialization is implemented for this component.
# NOTE(review): the keyword `model_name_or_path` presumably has to match the __init__ parameter name - confirm.
# return default_to_dict(self, model_name_or_path=self.model_name, device=self.device, whisper_params=self.whisper_params)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "LocalWhisperTranscriber":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a LocalWhisperTranscriber instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

@component.output_types(documents=List[Document])
def run(self, audio_files: List[Path], whisper_params: Optional[Dict[str, Any]] = None):
"""
Expand Down
20 changes: 13 additions & 7 deletions haystack/preview/components/audio/whisper_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,23 @@ def __init__(
if not api_key:
raise ValueError("API key is None.")

self.model_name = model_name
self.api_key = api_key
self.api_base = api_base
self.whisper_params = whisper_params or {}

self.model_name = model_name
self.init_parameters = {
"api_key": self.api_key,
"model_name": self.model_name,
"api_base": self.api_base,
"whisper_params": self.whisper_params,
}
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: enable once serialization is implemented for this component.
# NOTE(review): the call below would write `api_key` into the serialized output - confirm that is acceptable.
# return default_to_dict(self, model_name=self.model_name, api_key=self.api_key, api_base=self.api_base, whisper_params=self.whisper_params)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a RemoteWhisperTranscriber instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

@component.output_types(documents=List[Document])
def run(self, audio_files: List[Path], whisper_params: Optional[Dict[str, Any]] = None):
Expand Down
15 changes: 14 additions & 1 deletion haystack/preview/components/classifiers/file_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import mimetypes
from collections import defaultdict
from pathlib import Path
from typing import List, Union, Optional
from typing import List, Union, Optional, Dict, Any

from haystack.preview import component

Expand Down Expand Up @@ -44,6 +44,19 @@ def __init__(self, mime_types: List[str]):
component.set_output_types(self, unclassified=List[Path], **{mime_type: List[Path] for mime_type in mime_types})
self.mime_types = mime_types

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: enable once serialization is implemented for this component.
# `mime_types` is the only value __init__ receives and stores on the instance.
# return default_to_dict(self, mime_types=self.mime_types)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "FileExtensionClassifier":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a FileExtensionClassifier instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

def run(self, paths: List[Union[str, Path]]):
"""
Run the FileExtensionClassifier.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import List, Optional, Union, Dict, Any

from haystack.preview import component
from haystack.preview import Document
Expand Down Expand Up @@ -50,6 +50,19 @@ def __init__(
self.metadata_fields_to_embed = metadata_fields_to_embed or []
self.embedding_separator = embedding_separator

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: replace `...` with the keyword arguments to serialize, then enable.
# return default_to_dict(self, ...)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SentenceTransformersDocumentEmbedder":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a SentenceTransformersDocumentEmbedder instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

def warm_up(self):
"""
Load the embedding backend.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import List, Optional, Union, Dict, Any

from haystack.preview import component
from haystack.preview.embedding_backends.sentence_transformers_backend import (
Expand Down Expand Up @@ -48,6 +48,19 @@ def __init__(
self.progress_bar = progress_bar
self.normalize_embeddings = normalize_embeddings

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: replace `...` with the keyword arguments to serialize, then enable.
# return default_to_dict(self, ...)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SentenceTransformersTextEmbedder":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a SentenceTransformersTextEmbedder instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

def warm_up(self):
"""
Load the embedding backend.
Expand Down
15 changes: 14 additions & 1 deletion haystack/preview/components/file_converters/txt.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from pathlib import Path
from typing import Optional, List, Union, Dict
from typing import Optional, List, Union, Dict, Any

from canals.errors import PipelineRuntimeError
from tqdm import tqdm
Expand Down Expand Up @@ -61,6 +61,19 @@ def __init__(
self.id_hash_keys = id_hash_keys or []
self.progress_bar = progress_bar

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: replace `...` with the keyword arguments to serialize, then enable.
# return default_to_dict(self, ...)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "TextFileToDocument":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a TextFileToDocument instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

@component.output_types(documents=List[Document])
def run(
self,
Expand Down
13 changes: 13 additions & 0 deletions haystack/preview/components/retrievers/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ def __init__(
self.top_k = top_k
self.scale_score = scale_score

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: replace `...` with the keyword arguments to serialize, then enable.
# return default_to_dict(self, ...)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "MemoryRetriever":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a MemoryRetriever instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# return default_from_dict(cls, data)

@component.output_types(documents=List[List[Document]])
def run(
self,
Expand Down
15 changes: 14 additions & 1 deletion haystack/preview/components/writers/document_writer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional
from typing import List, Optional, Dict, Any

from haystack.preview import component, Document
from haystack.preview.document_stores import DocumentStore, DuplicatePolicy
Expand All @@ -19,6 +19,19 @@ def __init__(self, document_store: DocumentStore, policy: DuplicatePolicy = Dupl
self.document_store = document_store
self.policy = policy

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than the annotated dictionary.
"""
# TODO: replace `...` with the keyword arguments to serialize, then enable.
# NOTE(review): `self.document_store` is an object, so it presumably needs its own serialization - confirm.
# return default_to_dict(self, ...)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "DocumentWriter":
"""
Deserialize this component from a dictionary.

Placeholder: the body holds only this docstring, so calling it currently
returns None rather than a DocumentWriter instance.

`data` is presumably a dictionary in the format produced by `to_dict`
(to be confirmed once both methods are implemented).
"""
# TODO: enable once deserialization is implemented for this component.
# NOTE(review): rebuilding `document_store` may need more than the default helper - confirm.
# return default_from_dict(cls, data)

def run(self, documents: List[Document], policy: Optional[DuplicatePolicy] = None):
"""
Run DocumentWriter on the given input data.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ dependencies = [
"jsonschema",

# Preview
"canals==0.5.0",
"canals==0.7.0",

# Agent events
"events",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
preview:
- Migrate all preview components to canals==0.7.0
- Add serialization and deserialization methods (to_dict and from_dict) for all Haystack preview components
Loading