Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add metadata properties to Python client #3818

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ These are the section headers that we use:

- Added new endpoint `POST /api/v1/datasets/:dataset_id/metadata-properties` for dataset metadata property creation ([#3813](https://github.com/argilla-io/argilla/pull/3813))
- Added new endpoint `GET /api/v1/datasets/:dataset_id/metadata-properties` for listing dataset metadata properties ([#3813](https://github.com/argilla-io/argilla/pull/3813))
- Added `TermsMetadataProperty`, `IntegerMetadataProperty` and `FloatMetadataProperty` classes for defining metadata properties for a `FeedbackDataset` ([#3818](https://github.com/argilla-io/argilla/pull/3818)).

### Changed

Expand Down
6 changes: 6 additions & 0 deletions src/argilla/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,14 @@
from argilla.feedback import (
FeedbackDataset,
FeedbackRecord,
FloatMetadataProperty,
IntegerMetadataProperty,
LabelQuestion,
MultiLabelQuestion,
RankingQuestion,
RatingQuestion,
ResponseSchema,
TermsMetadataProperty,
TextField,
TextQuestion,
ValueSchema,
Expand All @@ -109,6 +112,9 @@
"TextField",
"TextQuestion",
"ValueSchema",
"IntegerMetadataProperty",
"FloatMetadataProperty",
"TermsMetadataProperty",
],
"client.api": [
"copy",
Expand Down
28 changes: 27 additions & 1 deletion src/argilla/client/feedback/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
FeedbackRecord,
FieldSchema,
)
from argilla.client.feedback.schemas.types import AllowedQuestionTypes
from argilla.client.feedback.schemas.types import AllowedMetadataPropertyTypes, AllowedQuestionTypes
from argilla.client.feedback.training.schemas import (
TrainingTaskForChatCompletion,
TrainingTaskForDPO,
Expand All @@ -45,6 +45,7 @@
from argilla.client.feedback.schemas.types import (
AllowedFieldTypes,
AllowedRemoteFieldTypes,
AllowedRemoteMetadataPropertyTypes,
AllowedRemoteQuestionTypes,
)

Expand All @@ -60,13 +61,20 @@ def __init__(
*,
fields: Union[List["AllowedFieldTypes"], List["AllowedRemoteFieldTypes"]],
questions: Union[List["AllowedQuestionTypes"], List["AllowedRemoteQuestionTypes"]],
metadata_properties: Optional[
Union[List["AllowedMetadataPropertyTypes"], List["AllowedRemoteMetadataPropertyTypes"]]
] = None,
guidelines: Optional[str] = None,
# TODO: uncomment once ready in the API
# extra_metadata_allowed: bool = True,
) -> None:
"""Initializes a `FeedbackDatasetBase` instance locally.

Args:
fields: contains the fields that will define the schema of the records in the dataset.
questions: contains the questions that will be used to annotate the dataset.
metadata_properties: contains the metadata properties that will be indexed
and could be used to filter the dataset. Defaults to `None`.
guidelines: contains the guidelines for annotating the dataset. Defaults to `None`.

Raises:
Expand Down Expand Up @@ -117,6 +125,22 @@ def __init__(
raise ValueError("At least one question in `questions` must be required (`required=True`).")
self._questions = questions

if metadata_properties is not None:
unique_names = set()
for metadata_property in metadata_properties:
if not isinstance(metadata_property, AllowedMetadataPropertyTypes.__args__):
raise TypeError(
f"Expected `metadata_properties` to be a list of"
f" `{'`, `'.join([arg.__name__ for arg in AllowedMetadataPropertyTypes.__args__])}` got a"
f" metadata property in the list with type type {type(metadata_property)} instead"
)
if metadata_property.name in unique_names:
raise ValueError(
f"Expected `metadata_properties` to have unique names, got '{metadata_property.name}' twice instead."
)
unique_names.add(metadata_property.name)
self._metadata_properties = metadata_properties

if guidelines is not None:
if not isinstance(guidelines, str):
raise TypeError(
Expand All @@ -127,6 +151,8 @@ def __init__(
"Expected `guidelines` to be either None (default) or a non-empty string, minimum length is 1."
)
self._guidelines = guidelines
# TODO: uncomment once ready in the API
# self._extra_metadata_allowed = extra_metadata_allowed

@property
@abstractproperty
Expand Down
37 changes: 35 additions & 2 deletions src/argilla/client/feedback/dataset/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@

if TYPE_CHECKING:
from argilla.client.feedback.schemas.records import FeedbackRecord
from argilla.client.feedback.schemas.types import AllowedFieldTypes, AllowedQuestionTypes
from argilla.client.feedback.schemas.types import (
AllowedFieldTypes,
AllowedMetadataPropertyTypes,
AllowedQuestionTypes,
)


class FeedbackDataset(FeedbackDatasetBase, ArgillaMixin, UnificationMixin):
Expand All @@ -29,13 +33,18 @@ def __init__(
*,
fields: List["AllowedFieldTypes"],
questions: List["AllowedQuestionTypes"],
metadata_properties: Optional[List["AllowedMetadataPropertyTypes"]] = None,
guidelines: Optional[str] = None,
# TODO: uncomment when supported in the API
# extra_metadata_allowed: bool = True,
) -> None:
"""Initializes a `FeedbackDataset` instance locally.

Args:
fields: contains the fields that will define the schema of the records in the dataset.
questions: contains the questions that will be used to annotate the dataset.
metadata_properties: contains the metadata properties that will be indexed
and could be used to filter the dataset. Defaults to `None`.
guidelines: contains the guidelines for annotating the dataset. Defaults to `None`.

Raises:
Expand Down Expand Up @@ -80,10 +89,34 @@ def __init__(
... labels=["category-1", "category-2", "category-3"],
... ),
... ],
... metadata_properties=[
... rg.TermsMetadataProperty(
... name="metadata-property-1",
... values=["a", "b", "c"]
... ),
... rg.IntegerMetadataProperty(
... name="metadata-property-2",
... gt=0,
... lt=10,
... ),
... rg.FloatMetadataProperty(
... name="metadata-property-3",
... gt=-10.0,
... lt=10.0,
... ),
... ],
... guidelines="These are the annotation guidelines.",
... extra_metadata_allowed=False,
... )
"""
super().__init__(fields=fields, questions=questions, guidelines=guidelines)
super().__init__(
fields=fields,
questions=questions,
metadata_properties=metadata_properties,
guidelines=guidelines,
# TODO: uncomment when supported in the API
# extra_metadata_allowed=extra_metadata_allowed,
)

self._records = []

Expand Down
Loading
Loading