argilla-io · gabrielmbmb · Sep 26, 2023 · Sep 25, 2023 · Sep 25, 2023 · Sep 25, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@ These are the section headers that we use:
 
 - Added new endpoint `POST /api/v1/datasets/:dataset_id/metadata-properties` for dataset metadata property creation ([#3813](https://github.com/argilla-io/argilla/pull/3813))
 - Added new endpoint `GET /api/v1/datasets/:dataset_id/metadata-properties` for listing dataset metadata property ([#3813](https://github.com/argilla-io/argilla/pull/3813))
+- Added `TermsMetadataProperty`, `IntegerMetadataProperty` and `FloatMetadataProperty` classes, which allow defining metadata properties for a `FeedbackDataset` ([#3818](https://github.com/argilla-io/argilla/pull/3818)).
 
 ### Changed
 

diff --git a/src/argilla/__init__.py b/src/argilla/__init__.py
@@ -78,11 +78,14 @@
     from argilla.feedback import (
         FeedbackDataset,
         FeedbackRecord,
+        FloatMetadataProperty,
+        IntegerMetadataProperty,
         LabelQuestion,
         MultiLabelQuestion,
         RankingQuestion,
         RatingQuestion,
         ResponseSchema,
+        TermsMetadataProperty,
         TextField,
         TextQuestion,
         ValueSchema,
@@ -109,6 +112,9 @@
         "TextField",
         "TextQuestion",
         "ValueSchema",
+        "IntegerMetadataProperty",
+        "FloatMetadataProperty",
+        "TermsMetadataProperty",
     ],
     "client.api": [
         "copy",

diff --git a/src/argilla/client/feedback/dataset/base.py b/src/argilla/client/feedback/dataset/base.py
@@ -23,7 +23,7 @@
     FeedbackRecord,
     FieldSchema,
 )
-from argilla.client.feedback.schemas.types import AllowedQuestionTypes
+from argilla.client.feedback.schemas.types import AllowedMetadataPropertyTypes, AllowedQuestionTypes
 from argilla.client.feedback.training.schemas import (
     TrainingTaskForChatCompletion,
     TrainingTaskForDPO,
@@ -45,6 +45,7 @@
     from argilla.client.feedback.schemas.types import (
         AllowedFieldTypes,
         AllowedRemoteFieldTypes,
+        AllowedRemoteMetadataPropertyTypes,
         AllowedRemoteQuestionTypes,
     )
 
@@ -60,13 +61,20 @@ def __init__(
         *,
         fields: Union[List["AllowedFieldTypes"], List["AllowedRemoteFieldTypes"]],
         questions: Union[List["AllowedQuestionTypes"], List["AllowedRemoteQuestionTypes"]],
+        metadata_properties: Optional[
+            Union[List["AllowedMetadataPropertyTypes"], List["AllowedRemoteMetadataPropertyTypes"]]
+        ] = None,
         guidelines: Optional[str] = None,
+        # TODO: uncomment once ready in the API
+        # extra_metadata_allowed: bool = True,
     ) -> None:
         """Initializes a `FeedbackDatasetBase` instance locally.
 
         Args:
             fields: contains the fields that will define the schema of the records in the dataset.
             questions: contains the questions that will be used to annotate the dataset.
+            metadata_properties: contains the metadata properties that will be indexed
+                and can be used to filter the dataset. Defaults to `None`.
             guidelines: contains the guidelines for annotating the dataset. Defaults to `None`.
 
         Raises:
@@ -117,6 +125,22 @@ def __init__(
             raise ValueError("At least one question in `questions` must be required (`required=True`).")
         self._questions = questions
 
+        if metadata_properties is not None:
+            unique_names = set()
+            for metadata_property in metadata_properties:
+                if not isinstance(metadata_property, AllowedMetadataPropertyTypes.__args__):
+                    raise TypeError(
+                        f"Expected `metadata_properties` to be a list of"
+                        f" `{'`, `'.join([arg.__name__ for arg in AllowedMetadataPropertyTypes.__args__])}` got a"
+                        f" metadata property in the list with type type {type(metadata_property)} instead"
+                    )
+                if metadata_property.name in unique_names:
+                    raise ValueError(
+                        f"Expected `metadata_properties` to have unique names, got '{metadata_property.name}' twice instead."
+                    )
+                unique_names.add(metadata_property.name)
+        self._metadata_properties = metadata_properties
+
         if guidelines is not None:
             if not isinstance(guidelines, str):
                 raise TypeError(
@@ -127,6 +151,8 @@ def __init__(
                     "Expected `guidelines` to be either None (default) or a non-empty string, minimum length is 1."
                 )
         self._guidelines = guidelines
+        # TODO: uncomment once ready in the API
+        # self._extra_metadata_allowed = extra_metadata_allowed
 
     @property
     @abstractproperty

diff --git a/src/argilla/client/feedback/dataset/local.py b/src/argilla/client/feedback/dataset/local.py
@@ -20,7 +20,11 @@
 
 if TYPE_CHECKING:
     from argilla.client.feedback.schemas.records import FeedbackRecord
-    from argilla.client.feedback.schemas.types import AllowedFieldTypes, AllowedQuestionTypes
+    from argilla.client.feedback.schemas.types import (
+        AllowedFieldTypes,
+        AllowedMetadataPropertyTypes,
+        AllowedQuestionTypes,
+    )
 
 
 class FeedbackDataset(FeedbackDatasetBase, ArgillaMixin, UnificationMixin):
@@ -29,13 +33,18 @@ def __init__(
         *,
         fields: List["AllowedFieldTypes"],
         questions: List["AllowedQuestionTypes"],
+        metadata_properties: Optional[List["AllowedMetadataPropertyTypes"]] = None,
         guidelines: Optional[str] = None,
+        # TODO: uncomment when supported in the API
+        # extra_metadata_allowed: bool = True,
     ) -> None:
         """Initializes a `FeedbackDataset` instance locally.
 
         Args:
             fields: contains the fields that will define the schema of the records in the dataset.
             questions: contains the questions that will be used to annotate the dataset.
+            metadata_properties: contains the metadata properties that will be indexed
+                and can be used to filter the dataset. Defaults to `None`.
             guidelines: contains the guidelines for annotating the dataset. Defaults to `None`.
 
         Raises:
@@ -80,10 +89,34 @@ def __init__(
             ...             labels=["category-1", "category-2", "category-3"],
             ...         ),
             ...     ],
+            ...     metadata_properties=[
+            ...         rg.TermsMetadataProperty(
+            ...             name="metadata-property-1",
+            ...             values=["a", "b", "c"]
+            ...         ),
+            ...         rg.IntegerMetadataProperty(
+            ...             name="metadata-property-2",
+            ...             gt=0,
+            ...             lt=10,
+            ...         ),
+            ...         rg.FloatMetadataProperty(
+            ...             name="metadata-property-3",
+            ...             gt=-10.0,
+            ...             lt=10.0,
+            ...         ),
+            ...     ],
             ...     guidelines="These are the annotation guidelines.",
+            ...     # extra_metadata_allowed=False,  # TODO: uncomment when supported in the API
             ... )
         """
-        super().__init__(fields=fields, questions=questions, guidelines=guidelines)
+        super().__init__(
+            fields=fields,
+            questions=questions,
+            metadata_properties=metadata_properties,
+            guidelines=guidelines,
+            # TODO: uncomment when supported in the API
+            # extra_metadata_allowed=extra_metadata_allowed,
+        )
 
         self._records = []