From e9e993856ea3fa7ffb0452bb8e1164f0f06e4096 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Mon, 19 Jun 2023 17:36:20 +0200 Subject: [PATCH 1/7] fix: append `responses` if any --- src/argilla/client/feedback/dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/argilla/client/feedback/dataset.py b/src/argilla/client/feedback/dataset.py index 0dc3a70e48..36e780b4a2 100644 --- a/src/argilla/client/feedback/dataset.py +++ b/src/argilla/client/feedback/dataset.py @@ -759,13 +759,12 @@ def format_as(self, format: Literal["datasets"]) -> "Dataset": dataset[question.name].append( [ { - "user_id": r.user_id, - "value": r.values[question.name].value, - "status": r.status, + "user_id": r.user_id or None, + "value": r.values[question.name].value or None, + "status": r.status or None, } for r in record.responses ] - or None ) dataset["metadata"].append(json.dumps(record.metadata) if record.metadata else None) dataset["external_id"].append(record.external_id or None) From 8610db00bdee41e1a06e19f4a9e662f2b2f070a2 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Mon, 19 Jun 2023 17:37:29 +0200 Subject: [PATCH 2/7] test: `FeedbackRecord` with no responses fixture --- tests/client/conftest.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/client/conftest.py b/tests/client/conftest.py index cac7dcd577..b17f56a4a8 100644 --- a/tests/client/conftest.py +++ b/tests/client/conftest.py @@ -428,17 +428,6 @@ def feedback_dataset_records() -> List[FeedbackRecord]: ), FeedbackRecord( fields={"text": "This is a negative example", "label": "negative"}, - responses=[ - { - "values": { - "question-1": {"value": "This is a response to question 1"}, - "question-2": {"value": 1}, - "question-3": {"value": "a"}, - "question-4": {"value": ["a", "b"]}, - }, - "status": "submitted", - } - ], metadata={"another unit": "test"}, external_id="2", ), From de59d08c5bf9876a6d78f58529d8d3a361cd071e Mon Sep 17 00:00:00 2001 From: Alvaro 
Bartolome Date: Mon, 19 Jun 2023 17:49:10 +0200 Subject: [PATCH 3/7] docs: update `CHANGELOG.md` --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5113acc526..ca69ab56d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ These are the section headers that we use: ### Fixed - Replaced `np.float` alias by `float` to avoid `AttributeError` when using `find_label_errors` function with `numpy>=1.24.0` ([#3214](https://github.com/argilla-io/argilla/pull/3214)). +- Fixed `format_as("datasets")` when no responses or optional respones in `FeedbackRecord`, to set their value to what 🤗 Datasets expects instead of just `None` ([#3224](https://github.com/argilla-io/argilla/issues/3224)). ### Added From dc92f201f9bbcf25bcde8173b73e56343ef23ee3 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Mon, 19 Jun 2023 17:50:13 +0200 Subject: [PATCH 4/7] docs: rename `issues` to `pull` --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca69ab56d8..ac8dea4b65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ These are the section headers that we use: ### Fixed - Replaced `np.float` alias by `float` to avoid `AttributeError` when using `find_label_errors` function with `numpy>=1.24.0` ([#3214](https://github.com/argilla-io/argilla/pull/3214)). -- Fixed `format_as("datasets")` when no responses or optional respones in `FeedbackRecord`, to set their value to what 🤗 Datasets expects instead of just `None` ([#3224](https://github.com/argilla-io/argilla/issues/3224)). +- Fixed `format_as("datasets")` when no responses or optional respones in `FeedbackRecord`, to set their value to what 🤗 Datasets expects instead of just `None` ([#3224](https://github.com/argilla-io/argilla/pull/3224)). 
### Added From d48543acedf0028fb33566a94950c9e579a09a4b Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Tue, 20 Jun 2023 11:58:49 +0200 Subject: [PATCH 5/7] fix: `format_as("datasets")` responses --- src/argilla/client/feedback/dataset.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/argilla/client/feedback/dataset.py b/src/argilla/client/feedback/dataset.py index 36e780b4a2..a17ac379a0 100644 --- a/src/argilla/client/feedback/dataset.py +++ b/src/argilla/client/feedback/dataset.py @@ -757,14 +757,16 @@ def format_as(self, format: Literal["datasets"]) -> "Dataset": dataset[field.name].append(record.fields[field.name]) for question in self.questions: dataset[question.name].append( - [ - { - "user_id": r.user_id or None, - "value": r.values[question.name].value or None, - "status": r.status or None, - } - for r in record.responses - ] + { + "user_id": [r.user_id for r in record.responses], + "value": [ + r.values[question.name].value if question.name in r.values else None + for r in record.responses + ], + "status": [r.status for r in record.responses], + } + if record.responses + else None ) dataset["metadata"].append(json.dumps(record.metadata) if record.metadata else None) dataset["external_id"].append(record.external_id or None) From b68278db465e6a31441c783a4fd89c6453a9d4e8 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Tue, 20 Jun 2023 12:14:36 +0200 Subject: [PATCH 6/7] fix: `push_to_huggingface` parsing of UUIDs (#3231) # Description Since `FeedbackRecord.user_id`s were recently constrained to be `UUID`s instead of `str`s, the record converted via `dict()` contained `UUID`s, which are not JSON-serializable. This caused failures when adding the example record to the `DatasetCard`, so the record is now round-tripped through its JSON representation instead of using the `dict()` conversion.
**Type of change** - [X] Bug fix (non-breaking change which fixes an issue) **How Has This Been Tested** - [X] Re-run unit tests to pass when generating the `DatasetCard` for `FeedbackDataset`s **Checklist** - [X] I have merged the original branch into my forked branch - [ ] I added relevant documentation - [X] follows the style guidelines of this project - [X] I did a self-review of my code - [ ] I made corresponding changes to the documentation - [X] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- src/argilla/client/feedback/dataset.py | 5 +++-- src/argilla/client/feedback/utils.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/argilla/client/feedback/dataset.py b/src/argilla/client/feedback/dataset.py index a17ac379a0..15db954474 100644 --- a/src/argilla/client/feedback/dataset.py +++ b/src/argilla/client/feedback/dataset.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import json import logging import tempfile @@ -844,7 +845,7 @@ def push_to_huggingface(self, repo_id: str, generate_card: Optional[bool] = True argilla_fields=self.fields, argilla_questions=self.questions, argilla_guidelines=self.guidelines, - argilla_record=self.records[0].dict(), + argilla_record=json.loads(self.records[0].json()), huggingface_record=hfds[0], ) card.push_to_hub(repo_id, repo_type="dataset", token=kwargs.get("token")) @@ -893,7 +894,7 @@ def from_huggingface(cls, repo_id: str, *args: Any, **kwargs: Any) -> "FeedbackD repo_type="dataset", **hub_auth, ) - with open(config_path, "rb") as f: + with open(config_path, "r") as f: config = FeedbackDatasetConfig.parse_raw(f.read()) cls = cls( diff --git a/src/argilla/client/feedback/utils.py b/src/argilla/client/feedback/utils.py index 9fc39291b4..f72f7190e0 100644 --- a/src/argilla/client/feedback/utils.py +++ b/src/argilla/client/feedback/utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Union from pydantic import ( BaseModel, From b23acdb74efb5abbec8bfc261314f4b05547743a Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Tue, 20 Jun 2023 12:21:57 +0200 Subject: [PATCH 7/7] docs: update `CHANGELOG.md` --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac8dea4b65..d64f1a9030 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ These are the section headers that we use: - Replaced `np.float` alias by `float` to avoid `AttributeError` when using `find_label_errors` function with `numpy>=1.24.0` ([#3214](https://github.com/argilla-io/argilla/pull/3214)). 
- Fixed `format_as("datasets")` when no responses or optional respones in `FeedbackRecord`, to set their value to what 🤗 Datasets expects instead of just `None` ([#3224](https://github.com/argilla-io/argilla/pull/3224)). +- Fixed `push_to_huggingface()` when `generate_card=True` (default behaviour), as we were passing a sample record to the `ArgillaDatasetCard` class, and `UUID`s introduced in 1.10.0 ([#3192](https://github.com/argilla-io/argilla/pull/3192)), are not JSON-serializable ([#3231](https://github.com/argilla-io/argilla/pull/3231)). ### Added