Skip to content

Commit

Permalink
Support features in metadata configs (#7182)
Browse files (browse the repository at this point in the history)
* Test metadata configs with features

* Support features as metadata configs
  • Loading branch information
albertvillanova authored Oct 9, 2024
1 parent 3822805 commit afe875a
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/datasets/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from huggingface_hub import DatasetCardData

from ..config import METADATA_CONFIGS_FIELD
from ..features import Features
from ..info import DatasetInfo, DatasetInfosDict
from ..naming import _split_re
from ..utils.logging import get_logger
Expand Down Expand Up @@ -152,8 +153,12 @@ def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) -> "Metadata
cls._raise_if_data_files_field_not_valid(metadata_config)
return cls(
{
config["config_name"]: {param: value for param, value in config.items() if param != "config_name"}
for config in metadata_configs
config.pop("config_name"): {
param: value if param != "features" else Features._from_yaml_list(value)
for param, value in config.items()
}
for metadata_config in metadata_configs
if (config := metadata_config.copy())
}
)
return cls()
Expand Down
24 changes: 24 additions & 0 deletions tests/test_metadata_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from huggingface_hub import DatasetCard, DatasetCardData

from datasets.config import METADATA_CONFIGS_FIELD
from datasets.features import Features, Value
from datasets.info import DatasetInfo
from datasets.utils.metadata import MetadataConfigs

Expand Down Expand Up @@ -93,6 +94,21 @@ def _dedent(string: str) -> str:
"""


# README front matter declaring a single "default" config whose `features`
# are written as a YAML list of name/dtype entries. The nesting matters:
# `features` must be a key of the same list item as `config_name`, and each
# `dtype` must belong to the same entry as its `name`.
README_METADATA_WITH_FEATURES = f"""\
---
{METADATA_CONFIGS_FIELD}:
  - config_name: default
    features:
      - name: id
        dtype: int64
      - name: name
        dtype: string
      - name: score
        dtype: float64
---
"""


# Expected metadata configs paired with README_METADATA_SINGLE_CONFIG in the
# parametrized dataset-card test: one config, keyed by its name.
EXPECTED_METADATA_SINGLE_CONFIG = {
    "custom": {
        "data_dir": "v1",
        "drop_labels": True,
    },
}
EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_FLAG = {
"v1": {"data_dir": "v1", "drop_labels": True},
Expand All @@ -102,6 +118,13 @@ def _dedent(string: str) -> str:
"custom": {"data_dir": "custom", "drop_labels": True},
"default": {"data_dir": "data", "drop_labels": False},
}
# Expected metadata configs paired with README_METADATA_WITH_FEATURES in the
# parametrized dataset-card test: the "default" config carries a Features
# object with one Value per declared column, in declaration order.
EXPECTED_METADATA_WITH_FEATURES = {
    "default": {
        "features": Features(
            {
                "id": Value(dtype="int64"),
                "name": Value(dtype="string"),
                "score": Value(dtype="float64"),
            }
        ),
    },
}


@pytest.fixture
Expand Down Expand Up @@ -227,6 +250,7 @@ def test_from_yaml_string(self):
(README_METADATA_SINGLE_CONFIG, EXPECTED_METADATA_SINGLE_CONFIG, "custom"),
(README_METADATA_TWO_CONFIGS_WITH_DEFAULT_FLAG, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_FLAG, "v2"),
(README_METADATA_TWO_CONFIGS_WITH_DEFAULT_NAME, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_NAME, "default"),
(README_METADATA_WITH_FEATURES, EXPECTED_METADATA_WITH_FEATURES, "default"),
],
)
def test_metadata_configs_dataset_card_data(
Expand Down

0 comments on commit afe875a

Please sign in to comment.