diff --git a/CHANGELOG.md b/CHANGELOG.md index f23af2e02..d1b546191 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 shared memory implementation can be used by passing `use_legacy_shared_mem_impl` to `unshard.py`. - Refactor weight initialization. IMPORTANT: this does not maintain backwards-compatibility with older configs; the jobs will still run, but may produce different outputs. - Changed the behavior of the Lion optimizer to only record the update cosine similarity when `optimizer.record_update_metrics` is `True` in order to be consistent with the API. +- Added HF datasets to `olmo_data`, and changed the downstream evals to load data from the package. ### Fixed diff --git a/olmo/config.py b/olmo/config.py index 3b49e3250..a0b09a21f 100644 --- a/olmo/config.py +++ b/olmo/config.py @@ -1176,6 +1176,8 @@ class TrainConfig(BaseConfig): hf_datasets_cache_dir: Optional[str] = None """ + Deprecated; HF datasets are now stored in `olmo_data.hf_datasets`. + Path to cache directory of HF datasets saved with `datasets.save_to_disk`. """ diff --git a/olmo/eval/__init__.py b/olmo/eval/__init__.py index b2d203012..bc8313c78 100644 --- a/olmo/eval/__init__.py +++ b/olmo/eval/__init__.py @@ -32,9 +32,7 @@ def build_downstream_evaluator( task_class = label_to_task_map[eval_cfg.label] if isinstance(task_class, tuple): task_class, task_kwargs = task_class - ds_eval_dataset = task_class( - tokenizer=tokenizer, datasets_cache_dir=train_config.hf_datasets_cache_dir, **task_kwargs - ) # type: ignore + ds_eval_dataset = task_class(tokenizer=tokenizer, **task_kwargs) # type: ignore data_config = eval_cfg.data if is_unit_test: ds_eval_sampler = None diff --git a/olmo/eval/downstream.py b/olmo/eval/downstream.py index d340d7786..a8da366e9 100644 --- a/olmo/eval/downstream.py +++ b/olmo/eval/downstream.py @@ -159,7 +159,6 @@ def __init__( tokenizer: Tokenizer, dataset_path: str, dataset_name: Union[str, Sequence[str], None] = None, - datasets_cache_dir: Optional[str] = None, model_ctx_len: int = 2048, split="validation", prompts=[None], # List of prompt variants to use @@ -183,7 +182,7 @@ def __init__( dataset_list = [] for ds_name in dataset_names: - dataset = load_hf_dataset(self.dataset_path, ds_name, split, datasets_cache_dir) + dataset = load_hf_dataset(self.dataset_path, ds_name, split) dataset_list.append(dataset) self.dataset = datasets.concatenate_datasets(dataset_list) @@ -400,13 +399,15 @@ class PIQA(ICLMultiChoiceTaskDataset): metric_type = "len_norm" def __init__( - self, tokenizer, dataset_path="piqa", dataset_name="plain_text", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="piqa", + dataset_name="plain_text", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -441,13 +442,15 @@ class HellaSwag(ICLMultiChoiceTaskDataset): metric_type = "len_norm" def __init__( - self, tokenizer, dataset_path="hellaswag", dataset_name=None, datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="hellaswag", + dataset_name=None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @classmethod @@ -506,14 +509,12 @@ def __init__( tokenizer, dataset_path="winogrande", dataset_name="winogrande_xl", - datasets_cache_dir: Optional[str] = None, ): # all winogrande datasets have same
val set super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def prep_examples(self): @@ -604,13 +605,15 @@ class OpenBookQA(ICLMultiChoiceTaskDataset): metric_type = "len_norm" def __init__( - self, tokenizer, dataset_path="openbookqa", dataset_name="main", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="openbookqa", + dataset_name="main", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -642,13 +645,15 @@ class BoolQ(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="boolq", dataset_name=None, datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="boolq", + dataset_name=None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -690,13 +695,15 @@ class SciQ(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="sciq", dataset_name=None, datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="sciq", + dataset_name=None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -735,13 +742,15 @@ class ArcEasy(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="ai2_arc", dataset_name="ARC-Easy", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="ai2_arc", + dataset_name="ARC-Easy", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -777,13 +786,11 @@ def __init__( tokenizer, dataset_path="ai2_arc", dataset_name="ARC-Challenge", - datasets_cache_dir: Optional[str] = None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @@ -819,13 +826,11 @@ def __init__( tokenizer, dataset_path="allenai/basic_arithmetic", dataset_name=None, - datasets_cache_dir: Optional[str] = None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @@ -847,13 +852,11 @@ def __init__( tokenizer, dataset_path="tau/commonsense_qa", dataset_name=None, - datasets_cache_dir: Optional[str] = None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @@ -870,13 +873,15 @@ class SocialIQa(ICLMultiChoiceTaskDataset): metric_type = "len_norm" def __init__( - self, tokenizer, dataset_path="social_i_qa", dataset_name=None, datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="social_i_qa", + dataset_name=None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -920,13 +925,15 @@ class COPA(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="super_glue", dataset_name="copa", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="super_glue", + dataset_name="copa", ): super().__init__( tokenizer=tokenizer, 
dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -965,13 +972,15 @@ class RTE(ICLMultiChoiceTaskDataset): metric_type = "len_norm" def __init__( - self, tokenizer, dataset_path="glue", dataset_name="rte", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="glue", + dataset_name="rte", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -1007,13 +1016,15 @@ class CommitmentBank(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="super_glue", dataset_name="cb", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="super_glue", + dataset_name="cb", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -1047,13 +1058,15 @@ class MRPC(ICLMultiChoiceTaskDataset): metric_type = "f1" def __init__( - self, tokenizer, dataset_path="glue", dataset_name="mrpc", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="glue", + dataset_name="mrpc", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @classmethod @@ -1115,13 +1128,15 @@ class SST2(ICLMultiChoiceTaskDataset): metric_type = "acc" def __init__( - self, tokenizer, dataset_path="glue", dataset_name="sst2", datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="glue", + dataset_name="sst2", ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) @classmethod @@ -1243,7 +1258,6 @@ def __init__( split="validation", prompt_variations=None, mc_labels=False, - datasets_cache_dir: Optional[str] = None, ): dataset_names = [] # Collect the relevant categories @@ -1270,7 +1284,7 @@ def __init__( raise ValueError(f"Unknown prompt variations: {prompt_variations}") # Need to grab the dev set for the few-shot prompts for name in dataset_names: - dev_set = load_hf_dataset(dataset_path, name, "dev", datasets_cache_dir) + dev_set = load_hf_dataset(dataset_path, name, "dev") self.dev_set[name] = dev_set super().__init__( tokenizer=tokenizer, @@ -1278,7 +1292,6 @@ def __init__( dataset_name=dataset_names, split=split, prompts=prompts, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -1353,13 +1366,11 @@ def __init__( tokenizer, dataset_path="trivia_qa", dataset_name="rc.wikipedia.nocontext", - datasets_cache_dir: Optional[str] = None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): @@ -1388,13 +1399,15 @@ class NaturalQuestionsCELoss(ICLMultiChoiceTaskDataset): metric_type = "ce_loss" def __init__( - self, tokenizer, dataset_path="nq_open", dataset_name=None, datasets_cache_dir: Optional[str] = None + self, + tokenizer, + dataset_path="nq_open", + dataset_name=None, ): super().__init__( tokenizer=tokenizer, dataset_path=dataset_path, dataset_name=dataset_name, - datasets_cache_dir=datasets_cache_dir, ) def doc_to_text(self, doc): diff --git a/olmo/util.py b/olmo/util.py index 56bffd26a..c320b7a26 100644 --- a/olmo/util.py +++ b/olmo/util.py @@ -26,6 +26,8 @@ from rich.text import Text from rich.traceback import 
Traceback +from olmo_data.data import get_data_path + +from .aliases import PathOrStr from .exceptions import ( OLMoCliError, @@ -34,14 +36,7 @@ OLMoNetworkError, OLMoThreadError, ) -from .torch_util import ( - barrier, - get_fs_local_rank, - get_global_rank, - get_local_rank, - get_node_rank, - is_distributed, -) +from .torch_util import get_global_rank, get_local_rank, get_node_rank, is_distributed try: from functools import cache @@ -656,67 +651,44 @@ def _http_get_bytes_range(scheme: str, host_name: str, path: str, bytes_start: i return result -def _load_hf_dataset_from_disk(hf_path: str, name: Optional[str], split: str, datasets_dir: str): - dataset_path = os.path.join(datasets_dir, hf_path, name or "none", split) - return datasets.load_from_disk(dataset_path) - - -def _save_hf_dataset_to_disk( +def save_hf_dataset_to_disk( dataset: datasets.DatasetDict | datasets.Dataset, hf_path: str, name: Optional[str], split: str, - datasets_dir: str, + datasets_dir: PathOrStr, ): - dataset_path = os.path.join(datasets_dir, hf_path, name or "none", split) - return dataset.save_to_disk(dataset_path) + """ + Saves an HF dataset to disk under `datasets_dir`. It can be used to add an HF dataset + to `olmo_data` as follows: + ``` + import datasets -def load_hf_dataset(path: str, name: Optional[str], split: str, datasets_cache_dir: Optional[str] = None): - dataset = None + from olmo.util import save_hf_dataset_to_disk - # First try to load dataset on only FS rank 0, to avoid unnecessary network load. - # This will hopefully cache the dataset for use in other FS ranks. - if get_fs_local_rank() == 0: - # Try get dataset from disk. - if datasets_cache_dir is not None: - try: - dataset = _load_hf_dataset_from_disk(path, name, split, datasets_cache_dir) - except FileNotFoundError: - log.info( - "Path %s name %s split %s not present in local dir %s, loading from online", - path, - name, - split, - datasets_cache_dir, - ) + path, name, split = ... - # Get dataset from online if not available on disk - if dataset is None: - dataset = datasets.load_dataset( - path=path, - name=name, - split=split, - trust_remote_code=True, + dataset = datasets.load_dataset(path, name=name, split=split) + save_hf_dataset_to_disk(dataset, path, name, split, "olmo_data/hf_datasets") + ``` + """ + dataset_path = Path(datasets_dir) / hf_path / (name or "none") / split + return dataset.save_to_disk(str(dataset_path)) + + +def load_hf_dataset(path: str, name: Optional[str], split: str): + """ + Loads a HuggingFace dataset. The dataset is assumed to have been saved using + `save_hf_dataset_to_disk` and to be located in `olmo_data/hf_datasets`.
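+ + For example, a minimal usage sketch that loads the ARC-Challenge validation split added to the package in this change (under `olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation`): + ``` + from olmo.util import load_hf_dataset + + dataset = load_hf_dataset("ai2_arc", "ARC-Challenge", "validation") + ```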
+ """ + dataset_rel_path = os.path.join("hf_datasets", path, name or "none", split) + with get_data_path(dataset_rel_path) as dataset_path: + if not dataset_path.is_dir(): + raise NotADirectoryError( + f"HF dataset {path} name {name} split {split} not found in directory {dataset_rel_path}" ) - assert isinstance(dataset, (datasets.DatasetDict, datasets.Dataset)) - if datasets_cache_dir is not None: - _save_hf_dataset_to_disk(dataset, path, name, split, datasets_cache_dir) - barrier() - - # Dataset is loaded in FS rank 0 - if dataset is not None: - return dataset - - # Load dataset on non-zero FS ranks - if datasets_cache_dir is not None: - return _load_hf_dataset_from_disk(path, name, split, datasets_cache_dir) - return datasets.load_dataset( - path=path, - name=name, - split=split, - trust_remote_code=True, - ) + return datasets.load_from_disk(str(dataset_path)) def default_thread_count() -> int: diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..f6f429f77 Binary files /dev/null and b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json new file mode 100644 index 000000000..50dbf3b97 --- /dev/null +++ b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json @@ -0,0 +1,79 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "ARC-Challenge", + "dataset_name": "ai2_arc", + "dataset_size": 821931, + "description": "", + "download_checksums": { + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Challenge/train-00000-of-00001.parquet": { + "num_bytes": 189909, + "checksum": null + }, + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Challenge/test-00000-of-00001.parquet": { + "num_bytes": 203808, + "checksum": null + }, + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Challenge/validation-00000-of-00001.parquet": { + "num_bytes": 55743, + "checksum": null + } + }, + "download_size": 449460, + "features": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "answerKey": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 1271391, + "splits": { + "train": { + "name": "train", + "num_bytes": 349760, + "num_examples": 1119, + "dataset_name": "ai2_arc" + }, + "test": { + "name": "test", + "num_bytes": 375511, + "num_examples": 1172, + "dataset_name": "ai2_arc" + }, + "validation": { + "name": "validation", + "num_bytes": 96660, + "num_examples": 299, + "dataset_name": "ai2_arc" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json new file mode 100644 index 000000000..9b1d2ba71 --- /dev/null +++ b/olmo_data/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } 
+ ], + "_fingerprint": "5336412f86bd5bc2", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..e060f69d3 Binary files /dev/null and b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json new file mode 100644 index 000000000..5018cf4d8 --- /dev/null +++ b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json @@ -0,0 +1,79 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "ARC-Easy", + "dataset_name": "ai2_arc", + "dataset_size": 1433908, + "description": "", + "download_checksums": { + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Easy/train-00000-of-00001.parquet": { + "num_bytes": 330598, + "checksum": null + }, + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Easy/test-00000-of-00001.parquet": { + "num_bytes": 346257, + "checksum": null + }, + "hf://datasets/ai2_arc@210d026faf9955653af8916fad021475a3f00453/ARC-Easy/validation-00000-of-00001.parquet": { + "num_bytes": 86080, + "checksum": null + } + }, + "download_size": 762935, + "features": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "answerKey": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 2196843, + "splits": { + "train": { + "name": "train", + "num_bytes": 619000, + "num_examples": 2251, + "dataset_name": "ai2_arc" + }, + "test": { + "name": "test", + "num_bytes": 657514, + "num_examples": 2376, + "dataset_name": "ai2_arc" + }, + "validation": { + "name": "validation", + "num_bytes": 157394, + "num_examples": 570, + "dataset_name": "ai2_arc" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/state.json b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/state.json new file mode 100644 index 000000000..d38e28802 --- /dev/null +++ b/olmo_data/hf_datasets/ai2_arc/ARC-Easy/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e5c473da5a36fe31", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..973f2a774 Binary files /dev/null and b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json new file mode 
100644 index 000000000..9853ee47f --- /dev/null +++ b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json @@ -0,0 +1,66 @@ +{ + "builder_name": "json", + "citation": "", + "config_name": "default", + "dataset_name": "basic_arithmetic", + "dataset_size": 354087, + "description": "", + "download_checksums": { + "hf://datasets/allenai/basic_arithmetic@2d2dd39418ba88d2d4c8dbe6619050229ed4fe2f/validation.jsonl": { + "num_bytes": 564087, + "checksum": null + } + }, + "download_size": 564087, + "features": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "text": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "label": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + } + }, + "answerKey": { + "dtype": "string", + "_type": "Value" + }, + "type_tag": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 918174, + "splits": { + "validation": { + "name": "validation", + "num_bytes": 354087, + "num_examples": 3000, + "dataset_name": "basic_arithmetic" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/state.json b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/state.json new file mode 100644 index 000000000..72d1efb7f --- /dev/null +++ b/olmo_data/hf_datasets/allenai/basic_arithmetic/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7d1f788fe546eab0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..9b89d60cf Binary files /dev/null and b/olmo_data/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/boolq/none/validation/dataset_info.json b/olmo_data/hf_datasets/boolq/none/validation/dataset_info.json new file mode 100644 index 000000000..22a3ca1cb --- /dev/null +++ b/olmo_data/hf_datasets/boolq/none/validation/dataset_info.json @@ -0,0 +1,56 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "default", + "dataset_name": "boolq", + "dataset_size": 7827766, + "description": "", + "download_checksums": { + "hf://datasets/boolq@35b264d03638db9f4ce671b711558bf7ff0f80d5/data/train-00000-of-00001.parquet": { + "num_bytes": 3685146, + "checksum": null + }, + "hf://datasets/boolq@35b264d03638db9f4ce671b711558bf7ff0f80d5/data/validation-00000-of-00001.parquet": { + "num_bytes": 1257630, + "checksum": null + } + }, + "download_size": 4942776, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "answer": { + "dtype": "bool", + "_type": "Value" + }, + "passage": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 12770542, + "splits": { + "train": { + "name": "train", + "num_bytes": 5829584, + "num_examples": 9427, + "dataset_name": "boolq" + }, + "validation": { + "name": "validation", + "num_bytes": 1998182, + "num_examples": 3270, + "dataset_name": "boolq" + } + }, + 
"version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/boolq/none/validation/state.json b/olmo_data/hf_datasets/boolq/none/validation/state.json new file mode 100644 index 000000000..131ee5db0 --- /dev/null +++ b/olmo_data/hf_datasets/boolq/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "479c1c569ebf02ea", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..c9cedfdc9 Binary files /dev/null and b/olmo_data/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/glue/mrpc/validation/dataset_info.json b/olmo_data/hf_datasets/glue/mrpc/validation/dataset_info.json new file mode 100644 index 000000000..2a08de533 --- /dev/null +++ b/olmo_data/hf_datasets/glue/mrpc/validation/dataset_info.json @@ -0,0 +1,73 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "mrpc", + "dataset_name": "glue", + "dataset_size": 1493584, + "description": "", + "download_checksums": { + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/mrpc/train-00000-of-00001.parquet": { + "num_bytes": 649281, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/mrpc/validation-00000-of-00001.parquet": { + "num_bytes": 75678, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/mrpc/test-00000-of-00001.parquet": { + "num_bytes": 308441, + "checksum": null + } + }, + "download_size": 1033400, + "features": { + "sentence1": { + "dtype": "string", + "_type": "Value" + }, + "sentence2": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "names": [ + "not_equivalent", + "equivalent" + ], + "_type": "ClassLabel" + }, + "idx": { + "dtype": "int32", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 2526984, + "splits": { + "train": { + "name": "train", + "num_bytes": 944761, + "num_examples": 3668, + "dataset_name": "glue" + }, + "validation": { + "name": "validation", + "num_bytes": 105981, + "num_examples": 408, + "dataset_name": "glue" + }, + "test": { + "name": "test", + "num_bytes": 442842, + "num_examples": 1725, + "dataset_name": "glue" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/mrpc/validation/state.json b/olmo_data/hf_datasets/glue/mrpc/validation/state.json new file mode 100644 index 000000000..1f12b74f5 --- /dev/null +++ b/olmo_data/hf_datasets/glue/mrpc/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d505079e8646ba79", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..a8c63242f Binary files /dev/null and 
b/olmo_data/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/glue/rte/validation/dataset_info.json b/olmo_data/hf_datasets/glue/rte/validation/dataset_info.json new file mode 100644 index 000000000..004d774ed --- /dev/null +++ b/olmo_data/hf_datasets/glue/rte/validation/dataset_info.json @@ -0,0 +1,73 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "rte", + "dataset_name": "glue", + "dataset_size": 1913545, + "description": "", + "download_checksums": { + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/rte/train-00000-of-00001.parquet": { + "num_bytes": 583976, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/rte/validation-00000-of-00001.parquet": { + "num_bytes": 69020, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/rte/test-00000-of-00001.parquet": { + "num_bytes": 621413, + "checksum": null + } + }, + "download_size": 1274409, + "features": { + "sentence1": { + "dtype": "string", + "_type": "Value" + }, + "sentence2": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "names": [ + "entailment", + "not_entailment" + ], + "_type": "ClassLabel" + }, + "idx": { + "dtype": "int32", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 3187954, + "splits": { + "train": { + "name": "train", + "num_bytes": 847944, + "num_examples": 2490, + "dataset_name": "glue" + }, + "validation": { + "name": "validation", + "num_bytes": 90798, + "num_examples": 277, + "dataset_name": "glue" + }, + "test": { + "name": "test", + "num_bytes": 974803, + "num_examples": 3000, + "dataset_name": "glue" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/rte/validation/state.json b/olmo_data/hf_datasets/glue/rte/validation/state.json new file mode 100644 index 000000000..fbacb5ef8 --- /dev/null +++ b/olmo_data/hf_datasets/glue/rte/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "38f466bd379e51fb", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..0c6a563b0 Binary files /dev/null and b/olmo_data/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/glue/sst2/validation/dataset_info.json b/olmo_data/hf_datasets/glue/sst2/validation/dataset_info.json new file mode 100644 index 000000000..5e145145a --- /dev/null +++ b/olmo_data/hf_datasets/glue/sst2/validation/dataset_info.json @@ -0,0 +1,69 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "sst2", + "dataset_name": "glue", + "dataset_size": 5022007, + "description": "", + "download_checksums": { + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/sst2/train-00000-of-00001.parquet": { + "num_bytes": 3110468, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/sst2/validation-00000-of-00001.parquet": { + "num_bytes": 72819, + "checksum": null + }, + "hf://datasets/glue@bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c/sst2/test-00000-of-00001.parquet": { + "num_bytes": 
147793, + "checksum": null + } + }, + "download_size": 3331080, + "features": { + "sentence": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "names": [ + "negative", + "positive" + ], + "_type": "ClassLabel" + }, + "idx": { + "dtype": "int32", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 8353087, + "splits": { + "train": { + "name": "train", + "num_bytes": 4698441, + "num_examples": 67349, + "dataset_name": "glue" + }, + "validation": { + "name": "validation", + "num_bytes": 106470, + "num_examples": 872, + "dataset_name": "glue" + }, + "test": { + "name": "test", + "num_bytes": 217096, + "num_examples": 1821, + "dataset_name": "glue" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/glue/sst2/validation/state.json b/olmo_data/hf_datasets/glue/sst2/validation/state.json new file mode 100644 index 000000000..c4135cb46 --- /dev/null +++ b/olmo_data/hf_datasets/glue/sst2/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8d45d63f787290f0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..2f7dc66fe Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json new file mode 100644 index 000000000..45e64a8cc --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "abstract_algebra", + "dataset_name": "mmlu_no_train", + "dataset_size": 24466, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209426, + "splits": { + "test": { + "name": "test", + "num_bytes": 21316, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": 
"validation", + "num_bytes": 2232, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 918, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json new file mode 100644 index 000000000..8085a89a6 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "20d155736fab0b1c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..3ff75076f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json new file mode 100644 index 000000000..45e64a8cc --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "abstract_algebra", + "dataset_name": "mmlu_no_train", + "dataset_size": 24466, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209426, + "splits": { + "test": { + "name": "test", + "num_bytes": 21316, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2232, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 918, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json new file mode 100644 index 000000000..ba23793fb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "dca2c38b239e3cda", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..41e0b00dd Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json new file mode 100644 index 000000000..45e64a8cc --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "abstract_algebra", + "dataset_name": "mmlu_no_train", + "dataset_size": 24466, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209426, + "splits": { + "test": { + "name": "test", + "num_bytes": 21316, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2232, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 918, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json new file mode 100644 index 000000000..0f9157d87 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8c88bd434285a816", + "_format_columns": null, + 
"_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..405c8b95b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json new file mode 100644 index 000000000..551e26d75 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "anatomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 38886, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223846, + "splits": { + "test": { + "name": "test", + "num_bytes": 34594, + "num_examples": 135, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3282, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1010, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json new file mode 100644 index 000000000..53f3698f5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "17d827409c29fa1f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..8c3445c0b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow differ diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json new file mode 100644 index 000000000..551e26d75 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "anatomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 38886, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223846, + "splits": { + "test": { + "name": "test", + "num_bytes": 34594, + "num_examples": 135, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3282, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1010, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json new file mode 100644 index 000000000..6aa49978d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8deee05ec40376c0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..ec6a03ea5 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json new file mode 100644 index 000000000..551e26d75 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns 
and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "anatomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 38886, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223846, + "splits": { + "test": { + "name": "test", + "num_bytes": 34594, + "num_examples": 135, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3282, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1010, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json new file mode 100644 index 000000000..a7a21393c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ee364dfdfd373b7a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..80b81780e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json new file mode 100644 index 000000000..0cabb53df --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "astronomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 56087, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, 
law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166241047, + "splits": { + "test": { + "name": "test", + "num_bytes": 48735, + "num_examples": 152, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5223, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2129, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json new file mode 100644 index 000000000..439e0a563 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "00f55ab2bf4be215", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..0b3975b02 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json new file mode 100644 index 000000000..0cabb53df --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "astronomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 56087, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": 
"ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166241047, + "splits": { + "test": { + "name": "test", + "num_bytes": 48735, + "num_examples": 152, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5223, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2129, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json new file mode 100644 index 000000000..edadd3685 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "281cb9bd08be8d90", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..ace35ff02 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json new file mode 100644 index 000000000..0cabb53df --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "astronomy", + "dataset_name": "mmlu_no_train", + "dataset_size": 56087, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166241047, + "splits": { + "test": { + "name": "test", + "num_bytes": 48735, + "num_examples": 152, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5223, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2129, + "num_examples": 5, + "dataset_name": 
"mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json new file mode 100644 index 000000000..523591ed8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "bd1f46d2378d0355", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..816144904 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json new file mode 100644 index 000000000..6e6dbeb19 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "business_ethics", + "dataset_name": "mmlu_no_train", + "dataset_size": 40648, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166225608, + "splits": { + "test": { + "name": "test", + "num_bytes": 35140, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3235, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2273, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json new file mode 100644 index 000000000..01a36421d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json @@ 
-0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "20224bbf8789ffdc", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..0da142c0d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json new file mode 100644 index 000000000..6e6dbeb19 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "business_ethics", + "dataset_name": "mmlu_no_train", + "dataset_size": 40648, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166225608, + "splits": { + "test": { + "name": "test", + "num_bytes": 35140, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3235, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2273, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json new file mode 100644 index 000000000..2e72fd661 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "cbd085d70aa4bcb9", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..a49fcd901 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json new file mode 100644 index 000000000..6e6dbeb19 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "business_ethics", + "dataset_name": "mmlu_no_train", + "dataset_size": 40648, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166225608, + "splits": { + "test": { + "name": "test", + "num_bytes": 35140, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3235, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2273, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json new file mode 100644 index 000000000..402166ff9 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "dc36aa2960570bcf", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..582e92d24 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json new file mode 100644 index 000000000..6042fdda4 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "clinical_knowledge", + "dataset_name": "mmlu_no_train", + "dataset_size": 77170, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262130, + "splits": { + "test": { + "name": "test", + "num_bytes": 68572, + "num_examples": 265, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7290, + "num_examples": 29, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1308, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json new file mode 100644 index 000000000..4eb77dc28 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e060acf395f8ad0b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..2180dde4a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json new file mode 100644 index 000000000..6042fdda4 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and 
Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "clinical_knowledge", + "dataset_name": "mmlu_no_train", + "dataset_size": 77170, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262130, + "splits": { + "test": { + "name": "test", + "num_bytes": 68572, + "num_examples": 265, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7290, + "num_examples": 29, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1308, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json new file mode 100644 index 000000000..b429876e5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "88ca8e31425354e1", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..d3c65161e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json new file mode 100644 index 000000000..6042fdda4 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "clinical_knowledge", + "dataset_name": "mmlu_no_train", + "dataset_size": 77170, + "description": "This is a massive multitask test consisting of 
multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262130, + "splits": { + "test": { + "name": "test", + "num_bytes": 68572, + "num_examples": 265, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7290, + "num_examples": 29, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1308, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json new file mode 100644 index 000000000..caef45471 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9cf681398904b421", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..8ad3c4e2a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json new file mode 100644 index 000000000..aac7bf2b5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 58247, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + 
"subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166243207, + "splits": { + "test": { + "name": "test", + "num_bytes": 51521, + "num_examples": 144, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5111, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1615, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json new file mode 100644 index 000000000..318d415c1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1aa3f8d419751552", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..ab54af082 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json new file mode 100644 index 000000000..aac7bf2b5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 58247, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166243207, + "splits": { + "test": { + "name": "test", + "num_bytes": 51521, + "num_examples": 144, + 
"dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5111, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1615, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json new file mode 100644 index 000000000..869089f51 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e4396961f6635a3a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..383e26c63 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json new file mode 100644 index 000000000..aac7bf2b5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 58247, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166243207, + "splits": { + "test": { + "name": "test", + "num_bytes": 51521, + "num_examples": 144, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5111, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1615, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json new file mode 100644 index 000000000..36432031a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "76a7259023a114a3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..7c7aaaed5 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json new file mode 100644 index 000000000..1420d0b78 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 30704, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166215664, + "splits": { + "test": { + "name": "test", + "num_bytes": 26796, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2484, + "num_examples": 8, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1424, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json new file mode 100644 index 000000000..9fb7ccc3f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": 
"0c78504a82387e31", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..e25e396fa Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json new file mode 100644 index 000000000..1420d0b78 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 30704, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166215664, + "splits": { + "test": { + "name": "test", + "num_bytes": 26796, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2484, + "num_examples": 8, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1424, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json new file mode 100644 index 000000000..a46ac6344 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1983845a412bd613", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow new file mode 100644 index 
000000000..4ac55079e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json new file mode 100644 index 000000000..1420d0b78 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 30704, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166215664, + "splits": { + "test": { + "name": "test", + "num_bytes": 26796, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2484, + "num_examples": 8, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1424, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json new file mode 100644 index 000000000..9b9c90b72 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c04908b072442d3d", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..cf3836db9 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json new file mode 100644 
index 000000000..2ee23d6cd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 53281, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166238241, + "splits": { + "test": { + "name": "test", + "num_bytes": 45429, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4959, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2893, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json new file mode 100644 index 000000000..7d0457f8c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a37cb419aa1dee47", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..812c92ab3 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json new file mode 100644 index 000000000..2ee23d6cd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy 
Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 53281, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166238241, + "splits": { + "test": { + "name": "test", + "num_bytes": 45429, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4959, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2893, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json new file mode 100644 index 000000000..ed83d5e94 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "002abde3a5352321", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..80bc02cdc Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json new file mode 100644 index 000000000..2ee23d6cd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 53281, + "description": "This is a massive multitask 
test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166238241, + "splits": { + "test": { + "name": "test", + "num_bytes": 45429, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4959, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2893, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json new file mode 100644 index 000000000..3e4203fd1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "948e41ce3a58bfdd", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..5bab031bb Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json new file mode 100644 index 000000000..5fdd5f1d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 31504, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": 
{ + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166216464, + "splits": { + "test": { + "name": "test", + "num_bytes": 26999, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2909, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1596, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json new file mode 100644 index 000000000..6dc1264aa --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "222fe21e229a3b00", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..5e20ad18e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json new file mode 100644 index 000000000..5fdd5f1d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 31504, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166216464, 
+ "splits": { + "test": { + "name": "test", + "num_bytes": 26999, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2909, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1596, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json new file mode 100644 index 000000000..56759c80f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "55305f09ac0c7084", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..4c0fc77d0 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json new file mode 100644 index 000000000..5fdd5f1d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 31504, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166216464, + "splits": { + "test": { + "name": "test", + "num_bytes": 26999, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2909, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1596, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + 
"version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json new file mode 100644 index 000000000..266f3aa69 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4de7c965c7cf800b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..2971f285d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json new file mode 100644 index 000000000..627ff0363 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 95940, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280900, + "splits": { + "test": { + "name": "test", + "num_bytes": 85845, + "num_examples": 173, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8337, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1758, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json new file mode 100644 index 000000000..584ee7c86 --- /dev/null +++ 
b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b1126a770a5c857f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..dd3b4728b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json new file mode 100644 index 000000000..627ff0363 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 95940, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280900, + "splits": { + "test": { + "name": "test", + "num_bytes": 85845, + "num_examples": 173, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8337, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1758, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json new file mode 100644 index 000000000..2cdb5dfb5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c18eeec0949b5285", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..baeb8696d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json new file mode 100644 index 000000000..627ff0363 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 95940, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280900, + "splits": { + "test": { + "name": "test", + "num_bytes": 85845, + "num_examples": 173, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8337, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1758, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json new file mode 100644 index 000000000..53e57bca2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ad9e7753bff0ef4c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..412ac350e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow differ diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json new file mode 100644 index 000000000..7666ee7a8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 37289, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222249, + "splits": { + "test": { + "name": "test", + "num_bytes": 32107, + "num_examples": 102, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3687, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1495, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json new file mode 100644 index 000000000..f53903cab --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "832fced8b6b15e40", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..6307d606f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json new file mode 100644 index 000000000..7666ee7a8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask 
Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 37289, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222249, + "splits": { + "test": { + "name": "test", + "num_bytes": 32107, + "num_examples": 102, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3687, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1495, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json new file mode 100644 index 000000000..71f0b5322 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1339dc91a06c2cdb", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..eb320fbeb Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json new file mode 100644 index 000000000..7666ee7a8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "college_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 37289, + "description": "This is a massive multitask test 
consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222249, + "splits": { + "test": { + "name": "test", + "num_bytes": 32107, + "num_examples": 102, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3687, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1495, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json new file mode 100644 index 000000000..095832d79 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "59cdc11245c08838", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..3631399a7 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json new file mode 100644 index 000000000..d69a642f7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "computer_security", + "dataset_name": "mmlu_no_train", + "dataset_size": 35174, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + 
"_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220134, + "splits": { + "test": { + "name": "test", + "num_bytes": 29212, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4768, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1194, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json new file mode 100644 index 000000000..2be9053e5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4ff795eaafd7eecd", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..b3c1b89b0 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json new file mode 100644 index 000000000..d69a642f7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "computer_security", + "dataset_name": "mmlu_no_train", + "dataset_size": 35174, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220134, + "splits": { + "test": { + "name": "test", + "num_bytes": 
29212, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4768, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1194, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json new file mode 100644 index 000000000..8917ff087 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "62fee97de2c29e76", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..dae1ae264 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json new file mode 100644 index 000000000..d69a642f7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "computer_security", + "dataset_name": "mmlu_no_train", + "dataset_size": 35174, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220134, + "splits": { + "test": { + "name": "test", + "num_bytes": 29212, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4768, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1194, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} 
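
The file triple repeated throughout these hunks (`data-00000-of-00001.arrow`, `dataset_info.json`, `state.json`) is the standard layout written by `datasets.save_to_disk`, so each `<config>/<split>` directory added here can be read back as a single split with `datasets.load_from_disk`. Below is a minimal sketch of such a loader; the helper name `load_packaged_mmlu` and the assumption that `olmo_data` is installed as regular (non-zipped) package files are illustrative, not the repository's own API.

```python
# Sketch: read one packaged MMLU split back from the save_to_disk layout above.
# Assumes the `olmo_data` package is importable from the filesystem and the
# `datasets` library is installed; `config`/`split` mirror the directory
# structure in this diff (e.g. computer_security/validation).
from importlib.resources import files

from datasets import load_from_disk


def load_packaged_mmlu(config: str, split: str):
    # Each <config>/<split> directory holds the triple seen in these hunks:
    # data-*.arrow, dataset_info.json, and state.json.
    split_dir = files("olmo_data") / "hf_datasets" / "hails" / "mmlu_no_train" / config / split
    return load_from_disk(str(split_dir))


ds = load_packaged_mmlu("computer_security", "validation")
print(len(ds))  # 11 examples, per the split metadata above
row = ds[0]
# `answer` is a ClassLabel over ["A", "B", "C", "D"], so indices map to letters.
print(row["question"], ds.features["answer"].int2str(row["answer"]))
```

Note that `state.json` pins the Arrow shard list, the split name, and a `_fingerprint` that `datasets` uses as the dataset's cache identity, which is why every split directory in these hunks ships all three files alongside the binary Arrow data.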
\ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json new file mode 100644 index 000000000..ce68af870 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "47e3149326ffca1a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..58c2abd9f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json new file mode 100644 index 000000000..44995d30a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "conceptual_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 51933, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166236893, + "splits": { + "test": { + "name": "test", + "num_bytes": 45867, + "num_examples": 235, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5034, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1032, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json new file mode 100644 index 000000000..976e647c9 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": 
"data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b761ecd40d39a1ad", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..f37dda1bc Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json new file mode 100644 index 000000000..44995d30a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "conceptual_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 51933, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166236893, + "splits": { + "test": { + "name": "test", + "num_bytes": 45867, + "num_examples": 235, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5034, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1032, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json new file mode 100644 index 000000000..596df67cc --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a3bda514c3d926ee", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..3e624fa93 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json new file mode 100644 index 000000000..44995d30a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "conceptual_physics", + "dataset_name": "mmlu_no_train", + "dataset_size": 51933, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166236893, + "splits": { + "test": { + "name": "test", + "num_bytes": 45867, + "num_examples": 235, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5034, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1032, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json new file mode 100644 index 000000000..2fab56d5b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2f1b2a7b5d4287db", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..559b613cd Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json new file mode 100644 index 000000000..a06db14eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "econometrics", + "dataset_name": "mmlu_no_train", + "dataset_size": 55218, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240178, + "splits": { + "test": { + "name": "test", + "num_bytes": 48359, + "num_examples": 114, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5147, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1712, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json new file mode 100644 index 000000000..3fe22c208 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "aa482dc1a13ee3ef", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..59e816624 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json new file mode 100644 index 000000000..a06db14eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn 
Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "econometrics", + "dataset_name": "mmlu_no_train", + "dataset_size": 55218, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240178, + "splits": { + "test": { + "name": "test", + "num_bytes": 48359, + "num_examples": 114, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5147, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1712, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json new file mode 100644 index 000000000..b03f97156 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a1d106cced570179", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..81611f2be Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json new file mode 100644 index 000000000..a06db14eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "econometrics", + "dataset_name": "mmlu_no_train", + "dataset_size": 55218, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, 
law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240178, + "splits": { + "test": { + "name": "test", + "num_bytes": 48359, + "num_examples": 114, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5147, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1712, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json new file mode 100644 index 000000000..d8048c172 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c19f619be72337ae", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..f05b62899 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json new file mode 100644 index 000000000..ca1aac906 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "electrical_engineering", + "dataset_name": "mmlu_no_train", + "dataset_size": 33297, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + 
"_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166218257, + "splits": { + "test": { + "name": "test", + "num_bytes": 28900, + "num_examples": 145, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3307, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1090, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json new file mode 100644 index 000000000..059c785d2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "584ac4a847a12ab3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..3d87bc7bd Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json new file mode 100644 index 000000000..ca1aac906 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "electrical_engineering", + "dataset_name": "mmlu_no_train", + "dataset_size": 33297, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166218257, + "splits": { + "test": { + "name": "test", + "num_bytes": 28900, + "num_examples": 145, + "dataset_name": "mmlu_no_train" + }, + "validation": { 
+ "name": "validation", + "num_bytes": 3307, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1090, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json new file mode 100644 index 000000000..3b0bd713f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c03b70084f477eb9", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..15614dc47 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json new file mode 100644 index 000000000..ca1aac906 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "electrical_engineering", + "dataset_name": "mmlu_no_train", + "dataset_size": 33297, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166218257, + "splits": { + "test": { + "name": "test", + "num_bytes": 28900, + "num_examples": 145, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3307, + "num_examples": 16, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1090, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff 
--git a/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json new file mode 100644 index 000000000..eb8a54b71 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1a49fa6022f56e82", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..fbea5f9b2 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json new file mode 100644 index 000000000..0b905cb3d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "elementary_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 91524, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166276484, + "splits": { + "test": { + "name": "test", + "num_bytes": 79924, + "num_examples": 378, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10042, + "num_examples": 41, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1558, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json new file mode 100644 index 000000000..2172d97c7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { 
+ "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1d7c3cb041ed7523", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..e9ff174b4 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json new file mode 100644 index 000000000..0b905cb3d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "elementary_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 91524, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166276484, + "splits": { + "test": { + "name": "test", + "num_bytes": 79924, + "num_examples": 378, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10042, + "num_examples": 41, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1558, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json new file mode 100644 index 000000000..ba52c9141 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "799f512b4ea42b80", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..d63de83db Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json new file mode 100644 index 000000000..0b905cb3d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "elementary_mathematics", + "dataset_name": "mmlu_no_train", + "dataset_size": 91524, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166276484, + "splits": { + "test": { + "name": "test", + "num_bytes": 79924, + "num_examples": 378, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10042, + "num_examples": 41, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1558, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json new file mode 100644 index 000000000..cb912a4ce --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a3acaaf5ad71135e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..8a7ed0e1c Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow differ diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json new file mode 100644 index 000000000..98ea63fcd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "formal_logic", + "dataset_name": "mmlu_no_train", + "dataset_size": 60078, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245038, + "splits": { + "test": { + "name": "test", + "num_bytes": 51789, + "num_examples": 126, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6464, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1825, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json new file mode 100644 index 000000000..fb2669be0 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e5094a5c5c98f9bc", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..76bc5ceb9 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json new file mode 100644 index 000000000..98ea63fcd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan 
Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "formal_logic", + "dataset_name": "mmlu_no_train", + "dataset_size": 60078, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245038, + "splits": { + "test": { + "name": "test", + "num_bytes": 51789, + "num_examples": 126, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6464, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1825, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json new file mode 100644 index 000000000..edadc7131 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2447d143a40d1c18", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..235c29a2b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json new file mode 100644 index 000000000..98ea63fcd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "formal_logic", + "dataset_name": "mmlu_no_train", + "dataset_size": 60078, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of 
knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245038, + "splits": { + "test": { + "name": "test", + "num_bytes": 51789, + "num_examples": 126, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6464, + "num_examples": 14, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1825, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json new file mode 100644 index 000000000..0066485d2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c65a8ac7d0915655", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..6883c5211 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json new file mode 100644 index 000000000..cf52e606c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "global_facts", + "dataset_name": "mmlu_no_train", + "dataset_size": 23301, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + 
"dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166208261, + "splits": { + "test": { + "name": "test", + "num_bytes": 19991, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2013, + "num_examples": 10, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1297, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json new file mode 100644 index 000000000..1170ab22c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8774d8c6d5d3b4ee", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..15a350209 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json new file mode 100644 index 000000000..cf52e606c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "global_facts", + "dataset_name": "mmlu_no_train", + "dataset_size": 23301, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166208261, + "splits": { + "test": { + "name": "test", + "num_bytes": 19991, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2013, + "num_examples": 10, + 
"dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1297, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json new file mode 100644 index 000000000..ca66dac54 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "06fa75c0f85b61da", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..93a9ff9f4 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json new file mode 100644 index 000000000..cf52e606c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "global_facts", + "dataset_name": "mmlu_no_train", + "dataset_size": 23301, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166208261, + "splits": { + "test": { + "name": "test", + "num_bytes": 19991, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2013, + "num_examples": 10, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1297, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json new file 
mode 100644 index 000000000..eb7ba3cf8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c716d4b511d690ff", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..31e3d0320 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json new file mode 100644 index 000000000..5a5e9410c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 130372, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166315332, + "splits": { + "test": { + "name": "test", + "num_bytes": 116850, + "num_examples": 310, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 11746, + "num_examples": 32, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1776, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json new file mode 100644 index 000000000..c31be6b00 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "0633c1d57fc000b7", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline 
at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..91b61bfb8 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json new file mode 100644 index 000000000..5a5e9410c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 130372, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166315332, + "splits": { + "test": { + "name": "test", + "num_bytes": 116850, + "num_examples": 310, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 11746, + "num_examples": 32, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1776, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json new file mode 100644 index 000000000..7caae05d6 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "cdb5912f765d19c2", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..00412c0a1 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow 
differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json new file mode 100644 index 000000000..5a5e9410c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_biology", + "dataset_name": "mmlu_no_train", + "dataset_size": 130372, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166315332, + "splits": { + "test": { + "name": "test", + "num_bytes": 116850, + "num_examples": 310, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 11746, + "num_examples": 32, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1776, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json new file mode 100644 index 000000000..7bdd248c5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ebfe4711fb7fabe7", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..3fba6e9f8 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json new file mode 100644 index 000000000..d702c26e7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json @@ -0,0 +1,70 
@@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 72490, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166257450, + "splits": { + "test": { + "name": "test", + "num_bytes": 63527, + "num_examples": 203, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7630, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1333, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json new file mode 100644 index 000000000..cc327084c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "19d61b0b1c456d15", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..e36fdba24 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json new file mode 100644 index 000000000..d702c26e7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + 
"config_name": "high_school_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 72490, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166257450, + "splits": { + "test": { + "name": "test", + "num_bytes": 63527, + "num_examples": 203, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7630, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1333, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json new file mode 100644 index 000000000..3eea5952b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b41be291d0efe099", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..7087e1964 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json new file mode 100644 index 000000000..d702c26e7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_chemistry", + "dataset_name": "mmlu_no_train", + "dataset_size": 72490, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { 
+ "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166257450, + "splits": { + "test": { + "name": "test", + "num_bytes": 63527, + "num_examples": 203, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7630, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1333, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json new file mode 100644 index 000000000..43552e110 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f62f91b339bf9346", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..e4ffcbf90 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json new file mode 100644 index 000000000..2934531d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 54349, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + 
"dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166239309, + "splits": { + "test": { + "name": "test", + "num_bytes": 47664, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3619, + "num_examples": 9, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3066, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json new file mode 100644 index 000000000..61775baaf --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "236843273a34aee0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..e19089ed8 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json new file mode 100644 index 000000000..2934531d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 54349, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166239309, + "splits": { + "test": { + "name": "test", + "num_bytes": 47664, 
+ "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3619, + "num_examples": 9, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3066, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json new file mode 100644 index 000000000..26a3ece30 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "aa1d78b34a77e5bb", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..9bc53a453 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json new file mode 100644 index 000000000..2934531d7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_computer_science", + "dataset_name": "mmlu_no_train", + "dataset_size": 54349, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166239309, + "splits": { + "test": { + "name": "test", + "num_bytes": 47664, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3619, + "num_examples": 9, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3066, + "num_examples": 5, + "dataset_name": 
"mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json new file mode 100644 index 000000000..233a948df --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b5b58ee6995bb47d", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..5eca78e8a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json new file mode 100644 index 000000000..768a40237 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_european_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 317476, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166502436, + "splits": { + "test": { + "name": "test", + "num_bytes": 275568, + "num_examples": 165, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 30196, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 11712, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json
new file mode 100644
index 000000000..85987039a
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "1d994bb11ac3254f",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..2b8014b91
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json
new file mode 100644
index 000000000..768a40237
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_european_history",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 317476,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166502436,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 275568,
+      "num_examples": 165,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 30196,
+      "num_examples": 18,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 11712,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json
new file mode 100644
index 000000000..c9475c0c2
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "480449749cb2b082",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..b323089b1
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json
new file mode 100644
index 000000000..768a40237
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_european_history",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 317476,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166502436,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 275568,
+      "num_examples": 165,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 30196,
+      "num_examples": 18,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 11712,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json
new file mode 100644
index 000000000..713aca008
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "e56fc4122593f632",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..7dc14abb0
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json
new file mode 100644
index 000000000..c98f94b80
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_geography",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 53358,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166238318,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 46972,
+      "num_examples": 198,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 4870,
+      "num_examples": 22,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1516,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json
new file mode 100644
index 000000000..a713b1068
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "20511542e3283886",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..7a6f55f13
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json
new file mode 100644
index 000000000..c98f94b80
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_geography",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 53358,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166238318,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 46972,
+      "num_examples": 198,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 4870,
+      "num_examples": 22,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1516,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json
new file mode 100644
index 000000000..7c6594208
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "d1be97d315bdf95d",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..c94be6db5
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json
new file mode 100644
index 000000000..c98f94b80
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_geography",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 53358,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166238318,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 46972,
+      "num_examples": 198,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 4870,
+      "num_examples": 22,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1516,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json
new file mode 100644
index 000000000..05b1ae467
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "f77ad34af6fc51f3",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..5a01dbdc3
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json
new file mode 100644
index 000000000..92f08e2a1
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_government_and_politics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 83421,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166268381,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 73589,
+      "num_examples": 193,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7870,
+      "num_examples": 21,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1962,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json
new file mode 100644
index 000000000..0a4b94f05
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "c1659513735417f2",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..8f9b38c11
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json
new file mode 100644
index 000000000..92f08e2a1
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_government_and_politics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 83421,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166268381,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 73589,
+      "num_examples": 193,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7870,
+      "num_examples": 21,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1962,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json
new file mode 100644
index 000000000..9e18b30c5
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "abfcd2ea63940fc5",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..b4ac2379b
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json
new file mode 100644
index 000000000..92f08e2a1
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_government_and_politics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 83421,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166268381,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 73589,
+      "num_examples": 193,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7870,
+      "num_examples": 21,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1962,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json
new file mode 100644
index 000000000..0eab5b412
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "c09f4d859db6fa3e",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..e24d7f68e
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json
new file mode 100644
index 000000000..350570fd7
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_macroeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 145139,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166330099,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 129375,
+      "num_examples": 390,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 14298,
+      "num_examples": 43,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1466,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json
new file mode 100644
index 000000000..667e0900b
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "a3be1fcf374ada6a",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..f61e820b3
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json
new file mode 100644
index 000000000..350570fd7
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_macroeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 145139,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166330099,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 129375,
+      "num_examples": 390,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 14298,
+      "num_examples": 43,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1466,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json
new file mode 100644
index 000000000..560b0dac6
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "5171a01e77504546",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..92d0f89f1
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json
new file mode 100644
index 000000000..350570fd7
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_macroeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 145139,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166330099,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 129375,
+      "num_examples": 390,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 14298,
+      "num_examples": 43,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1466,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json
new file mode 100644
index 000000000..78db3b711
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "1fc263055941b33f",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..a1ab8e8e7
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json
new file mode 100644
index 000000000..171328c5c
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_mathematics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 70088,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166255048,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62132,
+      "num_examples": 270,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 6536,
+      "num_examples": 29,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1420,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json
new file mode 100644
index 000000000..cfbbb0028
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "fdba997591f7572a",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..fb117459e
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json
new file mode 100644
index 000000000..171328c5c
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_mathematics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 70088,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166255048,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62132,
+      "num_examples": 270,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 6536,
+      "num_examples": 29,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1420,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json
new file mode 100644
index 000000000..84fb747ad
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "f6d1438629210208",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..fa1784f62
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json
new file mode 100644
index 000000000..171328c5c
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_mathematics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 70088,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166255048,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62132,
+      "num_examples": 270,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 6536,
+      "num_examples": 29,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1420,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json
new file mode 100644
index 000000000..37f38b85e
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "e5186d0211426a05",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..cedefc667
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json
new file mode 100644
index 000000000..215cbafcd
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_microeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 92588,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166277548,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 82831,
+      "num_examples": 238,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 8321,
+      "num_examples": 26,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1436,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json
new file mode 100644
index 000000000..5b1e443e4
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "51a244e59f751242",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..0e539903c
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json
new file mode 100644
index 000000000..215cbafcd
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_microeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 92588,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166277548,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 82831,
+      "num_examples": 238,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 8321,
+      "num_examples": 26,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1436,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json
new file mode 100644
index 000000000..e1f3f56e0
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "4cd44cc8b1d07e02",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..90f758278
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json
new file mode 100644
index 000000000..215cbafcd
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_microeconomics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 92588,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166277548,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 82831,
+      "num_examples": 238,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 8321,
+      "num_examples": 26,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1436,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json
new file mode 100644
index 000000000..8906ef1d1
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "05bdabc98bdfc467",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..23610971c
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json
new file mode 100644
index 000000000..ab002da50
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_physics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 71741,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166256701,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62999,
+      "num_examples": 151,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7150,
+      "num_examples": 17,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1592,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json
new file mode 100644
index 000000000..7f258e68c
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "f6283c98f76a7943",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..9db9e55a3
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json
new file mode 100644
index 000000000..ab002da50
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_physics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 71741,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166256701,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62999,
+      "num_examples": 151,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7150,
+      "num_examples": 17,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1592,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json
new file mode 100644
index 000000000..07fd4336e
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "252b40f4b19b762a",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "test"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..3b6e71725
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json
new file mode 100644
index 000000000..ab002da50
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_physics",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 71741,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166256701,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 62999,
+      "num_examples": 151,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 7150,
+      "num_examples": 17,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 1592,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json
new file mode 100644
index 000000000..b4cb622b2
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "98d630de2600c9cf",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow
new file mode 100644
index 000000000..6567a6b23
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json
new file mode 100644
index 000000000..d97309735
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_psychology",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 194405,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166379365,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 173565,
+      "num_examples": 545,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 18817,
+      "num_examples": 60,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 2023,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json
new file mode 100644
index 000000000..1ce26eac9
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "338f7d249a4397fb",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "dev"
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow
new file mode 100644
index 000000000..ca6fd18b1
Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json
new file mode 100644
index 000000000..d97309735
--- /dev/null
+++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json
@@ -0,0 +1,70 @@
+{
+  "builder_name": "mmlu_no_train",
+  "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n",
+  "config_name": "high_school_psychology",
+  "dataset_name": "mmlu_no_train",
+  "dataset_size": 194405,
+  "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n",
+  "download_checksums": {
+    "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": {
+      "num_bytes": 166184960,
+      "checksum": null
+    }
+  },
+  "download_size": 166184960,
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "subject": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "choices": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "names": [
+        "A",
+        "B",
+        "C",
+        "D"
+      ],
+      "_type": "ClassLabel"
+    }
+  },
+  "homepage": "https://github.com/hendrycks/test",
+  "license": "",
+  "size_in_bytes": 166379365,
+  "splits": {
+    "test": {
+      "name": "test",
+      "num_bytes": 173565,
+      "num_examples": 545,
+      "dataset_name": "mmlu_no_train"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 18817,
+      "num_examples": 60,
+      "dataset_name": "mmlu_no_train"
+    },
+    "dev": {
+      "name": "dev",
+      "num_bytes": 2023,
+      "num_examples": 5,
+      "dataset_name": "mmlu_no_train"
+    }
+  },
+  "version": {
+    "version_str": "1.0.0",
+    "major": 1,
+    "minor": 0,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json
new
file mode 100644 index 000000000..0a3f639c5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7161e40d2b654853", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..e1b1051e2 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json new file mode 100644 index 000000000..d97309735 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_psychology", + "dataset_name": "mmlu_no_train", + "dataset_size": 194405, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166379365, + "splits": { + "test": { + "name": "test", + "num_bytes": 173565, + "num_examples": 545, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 18817, + "num_examples": 60, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2023, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json new file mode 100644 index 000000000..2ac1ea399 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a4e5c8c8c774ae30", + "_format_columns": null, + "_format_kwargs": 
{}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..fab795535 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json new file mode 100644 index 000000000..d9d70e92b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_statistics", + "dataset_name": "mmlu_no_train", + "dataset_size": 129535, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166314495, + "splits": { + "test": { + "name": "test", + "num_bytes": 116306, + "num_examples": 216, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10583, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2646, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json new file mode 100644 index 000000000..dabb89741 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "cccb6ff5e82160fa", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..f79458047 Binary files 
/dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json new file mode 100644 index 000000000..d9d70e92b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_statistics", + "dataset_name": "mmlu_no_train", + "dataset_size": 129535, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166314495, + "splits": { + "test": { + "name": "test", + "num_bytes": 116306, + "num_examples": 216, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10583, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2646, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json new file mode 100644 index 000000000..39c1a45a2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a94dcfe93aa4586f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..5133f35c1 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json new file mode 100644 index 
000000000..d9d70e92b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_statistics", + "dataset_name": "mmlu_no_train", + "dataset_size": 129535, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166314495, + "splits": { + "test": { + "name": "test", + "num_bytes": 116306, + "num_examples": 216, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10583, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2646, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json new file mode 100644 index 000000000..fb16a1fdf --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9ac32f17f19685b8", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..612408167 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json new file mode 100644 index 000000000..48971b97a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and 
Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_us_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 343274, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166528234, + "splits": { + "test": { + "name": "test", + "num_bytes": 302026, + "num_examples": 204, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 32266, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 8982, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json new file mode 100644 index 000000000..79e238264 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e85379a01e115a76", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..4773eda8b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json new file mode 100644 index 000000000..48971b97a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_us_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 343274, + "description": "This is a massive multitask test consisting of multiple-choice questions from 
various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166528234, + "splits": { + "test": { + "name": "test", + "num_bytes": 302026, + "num_examples": 204, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 32266, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 8982, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json new file mode 100644 index 000000000..6d8f6cc32 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "43ddac66f666fa05", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..db0359634 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json new file mode 100644 index 000000000..48971b97a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_us_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 343274, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + 
"question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166528234, + "splits": { + "test": { + "name": "test", + "num_bytes": 302026, + "num_examples": 204, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 32266, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 8982, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json new file mode 100644 index 000000000..f56033390 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "59df1cbcbcd89698", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..9a85df20f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json new file mode 100644 index 000000000..0339f4bd8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_world_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 436736, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": 
"https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166621696, + "splits": { + "test": { + "name": "test", + "num_bytes": 385478, + "num_examples": 237, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 46243, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 5015, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json new file mode 100644 index 000000000..c3dac9d39 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2a41b781a0f61fd9", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..a1288c0fa Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json new file mode 100644 index 000000000..0339f4bd8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_world_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 436736, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166621696, + "splits": { + "test": { + "name": "test", + "num_bytes": 385478, + "num_examples": 237, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 46243, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + 
"name": "dev", + "num_bytes": 5015, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json new file mode 100644 index 000000000..7da835b2c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5aec070596710b81", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..52f9abf56 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json new file mode 100644 index 000000000..0339f4bd8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "high_school_world_history", + "dataset_name": "mmlu_no_train", + "dataset_size": 436736, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166621696, + "splits": { + "test": { + "name": "test", + "num_bytes": 385478, + "num_examples": 237, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 46243, + "num_examples": 26, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 5015, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json new file mode 100644 index 000000000..3558e96d8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e3c9582fbfb93d8e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..e9b338037 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json new file mode 100644 index 000000000..857737c5d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_aging", + "dataset_name": "mmlu_no_train", + "dataset_size": 55542, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240502, + "splits": { + "test": { + "name": "test", + "num_bytes": 49431, + "num_examples": 223, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5040, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1071, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json new file mode 100644 index 000000000..508c9bed9 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ba7bcdadc5b28a0d", + 
"_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..20559f533 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json new file mode 100644 index 000000000..857737c5d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_aging", + "dataset_name": "mmlu_no_train", + "dataset_size": 55542, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240502, + "splits": { + "test": { + "name": "test", + "num_bytes": 49431, + "num_examples": 223, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5040, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1071, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json new file mode 100644 index 000000000..8c59c5ca7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "324ed4c42296e339", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..466bca518 Binary files /dev/null and 
b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json new file mode 100644 index 000000000..857737c5d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_aging", + "dataset_name": "mmlu_no_train", + "dataset_size": 55542, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166240502, + "splits": { + "test": { + "name": "test", + "num_bytes": 49431, + "num_examples": 223, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5040, + "num_examples": 23, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1071, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json new file mode 100644 index 000000000..1cf186690 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "80bccb979e65c0fa", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..32aa21f67 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json new file mode 100644 index 000000000..43345a6f0 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json @@ -0,0 +1,70 @@ 
+{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_sexuality", + "dataset_name": "mmlu_no_train", + "dataset_size": 38384, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223344, + "splits": { + "test": { + "name": "test", + "num_bytes": 34587, + "num_examples": 131, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2637, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1160, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json new file mode 100644 index 000000000..cafa4394f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "157b386122007fa0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..45df6c57d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json new file mode 100644 index 000000000..43345a6f0 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_sexuality", + "dataset_name": 
"mmlu_no_train", + "dataset_size": 38384, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223344, + "splits": { + "test": { + "name": "test", + "num_bytes": 34587, + "num_examples": 131, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2637, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1160, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json new file mode 100644 index 000000000..aad6787b8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2b3f4eb92b47e593", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..b7917fa34 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json new file mode 100644 index 000000000..43345a6f0 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "human_sexuality", + "dataset_name": "mmlu_no_train", + "dataset_size": 38384, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + 
} + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166223344, + "splits": { + "test": { + "name": "test", + "num_bytes": 34587, + "num_examples": 131, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 2637, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1160, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json new file mode 100644 index 000000000..189fcc2b1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a4c606a4dc176d44", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..3bcb9eb04 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json new file mode 100644 index 000000000..24e82046f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "international_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 65305, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", 
+ "license": "", + "size_in_bytes": 166250265, + "splits": { + "test": { + "name": "test", + "num_bytes": 56060, + "num_examples": 121, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6734, + "num_examples": 13, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2511, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json new file mode 100644 index 000000000..b0739923e --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c71c68ae6d322ecd", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..665fe0e7c Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json new file mode 100644 index 000000000..24e82046f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "international_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 65305, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166250265, + "splits": { + "test": { + "name": "test", + "num_bytes": 56060, + "num_examples": 121, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6734, + "num_examples": 13, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2511, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + 
"version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/state.json new file mode 100644 index 000000000..e9ab58ec3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a2c2cba37acd51da", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..4936e4195 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json new file mode 100644 index 000000000..24e82046f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "international_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 65305, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166250265, + "splits": { + "test": { + "name": "test", + "num_bytes": 56060, + "num_examples": 121, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 6734, + "num_examples": 13, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2511, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json new file mode 100644 index 000000000..eb8890df4 --- /dev/null +++ 
b/olmo_data/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "be245d1755c42498", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..0ecc8da56 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json new file mode 100644 index 000000000..77be19d1b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "jurisprudence", + "dataset_name": "mmlu_no_train", + "dataset_size": 41090, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226050, + "splits": { + "test": { + "name": "test", + "num_bytes": 35810, + "num_examples": 108, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3904, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1376, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json new file mode 100644 index 000000000..e3c0cacc4 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "be97b2b406b8981c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..5c5bc10f0 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json new file mode 100644 index 000000000..77be19d1b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "jurisprudence", + "dataset_name": "mmlu_no_train", + "dataset_size": 41090, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226050, + "splits": { + "test": { + "name": "test", + "num_bytes": 35810, + "num_examples": 108, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3904, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1376, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json new file mode 100644 index 000000000..dd855ce57 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "71bc643a4a388f69", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..da204b09e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json new 
file mode 100644 index 000000000..77be19d1b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "jurisprudence", + "dataset_name": "mmlu_no_train", + "dataset_size": 41090, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226050, + "splits": { + "test": { + "name": "test", + "num_bytes": 35810, + "num_examples": 108, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3904, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1376, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json new file mode 100644 index 000000000..02959ac80 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "27b2d03775694505", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..d36f1afa5 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json new file mode 100644 index 000000000..dd6768532 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob 
Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "logical_fallacies", + "dataset_name": "mmlu_no_train", + "dataset_size": 60663, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245623, + "splits": { + "test": { + "name": "test", + "num_bytes": 53528, + "num_examples": 163, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5469, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json new file mode 100644 index 000000000..578c7b456 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a5f621079a9e5be8", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..7565f7be8 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json new file mode 100644 index 000000000..dd6768532 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "logical_fallacies", + "dataset_name": "mmlu_no_train", + "dataset_size": 60663, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, 
law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245623, + "splits": { + "test": { + "name": "test", + "num_bytes": 53528, + "num_examples": 163, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5469, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json new file mode 100644 index 000000000..4b1c96094 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5ee5a18bf21e4bd3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..e59f2333f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json new file mode 100644 index 000000000..dd6768532 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "logical_fallacies", + "dataset_name": "mmlu_no_train", + "dataset_size": 60663, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + 
"_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166245623, + "splits": { + "test": { + "name": "test", + "num_bytes": 53528, + "num_examples": 163, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5469, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json new file mode 100644 index 000000000..e02855fb2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7c7cf856e53aac3c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..451dad5e5 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json new file mode 100644 index 000000000..0226f8a42 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "machine_learning", + "dataset_name": "mmlu_no_train", + "dataset_size": 41959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226919, + "splits": { + "test": { + "name": "test", + "num_bytes": 36108, + "num_examples": 112, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + 
"num_bytes": 3440, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2411, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json new file mode 100644 index 000000000..d41a9b386 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b196e16b4e56f533", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..06d9d2ccd Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json new file mode 100644 index 000000000..0226f8a42 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "machine_learning", + "dataset_name": "mmlu_no_train", + "dataset_size": 41959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226919, + "splits": { + "test": { + "name": "test", + "num_bytes": 36108, + "num_examples": 112, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3440, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2411, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json new file mode 100644 index 000000000..99eada0d1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a6b3032f1a46a701", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..339b60d40 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json new file mode 100644 index 000000000..0226f8a42 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "machine_learning", + "dataset_name": "mmlu_no_train", + "dataset_size": 41959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166226919, + "splits": { + "test": { + "name": "test", + "num_bytes": 36108, + "num_examples": 112, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3440, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2411, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json new file mode 100644 index 000000000..2a79d8335 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "caa69f7c960d1451", + "_format_columns": null, + 
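"_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file

Each `<config>/<split>` directory added in this diff follows the layout that `datasets.save_to_disk` produces: one or more `.arrow` shards plus `state.json` and `dataset_info.json`. As a minimal sketch of how such a packaged split can be read back (this is illustrative only, not the repo's actual loader, which is not shown in this diff; it assumes `olmo_data` is installed as regular files on disk):

```python
import importlib.resources

from datasets import load_from_disk

# Locate the packaged split inside olmo_data. load_from_disk reads the
# save_to_disk layout (arrow shard + state.json + dataset_info.json)
# directly, with no network access or HF hub lookup.
split_dir = importlib.resources.files("olmo_data").joinpath(
    "hf_datasets/hails/mmlu_no_train/machine_learning/validation"
)
ds = load_from_disk(str(split_dir))

print(len(ds))                      # 11 examples, per dataset_info.json above
print(ds.features["answer"].names)  # ['A', 'B', 'C', 'D'] (ClassLabel)
```
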
"_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..a788242fe Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json new file mode 100644 index 000000000..8547a82bd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "management", + "dataset_name": "mmlu_no_train", + "dataset_size": 24350, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209310, + "splits": { + "test": { + "name": "test", + "num_bytes": 21432, + "num_examples": 103, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 1962, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 956, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/state.json new file mode 100644 index 000000000..64f99ffbb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "0e43db3884e9aff1", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..09f42c26d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow differ diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json new file mode 100644 index 000000000..8547a82bd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "management", + "dataset_name": "mmlu_no_train", + "dataset_size": 24350, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209310, + "splits": { + "test": { + "name": "test", + "num_bytes": 21432, + "num_examples": 103, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 1962, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 956, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/state.json new file mode 100644 index 000000000..0d134791e --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "633a2ba199495783", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..7b08f88c4 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json new file mode 100644 index 000000000..8547a82bd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n 
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "management", + "dataset_name": "mmlu_no_train", + "dataset_size": 24350, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166209310, + "splits": { + "test": { + "name": "test", + "num_bytes": 21432, + "num_examples": 103, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 1962, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 956, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/state.json new file mode 100644 index 000000000..1759a190d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/management/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f18577dd2efaa2d2", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..bc1293fa8 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json new file mode 100644 index 000000000..154c7070c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "marketing", + "dataset_name": "mmlu_no_train", + "dataset_size": 75296, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including 
elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166260256, + "splits": { + "test": { + "name": "test", + "num_bytes": 66055, + "num_examples": 234, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7707, + "num_examples": 25, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1534, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json new file mode 100644 index 000000000..75cfa6cf3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b2d8494f8ed11222", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..06e1affd6 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json new file mode 100644 index 000000000..154c7070c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "marketing", + "dataset_name": "mmlu_no_train", + "dataset_size": 75296, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + 
"names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166260256, + "splits": { + "test": { + "name": "test", + "num_bytes": 66055, + "num_examples": 234, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7707, + "num_examples": 25, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1534, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/state.json new file mode 100644 index 000000000..0fa23559d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f00b001eb0cfd167", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..cc5ce153c Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json new file mode 100644 index 000000000..154c7070c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "marketing", + "dataset_name": "mmlu_no_train", + "dataset_size": 75296, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166260256, + "splits": { + "test": { + "name": "test", + "num_bytes": 66055, + "num_examples": 234, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7707, + "num_examples": 25, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1534, 
+ "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json new file mode 100644 index 000000000..e82964386 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "bde873ad8a86829e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..1c2fab4a7 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json new file mode 100644 index 000000000..e6235110f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "medical_genetics", + "dataset_name": "mmlu_no_train", + "dataset_size": 27242, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166212202, + "splits": { + "test": { + "name": "test", + "num_bytes": 22852, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3213, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1177, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json new file mode 100644 index 000000000..3c3acc041 --- /dev/null +++ 
b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d0d851bff1fdfe0b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..d9dbc5e37 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json new file mode 100644 index 000000000..e6235110f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "medical_genetics", + "dataset_name": "mmlu_no_train", + "dataset_size": 27242, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166212202, + "splits": { + "test": { + "name": "test", + "num_bytes": 22852, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3213, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1177, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json new file mode 100644 index 000000000..727f856b3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f2a4fa595b2dcd1f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..906ecad29 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json new file mode 100644 index 000000000..e6235110f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "medical_genetics", + "dataset_name": "mmlu_no_train", + "dataset_size": 27242, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166212202, + "splits": { + "test": { + "name": "test", + "num_bytes": 22852, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3213, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1177, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json new file mode 100644 index 000000000..e04887206 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "af71610776e81294", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..f04e72720 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow differ diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json new file mode 100644 index 000000000..dedba9719 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "miscellaneous", + "dataset_name": "mmlu_no_train", + "dataset_size": 177555, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166362515, + "splits": { + "test": { + "name": "test", + "num_bytes": 161003, + "num_examples": 783, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15780, + "num_examples": 86, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 772, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json new file mode 100644 index 000000000..66a12c573 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1d5ccb1485700225", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..10b24cea7 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json new file mode 100644 index 000000000..dedba9719 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n 
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "miscellaneous", + "dataset_name": "mmlu_no_train", + "dataset_size": 177555, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166362515, + "splits": { + "test": { + "name": "test", + "num_bytes": 161003, + "num_examples": 783, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15780, + "num_examples": 86, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 772, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json new file mode 100644 index 000000000..0a5fe8e4b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f9de05202963fc75", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..f0b81b687 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json new file mode 100644 index 000000000..dedba9719 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "miscellaneous", + "dataset_name": "mmlu_no_train", + "dataset_size": 177555, + "description": "This is a massive multitask test consisting of multiple-choice questions from 
various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166362515, + "splits": { + "test": { + "name": "test", + "num_bytes": 161003, + "num_examples": 783, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15780, + "num_examples": 86, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 772, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json new file mode 100644 index 000000000..f0204fbad --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "71a067f0b1b293b9", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..b385de016 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json new file mode 100644 index 000000000..08270f123 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_disputes", + "dataset_name": "mmlu_no_train", + "dataset_size": 128959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": 
"Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166313919, + "splits": { + "test": { + "name": "test", + "num_bytes": 114034, + "num_examples": 346, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 13092, + "num_examples": 38, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1833, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json new file mode 100644 index 000000000..94286b3bd --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "05f8d323418290df", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..642c29859 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json new file mode 100644 index 000000000..08270f123 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_disputes", + "dataset_name": "mmlu_no_train", + "dataset_size": 128959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166313919, + "splits": { + "test": { + "name": "test", + "num_bytes": 114034, + "num_examples": 346, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": 
"validation", + "num_bytes": 13092, + "num_examples": 38, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1833, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json new file mode 100644 index 000000000..e57275108 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "74135db3d6d37f16", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..e0593ec41 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json new file mode 100644 index 000000000..08270f123 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_disputes", + "dataset_name": "mmlu_no_train", + "dataset_size": 128959, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166313919, + "splits": { + "test": { + "name": "test", + "num_bytes": 114034, + "num_examples": 346, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 13092, + "num_examples": 38, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1833, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json new file mode 100644 index 000000000..47013fcc2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b19652aa9e4aa075", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..011a26242 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json new file mode 100644 index 000000000..89192f276 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_scenarios", + "dataset_name": "mmlu_no_train", + "dataset_size": 437386, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166622346, + "splits": { + "test": { + "name": "test", + "num_bytes": 391019, + "num_examples": 895, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 44226, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2141, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json new file mode 100644 index 000000000..3ed26a3a7 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ee4a7fb876009da2", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + 
"_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..13042e4cd Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json new file mode 100644 index 000000000..89192f276 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_scenarios", + "dataset_name": "mmlu_no_train", + "dataset_size": 437386, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166622346, + "splits": { + "test": { + "name": "test", + "num_bytes": 391019, + "num_examples": 895, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 44226, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2141, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json new file mode 100644 index 000000000..fb32e8756 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e6c87ce7884335d3", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..4d95fea39 Binary files /dev/null and 
b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json new file mode 100644 index 000000000..89192f276 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "moral_scenarios", + "dataset_name": "mmlu_no_train", + "dataset_size": 437386, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166622346, + "splits": { + "test": { + "name": "test", + "num_bytes": 391019, + "num_examples": 895, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 44226, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2141, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json new file mode 100644 index 000000000..bf5baf654 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7248069621edcad0", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..1781c5be7 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json new file mode 100644 index 000000000..7a952ea6a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json @@ -0,0 +1,70 @@ 
+{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "nutrition", + "dataset_name": "mmlu_no_train", + "dataset_size": 107367, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166292327, + "splits": { + "test": { + "name": "test", + "num_bytes": 96376, + "num_examples": 306, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8853, + "num_examples": 33, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2138, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json new file mode 100644 index 000000000..898344ead --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "2a885d2d82ab7d56", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..8024e87ff Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json new file mode 100644 index 000000000..7a952ea6a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "nutrition", + "dataset_name": "mmlu_no_train", + "dataset_size": 107367, + "description": "This is a massive 
multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166292327, + "splits": { + "test": { + "name": "test", + "num_bytes": 96376, + "num_examples": 306, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8853, + "num_examples": 33, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2138, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json new file mode 100644 index 000000000..953d91866 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e8cb6b6ae4881443", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..07fb20433 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json new file mode 100644 index 000000000..7a952ea6a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "nutrition", + "dataset_name": "mmlu_no_train", + "dataset_size": 107367, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + 
"dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166292327, + "splits": { + "test": { + "name": "test", + "num_bytes": 96376, + "num_examples": 306, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 8853, + "num_examples": 33, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2138, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json new file mode 100644 index 000000000..9fb755c16 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1815496614ea9299", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..47fd518af Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json new file mode 100644 index 000000000..f913fe885 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "philosophy", + "dataset_name": "mmlu_no_train", + "dataset_size": 95109, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280069, + "splits": { + "test": { + "name": "test", + "num_bytes": 84415, + "num_examples": 311, + "dataset_name": "mmlu_no_train" + }, + "validation": { + 
"name": "validation", + "num_bytes": 9648, + "num_examples": 34, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1046, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json new file mode 100644 index 000000000..92e00cf4b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "189278999a273b54", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..ad614a54b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json new file mode 100644 index 000000000..f913fe885 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "philosophy", + "dataset_name": "mmlu_no_train", + "dataset_size": 95109, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280069, + "splits": { + "test": { + "name": "test", + "num_bytes": 84415, + "num_examples": 311, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 9648, + "num_examples": 34, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1046, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json new file mode 
100644 index 000000000..3c66583d3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7483bf4f3ab0737c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..69a29d539 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json new file mode 100644 index 000000000..f913fe885 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "philosophy", + "dataset_name": "mmlu_no_train", + "dataset_size": 95109, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166280069, + "splits": { + "test": { + "name": "test", + "num_bytes": 84415, + "num_examples": 311, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 9648, + "num_examples": 34, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1046, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json new file mode 100644 index 000000000..5d3cf0088 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "108a3a0b6531be26", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git 
a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..a6118f7ab Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json new file mode 100644 index 000000000..e0a293964 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "prehistory", + "dataset_name": "mmlu_no_train", + "dataset_size": 106817, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166291777, + "splits": { + "test": { + "name": "test", + "num_bytes": 94118, + "num_examples": 324, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10763, + "num_examples": 35, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1936, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json new file mode 100644 index 000000000..3814972ad --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4b2d5e67642ce059", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..9019d8b53 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json 
new file mode 100644 index 000000000..e0a293964 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "prehistory", + "dataset_name": "mmlu_no_train", + "dataset_size": 106817, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166291777, + "splits": { + "test": { + "name": "test", + "num_bytes": 94118, + "num_examples": 324, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10763, + "num_examples": 35, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1936, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json new file mode 100644 index 000000000..c43863570 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b772f2a98fbdbb69", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..f6f82463f Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json new file mode 100644 index 000000000..e0a293964 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the 
International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "prehistory", + "dataset_name": "mmlu_no_train", + "dataset_size": 106817, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166291777, + "splits": { + "test": { + "name": "test", + "num_bytes": 94118, + "num_examples": 324, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 10763, + "num_examples": 35, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1936, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json new file mode 100644 index 000000000..00bba37c6 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "81331e81e5b40812", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..529774d75 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json new file mode 100644 index 000000000..a522ef00d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_accounting", + "dataset_name": "mmlu_no_train", + "dataset_size": 149620, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, 
and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166334580, + "splits": { + "test": { + "name": "test", + "num_bytes": 132152, + "num_examples": 282, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15197, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2271, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json new file mode 100644 index 000000000..772e95de3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7a37bb73c230072b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..fd46fae7a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json new file mode 100644 index 000000000..a522ef00d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_accounting", + "dataset_name": "mmlu_no_train", + "dataset_size": 149620, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + 
"dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166334580, + "splits": { + "test": { + "name": "test", + "num_bytes": 132152, + "num_examples": 282, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15197, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2271, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json new file mode 100644 index 000000000..a99dbd38a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "60bce76fbb51330e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..a16928a68 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json new file mode 100644 index 000000000..a522ef00d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_accounting", + "dataset_name": "mmlu_no_train", + "dataset_size": 149620, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166334580, + "splits": { + "test": { + "name": "test", + "num_bytes": 132152, + 
"num_examples": 282, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 15197, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2271, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json new file mode 100644 index 000000000..473c99862 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "49887e665a8c5ed4", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..dcb2da04e Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json new file mode 100644 index 000000000..19542c4b3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 2136035, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 168320995, + "splits": { + "test": { + "name": "test", + "num_bytes": 1922430, + "num_examples": 1534, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 206907, + "num_examples": 170, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 6698, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No 
newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json new file mode 100644 index 000000000..bb8a97a21 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "71dee5b440d08bac", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..c07a89655 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json new file mode 100644 index 000000000..19542c4b3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 2136035, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 168320995, + "splits": { + "test": { + "name": "test", + "num_bytes": 1922430, + "num_examples": 1534, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 206907, + "num_examples": 170, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 6698, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json new file mode 100644 index 000000000..7a0fb0c9a --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + 
"_fingerprint": "3258b12090812d7c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..18d4eccb4 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json new file mode 100644 index 000000000..19542c4b3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_law", + "dataset_name": "mmlu_no_train", + "dataset_size": 2136035, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 168320995, + "splits": { + "test": { + "name": "test", + "num_bytes": 1922430, + "num_examples": 1534, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 206907, + "num_examples": 170, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 6698, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json new file mode 100644 index 000000000..98df05f65 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "26cd1d1c01ea63c8", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..b1dc86239 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json new file mode 100644 index 000000000..5414e0c55 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 252879, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166437839, + "splits": { + "test": { + "name": "test", + "num_bytes": 224349, + "num_examples": 272, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 24610, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3920, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json new file mode 100644 index 000000000..ad0e06997 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "872fb834fa470cf8", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..992fd2e28 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json new file mode 100644 index 000000000..5414e0c55 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 252879, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166437839, + "splits": { + "test": { + "name": "test", + "num_bytes": 224349, + "num_examples": 272, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 24610, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3920, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json new file mode 100644 index 000000000..edcc94714 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d635efcb40b3db20", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..05d43c4a2 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json new file mode 100644 index 000000000..5414e0c55 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n 
title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_medicine", + "dataset_name": "mmlu_no_train", + "dataset_size": 252879, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166437839, + "splits": { + "test": { + "name": "test", + "num_bytes": 224349, + "num_examples": 272, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 24610, + "num_examples": 31, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 3920, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json new file mode 100644 index 000000000..62e8d7188 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "800738ede9c5344a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..d017b5abb Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json new file mode 100644 index 000000000..5aa9b280d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_psychology", + 
"dataset_name": "mmlu_no_train", + "dataset_size": 275753, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166460713, + "splits": { + "test": { + "name": "test", + "num_bytes": 242411, + "num_examples": 612, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 30952, + "num_examples": 69, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2390, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json new file mode 100644 index 000000000..9ed325f8f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7956992fec89d96a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..38f98a036 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json new file mode 100644 index 000000000..5aa9b280d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_psychology", + "dataset_name": "mmlu_no_train", + "dataset_size": 275753, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + 
"https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166460713, + "splits": { + "test": { + "name": "test", + "num_bytes": 242411, + "num_examples": 612, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 30952, + "num_examples": 69, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2390, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json new file mode 100644 index 000000000..2c0c4b0ce --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1fe2948ecfe8efbe", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..8f335d550 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json new file mode 100644 index 000000000..5aa9b280d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "professional_psychology", + "dataset_name": "mmlu_no_train", + "dataset_size": 275753, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": 
"string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166460713, + "splits": { + "test": { + "name": "test", + "num_bytes": 242411, + "num_examples": 612, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 30952, + "num_examples": 69, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 2390, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json new file mode 100644 index 000000000..f1799fba2 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4c1c6203ffd8e681", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..b453b7da6 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json new file mode 100644 index 000000000..52455b558 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "public_relations", + "dataset_name": "mmlu_no_train", + "dataset_size": 37326, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222286, + "splits": { + "test": { + "name": "test", + "num_bytes": 30948, + "num_examples": 110, + "dataset_name": "mmlu_no_train" + }, + "validation": { + 
"name": "validation", + "num_bytes": 4794, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1584, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json new file mode 100644 index 000000000..bceb74d58 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8ea2239c27f9682a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..3c76c47b1 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json new file mode 100644 index 000000000..52455b558 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "public_relations", + "dataset_name": "mmlu_no_train", + "dataset_size": 37326, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222286, + "splits": { + "test": { + "name": "test", + "num_bytes": 30948, + "num_examples": 110, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4794, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1584, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json 
b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json new file mode 100644 index 000000000..07f4ddb6d --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a4029ee86f9a4d47", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..f9e5cd739 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json new file mode 100644 index 000000000..52455b558 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "public_relations", + "dataset_name": "mmlu_no_train", + "dataset_size": 37326, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166222286, + "splits": { + "test": { + "name": "test", + "num_bytes": 30948, + "num_examples": 110, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 4794, + "num_examples": 12, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1584, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json new file mode 100644 index 000000000..9a465c79c --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "04ecaee7c26d1413", + "_format_columns": null, + 
"_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..244a3773b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json new file mode 100644 index 000000000..df7be81c1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "security_studies", + "dataset_name": "mmlu_no_train", + "dataset_size": 238320, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166423280, + "splits": { + "test": { + "name": "test", + "num_bytes": 209732, + "num_examples": 245, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 23165, + "num_examples": 27, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 5423, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json new file mode 100644 index 000000000..889fec986 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8b9eda9017d8114c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..037e87ea9 Binary files /dev/null and 
b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json new file mode 100644 index 000000000..df7be81c1 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "security_studies", + "dataset_name": "mmlu_no_train", + "dataset_size": 238320, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166423280, + "splits": { + "test": { + "name": "test", + "num_bytes": 209732, + "num_examples": 245, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 23165, + "num_examples": 27, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 5423, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json new file mode 100644 index 000000000..72c356331 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f1c9d865f880ba6f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..5c62c3ee9 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json new file mode 100644 index 000000000..df7be81c1 --- /dev/null +++ 
b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "security_studies", + "dataset_name": "mmlu_no_train", + "dataset_size": 238320, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166423280, + "splits": { + "test": { + "name": "test", + "num_bytes": 209732, + "num_examples": 245, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 23165, + "num_examples": 27, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 5423, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json new file mode 100644 index 000000000..1379ab938 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8488581c295e3561", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..14218401d Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json new file mode 100644 index 000000000..930c057eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations 
(ICLR)},\n year={2021}\n }\n", + "config_name": "sociology", + "dataset_name": "mmlu_no_train", + "dataset_size": 77968, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262928, + "splits": { + "test": { + "name": "test", + "num_bytes": 68844, + "num_examples": 201, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7458, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json new file mode 100644 index 000000000..27d53645e --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b6dfd1c680db404d", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..da014d7e4 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json new file mode 100644 index 000000000..930c057eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "sociology", + "dataset_name": "mmlu_no_train", + "dataset_size": 77968, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + 
"download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262928, + "splits": { + "test": { + "name": "test", + "num_bytes": 68844, + "num_examples": 201, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7458, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/state.json new file mode 100644 index 000000000..13f4d48f3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "445e5f0b6dc402d5", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..c6126a332 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json new file mode 100644 index 000000000..930c057eb --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "sociology", + "dataset_name": "mmlu_no_train", + "dataset_size": 77968, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166262928, + "splits": { + 
"test": { + "name": "test", + "num_bytes": 68844, + "num_examples": 201, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 7458, + "num_examples": 22, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1666, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json new file mode 100644 index 000000000..9b3189b39 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "92b9ea67f8f9d7c1", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..0140f9cce Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json new file mode 100644 index 000000000..200439bf8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "us_foreign_policy", + "dataset_name": "mmlu_no_train", + "dataset_size": 35718, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220678, + "splits": { + "test": { + "name": "test", + "num_bytes": 30531, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3483, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1704, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } 
+} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json new file mode 100644 index 000000000..88d7b978f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a0a530b6c3c1ce09", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..ed9a76267 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json new file mode 100644 index 000000000..200439bf8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "us_foreign_policy", + "dataset_name": "mmlu_no_train", + "dataset_size": 35718, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220678, + "splits": { + "test": { + "name": "test", + "num_bytes": 30531, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3483, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1704, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json new file mode 100644 index 000000000..a3b2d3d14 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + 
], + "_fingerprint": "060dd16f3358895b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..f12d68c5b Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json new file mode 100644 index 000000000..200439bf8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "us_foreign_policy", + "dataset_name": "mmlu_no_train", + "dataset_size": 35718, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166220678, + "splits": { + "test": { + "name": "test", + "num_bytes": 30531, + "num_examples": 100, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3483, + "num_examples": 11, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1704, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json new file mode 100644 index 000000000..575b19a25 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "090d4a120ad1b821", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow 
b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..99ac7222a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json new file mode 100644 index 000000000..e0d73ab9b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "virology", + "dataset_name": "mmlu_no_train", + "dataset_size": 47550, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166232510, + "splits": { + "test": { + "name": "test", + "num_bytes": 40739, + "num_examples": 166, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5667, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1144, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/state.json new file mode 100644 index 000000000..c8f333c71 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "6a93076594896b3e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..767d2ace0 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json new file mode 100644 index 000000000..e0d73ab9b --- /dev/null +++ 
b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "virology", + "dataset_name": "mmlu_no_train", + "dataset_size": 47550, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166232510, + "splits": { + "test": { + "name": "test", + "num_bytes": 40739, + "num_examples": 166, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5667, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1144, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/state.json new file mode 100644 index 000000000..c127fb1e3 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c09188a0a273216a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..5d01e8a26 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json new file mode 100644 index 000000000..e0d73ab9b --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + 
"config_name": "virology", + "dataset_name": "mmlu_no_train", + "dataset_size": 47550, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166232510, + "splits": { + "test": { + "name": "test", + "num_bytes": 40739, + "num_examples": 166, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 5667, + "num_examples": 18, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 1144, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/state.json new file mode 100644 index 000000000..d962e50a0 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/virology/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5fed6e3919aa025f", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow new file mode 100644 index 000000000..4fce96213 Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json new file mode 100644 index 000000000..9871184f5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "world_religions", + "dataset_name": "mmlu_no_train", + "dataset_size": 32378, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": 
null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166217338, + "splits": { + "test": { + "name": "test", + "num_bytes": 28511, + "num_examples": 171, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3114, + "num_examples": 19, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 753, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json new file mode 100644 index 000000000..0308a6aef --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "86cf0e1681022422", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "dev" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow new file mode 100644 index 000000000..4a6c13e0c Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json new file mode 100644 index 000000000..9871184f5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "world_religions", + "dataset_name": "mmlu_no_train", + "dataset_size": 32378, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + 
"size_in_bytes": 166217338, + "splits": { + "test": { + "name": "test", + "num_bytes": 28511, + "num_examples": 171, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3114, + "num_examples": 19, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 753, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + "version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json new file mode 100644 index 000000000..344a3ef0f --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "187c421635b19f5d", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "test" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..4dbbe469a Binary files /dev/null and b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json new file mode 100644 index 000000000..9871184f5 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json @@ -0,0 +1,70 @@ +{ + "builder_name": "mmlu_no_train", + "citation": "@article{hendryckstest2021,\n title={Measuring Massive Multitask Language Understanding},\n author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},\n journal={Proceedings of the International Conference on Learning Representations (ICLR)},\n year={2021}\n }\n", + "config_name": "world_religions", + "dataset_name": "mmlu_no_train", + "dataset_size": 32378, + "description": "This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.\n", + "download_checksums": { + "https://huggingface.co/datasets/cais/mmlu/resolve/main/data.tar": { + "num_bytes": 166184960, + "checksum": null + } + }, + "download_size": 166184960, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "subject": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "answer": { + "names": [ + "A", + "B", + "C", + "D" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/hendrycks/test", + "license": "", + "size_in_bytes": 166217338, + "splits": { + "test": { + "name": "test", + "num_bytes": 28511, + "num_examples": 171, + "dataset_name": "mmlu_no_train" + }, + "validation": { + "name": "validation", + "num_bytes": 3114, + "num_examples": 19, + "dataset_name": "mmlu_no_train" + }, + "dev": { + "name": "dev", + "num_bytes": 753, + "num_examples": 5, + "dataset_name": "mmlu_no_train" + } + }, + "version": { + 
"version_str": "1.0.0", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json new file mode 100644 index 000000000..e6a72d2b8 --- /dev/null +++ b/olmo_data/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "1f94975f68796ebf", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..882ec4aed Binary files /dev/null and b/olmo_data/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/hellaswag/none/validation/dataset_info.json b/olmo_data/hf_datasets/hellaswag/none/validation/dataset_info.json new file mode 100644 index 000000000..486e88bac --- /dev/null +++ b/olmo_data/hf_datasets/hellaswag/none/validation/dataset_info.json @@ -0,0 +1,97 @@ +{ + "builder_name": "hellaswag", + "citation": "@inproceedings{zellers2019hellaswag,\n title={HellaSwag: Can a Machine Really Finish Your Sentence?},\n author={Zellers, Rowan and Holtzman, Ari and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin},\n booktitle ={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},\n year={2019}\n}\n", + "config_name": "default", + "dataset_name": "hellaswag", + "dataset_size": 65200194, + "description": "\nHellaSwag: Can a Machine Really Finish Your Sentence? is a new dataset for commonsense NLI. 
A paper was published at ACL2019.\n", + "download_checksums": { + "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_train.jsonl": { + "num_bytes": 47496131, + "checksum": null + }, + "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_test.jsonl": { + "num_bytes": 11752147, + "checksum": null + }, + "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl": { + "num_bytes": 12246618, + "checksum": null + } + }, + "download_size": 71494896, + "features": { + "ind": { + "dtype": "int32", + "_type": "Value" + }, + "activity_label": { + "dtype": "string", + "_type": "Value" + }, + "ctx_a": { + "dtype": "string", + "_type": "Value" + }, + "ctx_b": { + "dtype": "string", + "_type": "Value" + }, + "ctx": { + "dtype": "string", + "_type": "Value" + }, + "endings": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "source_id": { + "dtype": "string", + "_type": "Value" + }, + "split": { + "dtype": "string", + "_type": "Value" + }, + "split_type": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "https://rowanzellers.com/hellaswag/", + "license": "", + "size_in_bytes": 136695090, + "splits": { + "train": { + "name": "train", + "num_bytes": 43232624, + "num_examples": 39905, + "dataset_name": "hellaswag" + }, + "test": { + "name": "test", + "num_bytes": 10791853, + "num_examples": 10003, + "dataset_name": "hellaswag" + }, + "validation": { + "name": "validation", + "num_bytes": 11175717, + "num_examples": 10042, + "dataset_name": "hellaswag" + } + }, + "version": { + "version_str": "0.1.0", + "major": 0, + "minor": 1, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/hellaswag/none/validation/state.json b/olmo_data/hf_datasets/hellaswag/none/validation/state.json new file mode 100644 index 000000000..7f14e4245 --- /dev/null +++ b/olmo_data/hf_datasets/hellaswag/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9042f0834e9b2f00", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..d44073aa6 Binary files /dev/null and b/olmo_data/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/nq_open/none/validation/dataset_info.json b/olmo_data/hf_datasets/nq_open/none/validation/dataset_info.json new file mode 100644 index 000000000..668266537 --- /dev/null +++ b/olmo_data/hf_datasets/nq_open/none/validation/dataset_info.json @@ -0,0 +1,55 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "nq_open", + "dataset_name": "nq_open", + "dataset_size": 6965065, + "description": "", + "download_checksums": { + "hf://datasets/nq_open@5dd9790a83002ad084ddeb7c420dc716852c6f28/nq_open/train-00000-of-00001.parquet": { + "num_bytes": 4464642, + "checksum": null + }, + "hf://datasets/nq_open@5dd9790a83002ad084ddeb7c420dc716852c6f28/nq_open/validation-00000-of-00001.parquet": { + "num_bytes": 213603, + "checksum": null + } + }, + "download_size": 4678245, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "answer": { + 
"feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 11643310, + "splits": { + "train": { + "name": "train", + "num_bytes": 6651236, + "num_examples": 87925, + "dataset_name": "nq_open" + }, + "validation": { + "name": "validation", + "num_bytes": 313829, + "num_examples": 3610, + "dataset_name": "nq_open" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/nq_open/none/validation/state.json b/olmo_data/hf_datasets/nq_open/none/validation/state.json new file mode 100644 index 000000000..674de2c40 --- /dev/null +++ b/olmo_data/hf_datasets/nq_open/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8f40661efe3d6e1c", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..38bbf1b62 Binary files /dev/null and b/olmo_data/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/openbookqa/main/validation/dataset_info.json b/olmo_data/hf_datasets/openbookqa/main/validation/dataset_info.json new file mode 100644 index 000000000..bc69850cc --- /dev/null +++ b/olmo_data/hf_datasets/openbookqa/main/validation/dataset_info.json @@ -0,0 +1,79 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "main", + "dataset_name": "openbookqa", + "dataset_size": 1082573, + "description": "", + "download_checksums": { + "hf://datasets/openbookqa@388097ea7776314e93a529163e0fea805b8a6454/main/train-00000-of-00001.parquet": { + "num_bytes": 495845, + "checksum": null + }, + "hf://datasets/openbookqa@388097ea7776314e93a529163e0fea805b8a6454/main/validation-00000-of-00001.parquet": { + "num_bytes": 58233, + "checksum": null + }, + "hf://datasets/openbookqa@388097ea7776314e93a529163e0fea805b8a6454/main/test-00000-of-00001.parquet": { + "num_bytes": 55535, + "checksum": null + } + }, + "download_size": 609613, + "features": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "question_stem": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "answerKey": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 1692186, + "splits": { + "train": { + "name": "train", + "num_bytes": 895386, + "num_examples": 4957, + "dataset_name": "openbookqa" + }, + "validation": { + "name": "validation", + "num_bytes": 95428, + "num_examples": 500, + "dataset_name": "openbookqa" + }, + "test": { + "name": "test", + "num_bytes": 91759, + "num_examples": 500, + "dataset_name": "openbookqa" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/openbookqa/main/validation/state.json b/olmo_data/hf_datasets/openbookqa/main/validation/state.json new file mode 100644 index 000000000..47a5cb28d --- /dev/null +++ 
b/olmo_data/hf_datasets/openbookqa/main/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f77d50ae1177c468", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..9a460181e Binary files /dev/null and b/olmo_data/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/piqa/plain_text/validation/dataset_info.json b/olmo_data/hf_datasets/piqa/plain_text/validation/dataset_info.json new file mode 100644 index 000000000..6d073d396 --- /dev/null +++ b/olmo_data/hf_datasets/piqa/plain_text/validation/dataset_info.json @@ -0,0 +1,69 @@ +{ + "builder_name": "piqa", + "citation": "@inproceedings{Bisk2020,\n author = {Yonatan Bisk and Rowan Zellers and\n Ronan Le Bras and Jianfeng Gao\n and Yejin Choi},\n title = {PIQA: Reasoning about Physical Commonsense in\n Natural Language},\n booktitle = {Thirty-Fourth AAAI Conference on\n Artificial Intelligence},\n year = {2020},\n}\n", + "config_name": "plain_text", + "dataset_name": "piqa", + "dataset_size": 5329820, + "description": "To apply eyeshadow without a brush, should I use a cotton swab or a toothpick?\nQuestions requiring this kind of physical commonsense pose a challenge to state-of-the-art\nnatural language understanding systems. The PIQA dataset introduces the task of physical commonsense reasoning\nand a corresponding benchmark dataset Physical Interaction: Question Answering or PIQA.\n\nPhysical commonsense knowledge is a major challenge on the road to true AI-completeness,\nincluding robots that interact with the world and understand natural language.\n\nPIQA focuses on everyday situations with a preference for atypical solutions.\nThe dataset is inspired by instructables.com, which provides users with instructions on how to build, craft,\nbake, or manipulate objects using everyday materials.\n\nThe underlying task is formualted as multiple choice question answering:\ngiven a question `q` and two possible solutions `s1`, `s2`, a model or\na human must choose the most appropriate solution, of which exactly one is correct.\nThe dataset is further cleaned of basic artifacts using the AFLite algorithm which is an improvement of\nadversarial filtering. 
The dataset contains 16,000 examples for training, 2,000 for development and 3,000 for testing.\n", + "download_checksums": { + "https://storage.googleapis.com/ai2-mosaic/public/physicaliqa/physicaliqa-train-dev.zip": { + "num_bytes": 1824009, + "checksum": null + }, + "https://yonatanbisk.com/piqa/data/tests.jsonl": { + "num_bytes": 814616, + "checksum": null + } + }, + "download_size": 2638625, + "features": { + "goal": { + "dtype": "string", + "_type": "Value" + }, + "sol1": { + "dtype": "string", + "_type": "Value" + }, + "sol2": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "names": [ + "0", + "1" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://yonatanbisk.com/piqa/", + "license": "", + "size_in_bytes": 7968445, + "splits": { + "train": { + "name": "train", + "num_bytes": 4104002, + "num_examples": 16113, + "dataset_name": "piqa" + }, + "test": { + "name": "test", + "num_bytes": 761509, + "num_examples": 3084, + "dataset_name": "piqa" + }, + "validation": { + "name": "validation", + "num_bytes": 464309, + "num_examples": 1838, + "dataset_name": "piqa" + } + }, + "version": { + "version_str": "1.1.0", + "major": 1, + "minor": 1, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/piqa/plain_text/validation/state.json b/olmo_data/hf_datasets/piqa/plain_text/validation/state.json new file mode 100644 index 000000000..c53a36e69 --- /dev/null +++ b/olmo_data/hf_datasets/piqa/plain_text/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c305b46a7e84e936", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..b890b7685 Binary files /dev/null and b/olmo_data/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/sciq/none/validation/dataset_info.json b/olmo_data/hf_datasets/sciq/none/validation/dataset_info.json new file mode 100644 index 000000000..66bf12f20 --- /dev/null +++ b/olmo_data/hf_datasets/sciq/none/validation/dataset_info.json @@ -0,0 +1,78 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "default", + "dataset_name": "sciq", + "dataset_size": 7664230, + "description": "", + "download_checksums": { + "hf://datasets/sciq@2c94ad3e1aafab77146f384e23536f97a4849815/data/train-00000-of-00001.parquet": { + "num_bytes": 3993099, + "checksum": null + }, + "hf://datasets/sciq@2c94ad3e1aafab77146f384e23536f97a4849815/data/validation-00000-of-00001.parquet": { + "num_bytes": 338503, + "checksum": null + }, + "hf://datasets/sciq@2c94ad3e1aafab77146f384e23536f97a4849815/data/test-00000-of-00001.parquet": { + "num_bytes": 342808, + "checksum": null + } + }, + "download_size": 4674410, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "distractor3": { + "dtype": "string", + "_type": "Value" + }, + "distractor1": { + "dtype": "string", + "_type": "Value" + }, + "distractor2": { + "dtype": "string", + "_type": "Value" + }, + "correct_answer": { + "dtype": "string", + "_type": "Value" + }, + "support": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 12338640, + "splits": { + "train": { + "name": "train", + 
"num_bytes": 6546183, + "num_examples": 11679, + "dataset_name": "sciq" + }, + "validation": { + "name": "validation", + "num_bytes": 554120, + "num_examples": 1000, + "dataset_name": "sciq" + }, + "test": { + "name": "test", + "num_bytes": 563927, + "num_examples": 1000, + "dataset_name": "sciq" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/sciq/none/validation/state.json b/olmo_data/hf_datasets/sciq/none/validation/state.json new file mode 100644 index 000000000..d277bcb2a --- /dev/null +++ b/olmo_data/hf_datasets/sciq/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f4b2e70569612ae4", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..035a5f4ae Binary files /dev/null and b/olmo_data/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/social_i_qa/none/validation/dataset_info.json b/olmo_data/hf_datasets/social_i_qa/none/validation/dataset_info.json new file mode 100644 index 000000000..8809ca0ec --- /dev/null +++ b/olmo_data/hf_datasets/social_i_qa/none/validation/dataset_info.json @@ -0,0 +1,64 @@ +{ + "builder_name": "social_i_qa", + "citation": "\n", + "config_name": "default", + "dataset_name": "social_i_qa", + "dataset_size": 6700024, + "description": "We introduce Social IQa: Social Interaction QA, a new question-answering benchmark for testing social commonsense intelligence. Contrary to many prior benchmarks that focus on physical or taxonomic knowledge, Social IQa focuses on reasoning about people\u2019s actions and their social implications. For example, given an action like \"Jesse saw a concert\" and a question like \"Why did Jesse do this?\", humans can easily infer that Jesse wanted \"to see their favorite performer\" or \"to enjoy the music\", and not \"to see what's happening inside\" or \"to see if it works\". The actions in Social IQa span a wide variety of social situations, and answer candidates contain both human-curated answers and adversarially-filtered machine-generated candidates. Social IQa contains over 37,000 QA pairs for evaluating models\u2019 abilities to reason about the social implications of everyday events and situations. 
(Less)\n", + "download_checksums": { + "https://storage.googleapis.com/ai2-mosaic/public/socialiqa/socialiqa-train-dev.zip": { + "num_bytes": 2198056, + "checksum": null + } + }, + "download_size": 2198056, + "features": { + "context": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "answerA": { + "dtype": "string", + "_type": "Value" + }, + "answerB": { + "dtype": "string", + "_type": "Value" + }, + "answerC": { + "dtype": "string", + "_type": "Value" + }, + "label": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "https://leaderboard.allenai.org/socialiqa/submissions/get-started", + "license": "", + "size_in_bytes": 8898080, + "splits": { + "train": { + "name": "train", + "num_bytes": 6327209, + "num_examples": 33410, + "dataset_name": "social_i_qa" + }, + "validation": { + "name": "validation", + "num_bytes": 372815, + "num_examples": 1954, + "dataset_name": "social_i_qa" + } + }, + "version": { + "version_str": "0.1.0", + "major": 0, + "minor": 1, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/social_i_qa/none/validation/state.json b/olmo_data/hf_datasets/social_i_qa/none/validation/state.json new file mode 100644 index 000000000..e90459ae5 --- /dev/null +++ b/olmo_data/hf_datasets/social_i_qa/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a74b93e67cb9ff1e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..27a60b06e Binary files /dev/null and b/olmo_data/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/super_glue/cb/validation/dataset_info.json b/olmo_data/hf_datasets/super_glue/cb/validation/dataset_info.json new file mode 100644 index 000000000..a7657eb24 --- /dev/null +++ b/olmo_data/hf_datasets/super_glue/cb/validation/dataset_info.json @@ -0,0 +1,66 @@ +{ + "builder_name": "super_glue", + "citation": "@article{de marneff_simons_tonhauser_2019,\n title={The CommitmentBank: Investigating projection in naturally occurring discourse},\n journal={proceedings of Sinn und Bedeutung 23},\n author={De Marneff, Marie-Catherine and Simons, Mandy and Tonhauser, Judith},\n year={2019}\n}\n@article{wang2019superglue,\n title={SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems},\n author={Wang, Alex and Pruksachatkun, Yada and Nangia, Nikita and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R},\n journal={arXiv preprint arXiv:1905.00537},\n year={2019}\n}\n\nNote that each SuperGLUE dataset has its own citation. Please see the source to\nget the correct citation for each contained dataset.\n", + "config_name": "cb", + "dataset_name": "super_glue", + "dataset_size": 202393, + "description": "SuperGLUE (https://super.gluebenchmark.com/) is a new benchmark styled after\nGLUE with a new set of more difficult language understanding tasks, improved\nresources, and a new public leaderboard.\n\nThe CommitmentBank (De Marneffe et al., 2019) is a corpus of short texts in which at least\none sentence contains an embedded clause. 
Each of these embedded clauses is annotated with the\ndegree to which we expect that the person who wrote the text is committed to the truth of the clause.\nThe resulting task framed as three-class textual entailment on examples that are drawn from the Wall\nStreet Journal, fiction from the British National Corpus, and Switchboard. Each example consists\nof a premise containing an embedded clause and the corresponding hypothesis is the extraction of\nthat clause. We use a subset of the data that had inter-annotator agreement above 0.85. The data is\nimbalanced (relatively fewer neutral examples), so we evaluate using accuracy and F1, where for\nmulti-class F1 we compute the unweighted average of the F1 per class.", + "download_checksums": { + "https://dl.fbaipublicfiles.com/glue/superglue/data/v2/CB.zip": { + "num_bytes": 75482, + "checksum": null + } + }, + "download_size": 75482, + "features": { + "premise": { + "dtype": "string", + "_type": "Value" + }, + "hypothesis": { + "dtype": "string", + "_type": "Value" + }, + "idx": { + "dtype": "int32", + "_type": "Value" + }, + "label": { + "names": [ + "entailment", + "contradiction", + "neutral" + ], + "_type": "ClassLabel" + } + }, + "homepage": "https://github.com/mcdm/CommitmentBank", + "license": "", + "size_in_bytes": 277875, + "splits": { + "train": { + "name": "train", + "num_bytes": 87050, + "num_examples": 250, + "dataset_name": "super_glue" + }, + "validation": { + "name": "validation", + "num_bytes": 21851, + "num_examples": 56, + "dataset_name": "super_glue" + }, + "test": { + "name": "test", + "num_bytes": 93492, + "num_examples": 250, + "dataset_name": "super_glue" + } + }, + "version": { + "version_str": "1.0.3", + "major": 1, + "minor": 0, + "patch": 3 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/super_glue/cb/validation/state.json b/olmo_data/hf_datasets/super_glue/cb/validation/state.json new file mode 100644 index 000000000..77ab9028a --- /dev/null +++ b/olmo_data/hf_datasets/super_glue/cb/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "21634d8a438233eb", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..0bd85038f Binary files /dev/null and b/olmo_data/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/super_glue/copa/validation/dataset_info.json b/olmo_data/hf_datasets/super_glue/copa/validation/dataset_info.json new file mode 100644 index 000000000..af5a62539 --- /dev/null +++ b/olmo_data/hf_datasets/super_glue/copa/validation/dataset_info.json @@ -0,0 +1,73 @@ +{ + "builder_name": "super_glue", + "citation": "@inproceedings{roemmele2011choice,\n title={Choice of plausible alternatives: An evaluation of commonsense causal reasoning},\n author={Roemmele, Melissa and Bejan, Cosmin Adrian and Gordon, Andrew S},\n booktitle={2011 AAAI Spring Symposium Series},\n year={2011}\n}\n@article{wang2019superglue,\n title={SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems},\n author={Wang, Alex and Pruksachatkun, Yada and Nangia, Nikita and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel 
R},\n journal={arXiv preprint arXiv:1905.00537},\n year={2019}\n}\n\nNote that each SuperGLUE dataset has its own citation. Please see the source to\nget the correct citation for each contained dataset.\n", + "config_name": "copa", + "dataset_name": "super_glue", + "dataset_size": 121558, + "description": "SuperGLUE (https://super.gluebenchmark.com/) is a new benchmark styled after\nGLUE with a new set of more difficult language understanding tasks, improved\nresources, and a new public leaderboard.\n\nThe Choice Of Plausible Alternatives (COPA, Roemmele et al., 2011) dataset is a causal\nreasoning task in which a system is given a premise sentence and two possible alternatives. The\nsystem must choose the alternative which has the more plausible causal relationship with the premise.\nThe method used for the construction of the alternatives ensures that the task requires causal reasoning\nto solve. Examples either deal with alternative possible causes or alternative possible effects of the\npremise sentence, accompanied by a simple question disambiguating between the two instance\ntypes for the model. All examples are handcrafted and focus on topics from online blogs and a\nphotography-related encyclopedia. Following the recommendation of the authors, we evaluate using\naccuracy.", + "download_checksums": { + "https://dl.fbaipublicfiles.com/glue/superglue/data/v2/COPA.zip": { + "num_bytes": 43986, + "checksum": null + } + }, + "download_size": 43986, + "features": { + "premise": { + "dtype": "string", + "_type": "Value" + }, + "choice1": { + "dtype": "string", + "_type": "Value" + }, + "choice2": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "idx": { + "dtype": "int32", + "_type": "Value" + }, + "label": { + "names": [ + "choice1", + "choice2" + ], + "_type": "ClassLabel" + } + }, + "homepage": "http://people.ict.usc.edu/~gordon/copa.html", + "license": "", + "size_in_bytes": 165544, + "splits": { + "train": { + "name": "train", + "num_bytes": 49233, + "num_examples": 400, + "dataset_name": "super_glue" + }, + "validation": { + "name": "validation", + "num_bytes": 12479, + "num_examples": 100, + "dataset_name": "super_glue" + }, + "test": { + "name": "test", + "num_bytes": 59846, + "num_examples": 500, + "dataset_name": "super_glue" + } + }, + "version": { + "version_str": "1.0.3", + "major": 1, + "minor": 0, + "patch": 3 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/super_glue/copa/validation/state.json b/olmo_data/hf_datasets/super_glue/copa/validation/state.json new file mode 100644 index 000000000..4524fcf7f --- /dev/null +++ b/olmo_data/hf_datasets/super_glue/copa/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "c785b24eeb544f2a", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..5d4cf5846 Binary files /dev/null and b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json new file mode 100644 
index 000000000..f4275f04b --- /dev/null +++ b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json @@ -0,0 +1,83 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "default", + "dataset_name": "commonsense_qa", + "dataset_size": 2739484, + "description": "", + "download_checksums": { + "hf://datasets/tau/commonsense_qa@94630fe30dad47192a8546eb75f094926d47e155/data/train-00000-of-00001.parquet": { + "num_bytes": 1247103, + "checksum": null + }, + "hf://datasets/tau/commonsense_qa@94630fe30dad47192a8546eb75f094926d47e155/data/validation-00000-of-00001.parquet": { + "num_bytes": 160240, + "checksum": null + }, + "hf://datasets/tau/commonsense_qa@94630fe30dad47192a8546eb75f094926d47e155/data/test-00000-of-00001.parquet": { + "num_bytes": 151227, + "checksum": null + } + }, + "download_size": 1558570, + "features": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "question": { + "dtype": "string", + "_type": "Value" + }, + "question_concept": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "feature": { + "label": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "answerKey": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 4298054, + "splits": { + "train": { + "name": "train", + "num_bytes": 2207794, + "num_examples": 9741, + "dataset_name": "commonsense_qa" + }, + "validation": { + "name": "validation", + "num_bytes": 273848, + "num_examples": 1221, + "dataset_name": "commonsense_qa" + }, + "test": { + "name": "test", + "num_bytes": 257842, + "num_examples": 1140, + "dataset_name": "commonsense_qa" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/state.json b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/state.json new file mode 100644 index 000000000..2dbb62d13 --- /dev/null +++ b/olmo_data/hf_datasets/tau/commonsense_qa/none/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fbff160a071447de", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..edf5cccb4 Binary files /dev/null and b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json new file mode 100644 index 000000000..a1a5ebdd9 --- /dev/null +++ b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json @@ -0,0 +1,152 @@ +{ + "builder_name": "parquet", + "citation": "", + "config_name": "rc.wikipedia.nocontext", + "dataset_name": "trivia_qa", + "dataset_size": 56368074, + "description": "", + "download_checksums": { + "hf://datasets/trivia_qa@0f7faf33a3908546c6fd5b73a660e0f8ff173c2f/rc.wikipedia.nocontext/train-00000-of-00001.parquet": { + "num_bytes": 24953647, + "checksum": null + }, + 
"hf://datasets/trivia_qa@0f7faf33a3908546c6fd5b73a660e0f8ff173c2f/rc.wikipedia.nocontext/validation-00000-of-00001.parquet": { + "num_bytes": 3308451, + "checksum": null + }, + "hf://datasets/trivia_qa@0f7faf33a3908546c6fd5b73a660e0f8ff173c2f/rc.wikipedia.nocontext/test-00000-of-00001.parquet": { + "num_bytes": 541852, + "checksum": null + } + }, + "download_size": 28803950, + "features": { + "question": { + "dtype": "string", + "_type": "Value" + }, + "question_id": { + "dtype": "string", + "_type": "Value" + }, + "question_source": { + "dtype": "string", + "_type": "Value" + }, + "entity_pages": { + "feature": { + "doc_source": { + "dtype": "string", + "_type": "Value" + }, + "filename": { + "dtype": "string", + "_type": "Value" + }, + "title": { + "dtype": "string", + "_type": "Value" + }, + "wiki_context": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "search_results": { + "feature": { + "description": { + "dtype": "string", + "_type": "Value" + }, + "filename": { + "dtype": "string", + "_type": "Value" + }, + "rank": { + "dtype": "int32", + "_type": "Value" + }, + "title": { + "dtype": "string", + "_type": "Value" + }, + "url": { + "dtype": "string", + "_type": "Value" + }, + "search_context": { + "dtype": "string", + "_type": "Value" + } + }, + "_type": "Sequence" + }, + "answer": { + "aliases": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "normalized_aliases": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "matched_wiki_entity_name": { + "dtype": "string", + "_type": "Value" + }, + "normalized_matched_wiki_entity_name": { + "dtype": "string", + "_type": "Value" + }, + "normalized_value": { + "dtype": "string", + "_type": "Value" + }, + "type": { + "dtype": "string", + "_type": "Value" + }, + "value": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "", + "size_in_bytes": 85172024, + "splits": { + "train": { + "name": "train", + "num_bytes": 48359645, + "num_examples": 61888, + "dataset_name": "trivia_qa" + }, + "validation": { + "name": "validation", + "num_bytes": 6365273, + "num_examples": 7993, + "dataset_name": "trivia_qa" + }, + "test": { + "name": "test", + "num_bytes": 1643156, + "num_examples": 7701, + "dataset_name": "trivia_qa" + } + }, + "version": { + "version_str": "0.0.0", + "major": 0, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json new file mode 100644 index 000000000..64e10fe35 --- /dev/null +++ b/olmo_data/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "103745879378fe4b", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "validation" +} \ No newline at end of file diff --git a/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow new file mode 100644 index 000000000..74abc3b7d Binary files /dev/null and b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow differ diff --git a/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json 
diff --git a/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow
new file mode 100644
index 000000000..74abc3b7d
Binary files /dev/null and b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow differ
diff --git a/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json
new file mode 100644
index 000000000..021713ca7
--- /dev/null
+++ b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json
@@ -0,0 +1,63 @@
+{
+  "builder_name": "winogrande",
+  "citation": "@InProceedings{ai2:winogrande,\ntitle = {WinoGrande: An Adversarial Winograd Schema Challenge at Scale},\nauthors={Keisuke, Sakaguchi and Ronan, Le Bras and Chandra, Bhagavatula and Yejin, Choi\n},\nyear={2019}\n}\n",
+  "config_name": "winogrande_xl",
+  "dataset_name": "winogrande",
+  "dataset_size": 5577568,
+  "description": "WinoGrande is a new collection of 44k problems, inspired by Winograd Schema Challenge (Levesque, Davis, and Morgenstern\n 2011), but adjusted to improve the scale and robustness against the dataset-specific bias. Formulated as a\nfill-in-a-blank task with binary options, the goal is to choose the right option for a given sentence which requires\ncommonsense reasoning.\n",
+  "download_checksums": {
+    "https://storage.googleapis.com/ai2-mosaic/public/winogrande/winogrande_1.1.zip": {
+      "num_bytes": 3395492,
+      "checksum": null
+    }
+  },
+  "download_size": 3395492,
+  "features": {
+    "sentence": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "option1": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "option2": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "answer": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "https://leaderboard.allenai.org/winogrande/submissions/get-started",
+  "license": "",
+  "size_in_bytes": 8973060,
+  "splits": {
+    "train": {
+      "name": "train",
+      "num_bytes": 5185752,
+      "num_examples": 40398,
+      "dataset_name": "winogrande"
+    },
+    "test": {
+      "name": "test",
+      "num_bytes": 227633,
+      "num_examples": 1767,
+      "dataset_name": "winogrande"
+    },
+    "validation": {
+      "name": "validation",
+      "num_bytes": 164183,
+      "num_examples": 1267,
+      "dataset_name": "winogrande"
+    }
+  },
+  "version": {
+    "version_str": "1.1.0",
+    "description": "",
+    "major": 1,
+    "minor": 1,
+    "patch": 0
+  }
+}
\ No newline at end of file
diff --git a/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/state.json b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/state.json
new file mode 100644
index 000000000..f8d619900
--- /dev/null
+++ b/olmo_data/hf_datasets/winogrande/winogrande_xl/validation/state.json
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "1b3333f502a889bb",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "validation"
+}
\ No newline at end of file
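The snapshots above look like plain `load_dataset(...).save_to_disk(...)` output. The script that actually produced them is not part of this diff, so the following regeneration sketch is an assumption about the process, not the real tooling:

```python
from typing import Optional

import datasets


def snapshot_validation_split(path: str, name: Optional[str] = None, out_root: str = "olmo_data/hf_datasets") -> None:
    # Downloads the split once, then writes the arrow shard plus
    # dataset_info.json and state.json in exactly the layout seen in this diff.
    ds = datasets.load_dataset(path, name, split="validation")
    ds.save_to_disk(f"{out_root}/{path}/{name or 'none'}/validation")


# snapshot_validation_split("winogrande", "winogrande_xl")
# snapshot_validation_split("tau/commonsense_qa")
```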
(dir / ".foo").touch() assert not util.dir_is_empty(dir) - - -def _create_and_store_test_hf_dataset(data: List[Any], dataset_path: Path): - dataset_path.mkdir(parents=True, exist_ok=True) - test_file_path = dataset_path / "test.json" - with test_file_path.open("w") as f: - json.dump(data, f) - - -def test_load_hf_dataset_gets_correct_data(tmp_path: Path): - dataset_path = tmp_path / "test_dataset" - cache_path = tmp_path / "cache" - - data = [{"foo": i} for i in range(10)] - _create_and_store_test_hf_dataset(data, dataset_path) - - dataset = util.load_hf_dataset(str(dataset_path), name=None, split="test", datasets_cache_dir=str(cache_path)) - assert isinstance(dataset, (Dataset, DatasetDict)) - for i in range(10): - assert dataset[i]["foo"] == i - - -def test_load_hf_dataset_caches_dataset(tmp_path: Path): - dataset_path = tmp_path / "test_dataset" - cache_path = tmp_path / "cache" - - data = [{"foo": i} for i in range(10)] - _create_and_store_test_hf_dataset(data, dataset_path) - - dataset = util.load_hf_dataset(str(dataset_path), name=None, split="test", datasets_cache_dir=str(cache_path)) - assert isinstance(dataset, (Dataset, DatasetDict)) - assert dataset[0]["foo"] == 0 - - # Overwrite dataset data and check that old data is loaded - data = [{"bar": i} for i in range(10)] - _create_and_store_test_hf_dataset(data, dataset_path) - - dataset = util.load_hf_dataset(str(dataset_path), name=None, split="test", datasets_cache_dir=str(cache_path)) - assert isinstance(dataset, (Dataset, DatasetDict)) - assert dataset[0]["foo"] == 0