diff --git a/docs/api/data_collector.md b/docs/api/data_collector.md index 59407606..eeff3458 100644 --- a/docs/api/data_collector.md +++ b/docs/api/data_collector.md @@ -21,8 +21,6 @@ data_collector_callbacks/episode_metadata_callback ```{eval-rst} .. autofunction:: minari.DataCollector.step .. autofunction:: minari.DataCollector.reset -.. autofunction:: minari.DataCollector.close .. autofunction:: minari.DataCollector.create_dataset -.. autofunction:: minari.DataCollector.clear_buffer_to_tmp_file -.. autofunction::minari.DataCollector._add_to_episode_buffer +.. autofunction:: minari.DataCollector.close ``` diff --git a/docs/api/minari_dataset/episode_data.md b/docs/api/minari_dataset/episode_data.md index 771633ee..79a6af5e 100644 --- a/docs/api/minari_dataset/episode_data.md +++ b/docs/api/minari_dataset/episode_data.md @@ -17,18 +17,18 @@ The seed used to reset this episode in the Gymnasium API. -.. autoattribute:: minari.EpisodeData.total_timesteps +.. autoattribute:: minari.EpisodeData.total_steps - The number of timesteps contained in this episode. + The number of steps contained in this episode. .. autoattribute:: minari.EpisodeData.observations The observations of the environment. The initial and final observations are included meaning that the number - of observations will be increased by one compared to the number of timesteps + of observations will be increased by one compared to the number of steps. .. autoattribute:: minari.EpisodeData.actions - The actions taken in each episode timestep. + The actions taken in each episode step. .. 
autoattribute:: minari.EpisodeData.terminations diff --git a/docs/content/dataset_standards.md b/docs/content/dataset_standards.md index a2af8ee5..5ee45a74 100644 --- a/docs/content/dataset_standards.md +++ b/docs/content/dataset_standards.md @@ -548,18 +548,18 @@ The `sampled_episodes` variable will be a list of 10 `EpisodeData` elements, eac | ----------------- | ------------------------------------ | ------------------------------------------------------------- | | `id` | `np.int64` | ID of the episode. | | `seed` | `np.int64` | Seed used to reset the episode. | -| `total_timesteps` | `np.int64` | Number of timesteps in the episode. | -| `observations` | `np.ndarray`, `list`, `tuple`, `dict` | Observations for each timestep including initial observation. | -| `actions` | `np.ndarray`, `list`, `tuple`, `dict` | Actions for each timestep. | -| `rewards` | `np.ndarray` | Rewards for each timestep. | -| `terminations` | `np.ndarray` | Terminations for each timestep. | -| `truncations` | `np.ndarray` | Truncations for each timestep. | +| `total_steps` | `np.int64` | Number of steps in the episode. | +| `observations` | `np.ndarray`, `list`, `tuple`, `dict` | Observations for each step including initial observation. | +| `actions` | `np.ndarray`, `list`, `tuple`, `dict` | Actions for each step. | +| `rewards` | `np.ndarray` | Rewards for each step. | +| `terminations` | `np.ndarray` | Terminations for each step. | +| `truncations` | `np.ndarray` | Truncations for each step. | | `infos` | `dict` | A dictionary containing additional information. | As mentioned in the `Supported Spaces` section, many different observation and action spaces are supported so the data type for these fields are dependent on the environment being used. ## Additional Information Formatting -When creating a dataset with `DataCollector`, if the `DataCollector` is initialized with `record_infos=True`, an info dict must be provided from every call to the environment's `step` and `reset` function. 
The structure of the info dictionary must be the same across timesteps. +When creating a dataset with `DataCollector`, if the `DataCollector` is initialized with `record_infos=True`, an info dict must be provided from every call to the environment's `step` and `reset` function. The structure of the info dictionary must be the same across steps. -Given that it is not guaranteed that all Gymnasium environments provide infos at every timestep, we provide the `StepDataCallback` which can modify the infos from a non-compliant environment so they have the same structure at every timestep. An example of this pattern is available in our test `test_data_collector_step_data_callback_info_correction` in test_step_data_callback.py. +Given that it is not guaranteed that all Gymnasium environments provide infos at every step, we provide the `StepDataCallback` which can modify the infos from a non-compliant environment so they have the same structure at every step. An example of this pattern is available in our test `test_data_collector_step_data_callback_info_correction` in test_step_data_callback.py. diff --git a/docs/tutorials/using_datasets/behavioral_cloning.py b/docs/tutorials/using_datasets/behavioral_cloning.py index 219d2f73..403e6040 100644 --- a/docs/tutorials/using_datasets/behavioral_cloning.py +++ b/docs/tutorials/using_datasets/behavioral_cloning.py @@ -5,7 +5,7 @@ # %%% # We present here how to perform behavioral cloning on a Minari dataset using `PyTorch `_. # We will start generating the dataset of the expert policy for the `CartPole-v1 `_ environment, which is a classic control problem. -# The objective is to balance the pole on the cart, and we receive a reward of +1 for each successful timestep. +# The objective is to balance the pole on the cart, and we receive a reward of +1 for each successful step. 
# %% # Imports @@ -108,7 +108,7 @@ def collate_fn(batch): return { "id": torch.Tensor([x.id for x in batch]), "seed": torch.Tensor([x.seed for x in batch]), - "total_timesteps": torch.Tensor([x.total_timesteps for x in batch]), + "total_steps": torch.Tensor([x.total_steps for x in batch]), "observations": torch.nn.utils.rnn.pad_sequence( [torch.as_tensor(x.observations) for x in batch], batch_first=True diff --git a/minari/__init__.py b/minari/__init__.py index 286039c8..b7361540 100644 --- a/minari/__init__.py +++ b/minari/__init__.py @@ -10,7 +10,6 @@ from minari.utils import ( combine_datasets, create_dataset_from_buffers, - create_dataset_from_collector_env, get_normalized_score, split_dataset, ) @@ -33,17 +32,8 @@ "load_dataset", "combine_datasets", "create_dataset_from_buffers", - "create_dataset_from_collector_env", "split_dataset", "get_normalized_score", ] __version__ = "0.4.3" - - -def __getattr__(name): - if name == "DataCollectorV0": - from minari.data_collector import DataCollectorV0 - return DataCollectorV0 - else: - raise ImportError(f"cannot import name '{name}' from '{__name__}' ({__file__})") diff --git a/minari/data_collector/__init__.py b/minari/data_collector/__init__.py index 50060469..3d414e8f 100644 --- a/minari/data_collector/__init__.py +++ b/minari/data_collector/__init__.py @@ -2,11 +2,3 @@ __all__ = ["DataCollector"] - - -def __getattr__(name): - if name == "DataCollectorV0": - from minari.data_collector.data_collector import DataCollectorV0 - return DataCollectorV0 - else: - raise ImportError(f"cannot import name '{name}' from '{__name__}' ({__file__})") diff --git a/minari/data_collector/data_collector.py b/minari/data_collector/data_collector.py index 28310e91..8e8709c5 100644 --- a/minari/data_collector/data_collector.py +++ b/minari/data_collector/data_collector.py @@ -1,12 +1,10 @@ from __future__ import annotations import copy -import inspect import os import secrets import shutil import tempfile -import warnings from typing 
import Any, Callable, Dict, List, Optional, SupportsFloat, Type, Union import gymnasium as gym @@ -22,6 +20,7 @@ ) from minari.dataset.minari_dataset import MinariDataset from minari.dataset.minari_storage import MinariStorage +from minari.utils import _generate_dataset_metadata, _generate_dataset_path # H5Py supports ints up to uint64 @@ -30,17 +29,6 @@ EpisodeBuffer = Dict[str, Any] # TODO: narrow this down -def __getattr__(name): - if name == "DataCollectorV0": - stacklevel = len(inspect.stack(0)) - warnings.warn("DataCollectorV0 is deprecated and will be removed. Use DataCollector instead.", DeprecationWarning, stacklevel=stacklevel) - return DataCollector - elif name == "__path__": - return False # see https://stackoverflow.com/a/60803436 - else: - raise ImportError(f"cannot import name '{name}' from '{__name__}' ({__file__})") - - class DataCollector(gym.Wrapper): r"""Gymnasium environment wrapper that collects step data. @@ -357,8 +345,6 @@ def create_dataset( Returns: MinariDataset """ - # TODO: move the import to top of the file after removing minari.create_dataset_from_collector_env() in 0.5.0 - from minari.utils import _generate_dataset_metadata, _generate_dataset_path dataset_path = _generate_dataset_path(dataset_id) metadata: Dict[str, Any] = _generate_dataset_metadata( dataset_id, @@ -375,7 +361,7 @@ def create_dataset( minari_version, ) - self.save_to_disk(dataset_path, metadata) + self._save_to_disk(dataset_path, metadata) - # will be able to calculate dataset size only after saving the disk, so updating the dataset metadata post `save_to_disk` method + # will be able to calculate dataset size only after saving to disk, so updating the dataset metadata post `_save_to_disk` method @@ -384,7 +370,7 @@ def create_dataset( dataset.storage.update_metadata(metadata) return dataset - def save_to_disk( + def _save_to_disk( self, path: str | os.PathLike, dataset_metadata: Dict[str, Any] = {} ): """Save all in-memory buffer data and move temporary files to a permanent location in disk. 
@@ -393,7 +379,6 @@ def save_to_disk( path (str): path to store the dataset, e.g.: '/home/foo/datasets/data' dataset_metadata (Dict, optional): additional metadata to add to the dataset file. Defaults to {}. """ - warnings.warn("This method is deprecated and will become private in v0.5.0.", DeprecationWarning, stacklevel=2) self._validate_buffer() self._storage.update_episodes(self._buffer) self._buffer.clear() diff --git a/minari/dataset/episode_data.py b/minari/dataset/episode_data.py index 8d32d754..abb14b3d 100644 --- a/minari/dataset/episode_data.py +++ b/minari/dataset/episode_data.py @@ -13,7 +13,7 @@ class EpisodeData: id: int seed: Optional[int] - total_timesteps: int + total_steps: int observations: Any actions: Any rewards: np.ndarray @@ -26,7 +26,7 @@ def __repr__(self) -> str: "EpisodeData(" f"id={repr(self.id)}, " f"seed={repr(self.seed)}, " - f"total_timesteps={self.total_timesteps}, " + f"total_steps={self.total_steps}, " f"observations={EpisodeData._repr_space_values(self.observations)}, " f"actions={EpisodeData._repr_space_values(self.actions)}, " f"rewards=ndarray of {len(self.rewards)} floats, " diff --git a/minari/dataset/minari_dataset.py b/minari/dataset/minari_dataset.py index 3e12a5b8..f6de12b8 100644 --- a/minari/dataset/minari_dataset.py +++ b/minari/dataset/minari_dataset.py @@ -265,7 +265,7 @@ def total_steps(self) -> int: else: self._total_steps = sum( self.storage.apply( - lambda episode: episode["total_timesteps"], + lambda episode: episode["total_steps"], episode_indices=self.episode_indices, ) ) diff --git a/minari/dataset/minari_storage.py b/minari/dataset/minari_storage.py index b076d0e4..6644b41c 100644 --- a/minari/dataset/minari_storage.py +++ b/minari/dataset/minari_storage.py @@ -227,7 +227,7 @@ def get_episodes(self, episode_indices: Iterable[int]) -> List[dict]: ep_dict = { "id": ep_group.attrs.get("id"), - "total_timesteps": ep_group.attrs.get("total_steps"), + "total_steps": ep_group.attrs.get("total_steps"), "seed": 
seed, "observations": self._decode_space( ep_group["observations"], self.observation_space diff --git a/minari/utils.py b/minari/utils.py index 5d679b79..ea4ad2c9 100644 --- a/minari/utils.py +++ b/minari/utils.py @@ -16,7 +16,6 @@ from packaging.specifiers import InvalidSpecifier, SpecifierSet from packaging.version import Version -from minari import DataCollector from minari.dataset.minari_dataset import MinariDataset from minari.dataset.minari_storage import MinariStorage from minari.storage.datasets_root_dir import get_dataset_path @@ -566,63 +565,6 @@ def create_dataset_from_buffers( return MinariDataset(storage) -def create_dataset_from_collector_env( - dataset_id: str, - collector_env: DataCollector, - eval_env: Optional[str | gym.Env | EnvSpec] = None, - algorithm_name: Optional[str] = None, - author: Optional[str] = None, - author_email: Optional[str] = None, - code_permalink: Optional[str] = None, - ref_min_score: Optional[float] = None, - ref_max_score: Optional[float] = None, - expert_policy: Optional[Callable[[ObsType], ActType]] = None, - num_episodes_average_score: int = 100, - minari_version: Optional[str] = None, -): - """Create a Minari dataset using the data collected from stepping with a Gymnasium environment wrapped with a `DataCollector` Minari wrapper. - - The ``dataset_id`` parameter corresponds to the name of the dataset, with the syntax as follows: - ``(env_name-)(dataset_name)(-v(version))`` where ``env_name`` identifies the name of the environment used to generate the dataset ``dataset_name``. - This ``dataset_id`` is used to load the Minari datasets with :meth:`minari.load_dataset`. 
- - Args: - dataset_id (str): name id to identify Minari dataset - collector_env (DataCollector): Gymnasium environment used to collect the buffer data - buffer (list[Dict[str, Union[list, Dict]]]): list of episode dictionaries with data - eval_env (Optional[str|gym.Env|EnvSpec]): Gymnasium environment(gym.Env)/environment id(str)/environment spec(EnvSpec) to use for evaluation with the dataset. After loading the dataset, the environment can be recovered as follows: `MinariDataset.recover_environment(eval_env=True). - If None the `env` used to collect the buffer data should be used for evaluation. - algorithm_name (Optional[str], optional): name of the algorithm used to collect the data. Defaults to None. - author (Optional[str], optional): author that generated the dataset. Defaults to None. - author_email (Optional[str], optional): email of the author that generated the dataset. Defaults to None. - code_permalink (Optional[str], optional): link to relevant code used to generate the dataset. Defaults to None. - ref_min_score( Optional[float], optional): minimum reference score from the average returns of a random policy. This value is later used to normalize a score with :meth:`minari.get_normalized_score`. If default None the value will be estimated with a default random policy. - ref_max_score (Optional[float], optional: maximum reference score from the average returns of a hypothetical expert policy. This value is used in :meth:`minari.get_normalized_score`. Default None. - expert_policy (Optional[Callable[[ObsType], ActType], optional): policy to compute `ref_max_score` by averaging the returns over a number of episodes equal to `num_episodes_average_score`. - `ref_max_score` and `expert_policy` can't be passed at the same time. Default to None - num_episodes_average_score (int): number of episodes to average over the returns to compute `ref_min_score` and `ref_max_score`. Default to 100. 
- minari_version (Optional[str], optional): Minari version specifier compatible with the dataset. If None (default) use the installed Minari version. - - Returns: - MinariDataset - """ - warnings.warn("This function is deprecated and will be removed in v0.5.0. Please use DataCollector.create_dataset() instead.", DeprecationWarning, stacklevel=2) - dataset = collector_env.create_dataset( - dataset_id=dataset_id, - eval_env=eval_env, - algorithm_name=algorithm_name, - author=author, - author_email=author_email, - code_permalink=code_permalink, - ref_min_score=ref_min_score, - ref_max_score=ref_max_score, - expert_policy=expert_policy, - num_episodes_average_score=num_episodes_average_score, - minari_version=minari_version, - ) - return dataset - - def get_normalized_score(dataset: MinariDataset, returns: np.ndarray) -> np.ndarray: r"""Normalize undiscounted return of an episode. @@ -699,7 +641,7 @@ def get_dataset_spec_dict( version += f" ({__version__} installed)" md_dict = { - "Total Timesteps": dataset_spec["total_steps"], + "Total steps": dataset_spec["total_steps"], "Total Episodes": dataset_spec["total_episodes"], "Dataset Observation Space": f"`{dataset_observation_space}`", "Dataset Action Space": f"`{dataset_action_space}`", diff --git a/tests/common.py b/tests/common.py index 8c4a99c7..b336b117 100644 --- a/tests/common.py +++ b/tests/common.py @@ -559,26 +559,26 @@ def check_data_integrity(data: MinariStorage, episode_indices: Iterable[int]): # verify the actions and observations are in the appropriate action space and observation space, and that the episode lengths are correct for episode in episodes: - total_steps += episode["total_timesteps"] + total_steps += episode["total_steps"] _check_space_elem( episode["observations"], observation_space, - episode["total_timesteps"] + 1, + episode["total_steps"] + 1, ) - _check_space_elem(episode["actions"], action_space, episode["total_timesteps"]) + _check_space_elem(episode["actions"], action_space, 
episode["total_steps"]) - for i in range(episode["total_timesteps"] + 1): + for i in range(episode["total_steps"] + 1): obs = _reconstuct_obs_or_action_at_index_recursive( episode["observations"], i ) assert observation_space.contains(obs) - for i in range(episode["total_timesteps"]): + for i in range(episode["total_steps"]): action = _reconstuct_obs_or_action_at_index_recursive(episode["actions"], i) assert action_space.contains(action) - assert episode["total_timesteps"] == len(episode["rewards"]) - assert episode["total_timesteps"] == len(episode["terminations"]) - assert episode["total_timesteps"] == len(episode["truncations"]) + assert episode["total_steps"] == len(episode["rewards"]) + assert episode["total_steps"] == len(episode["terminations"]) + assert episode["total_steps"] == len(episode["truncations"]) assert total_steps == data.total_steps @@ -707,11 +707,11 @@ def check_episode_data_integrity( _check_space_elem( episode.observations, observation_space, - episode.total_timesteps + 1, + episode.total_steps + 1, ) - _check_space_elem(episode.actions, action_space, episode.total_timesteps) + _check_space_elem(episode.actions, action_space, episode.total_steps) - for i in range(episode.total_timesteps + 1): + for i in range(episode.total_steps + 1): obs = _reconstuct_obs_or_action_at_index_recursive(episode.observations, i) if info_sample is not None: assert check_infos_equal( @@ -721,13 +721,13 @@ def check_episode_data_integrity( assert observation_space.contains(obs) - for i in range(episode.total_timesteps): + for i in range(episode.total_steps): action = _reconstuct_obs_or_action_at_index_recursive(episode.actions, i) assert action_space.contains(action) - assert episode.total_timesteps == len(episode.rewards) - assert episode.total_timesteps == len(episode.terminations) - assert episode.total_timesteps == len(episode.truncations) + assert episode.total_steps == len(episode.rewards) + assert episode.total_steps == len(episode.terminations) + assert 
episode.total_steps == len(episode.truncations) def check_infos_equal(info_1: Dict, info_2: Dict) -> bool: diff --git a/tests/data_collector/callbacks/test_step_data_callback.py b/tests/data_collector/callbacks/test_step_data_callback.py index d26cda5a..e05febbf 100644 --- a/tests/data_collector/callbacks/test_step_data_callback.py +++ b/tests/data_collector/callbacks/test_step_data_callback.py @@ -3,7 +3,6 @@ import pytest from gymnasium import spaces -import minari from minari import DataCollector, MinariDataset from minari.data_collector.callbacks import StepDataCallback from tests.common import ( @@ -135,9 +134,8 @@ def test_data_collector_step_data_callback_info_correction(): env.reset() - dataset = minari.create_dataset_from_collector_env( + dataset = env.create_dataset( dataset_id=dataset_id, - collector_env=env, algorithm_name="random_policy", code_permalink=str(__file__), author="WillDudley", @@ -163,7 +161,7 @@ def test_data_collector_step_data_callback_info_correction(): record_infos=True, ) # here we are checking to make sure that if we have an environment changing its info - # structure across timesteps, it is caught by the data_collector + # structure across steps, it is caught by the data_collector with pytest.raises( ValueError, match=r"Info structure inconsistent with info structure returned by original reset." 
diff --git a/tests/data_collector/test_data_collector.py b/tests/data_collector/test_data_collector.py index ee932cdc..b63f1157 100644 --- a/tests/data_collector/test_data_collector.py +++ b/tests/data_collector/test_data_collector.py @@ -80,7 +80,7 @@ def get_single_step_from_episode(episode: EpisodeData, index: int) -> EpisodeDat step_data = { "id": episode.id, - "total_timesteps": 1, + "total_steps": 1, "seed": None, "observations": observation, "actions": action, @@ -135,7 +135,7 @@ def test_truncation_without_reset(dataset_id, env_id): episodes_generator = dataset.iterate_episodes() last_step = get_single_step_from_episode(next(episodes_generator), -1) for episode in episodes_generator: - assert episode.total_timesteps == ForceTruncateStepDataCallback.episode_steps + assert episode.total_steps == ForceTruncateStepDataCallback.episode_steps first_step = get_single_step_from_episode(episode, 0) # Check that the last observation of the previous episode is carried over to the next episode # as the reset observation. 
@@ -194,7 +194,7 @@ def test_reproducibility(seed): assert np.allclose(obs, episode.observations[0]) - for k in range(episode.total_timesteps): + for k in range(episode.total_steps): obs, rew, term, trunc, _ = env.step(episode.actions[k]) assert np.allclose(obs, episode.observations[k + 1]) assert rew == episode.rewards[k] diff --git a/tests/dataset/test_minari_dataset.py b/tests/dataset/test_minari_dataset.py index 26099c27..8a577804 100644 --- a/tests/dataset/test_minari_dataset.py +++ b/tests/dataset/test_minari_dataset.py @@ -31,14 +31,14 @@ def test_episode_data(space: gym.Space): id = np.random.randint(1024) seed = np.random.randint(1024) - total_timestep = 100 - rewards = np.random.randn(total_timestep) - terminations = np.random.choice([True, False], size=(total_timestep,)) - truncations = np.random.choice([True, False], size=(total_timestep,)) + total_step = 100 + rewards = np.random.randn(total_step) + terminations = np.random.choice([True, False], size=(total_step,)) + truncations = np.random.choice([True, False], size=(total_step,)) episode_data = EpisodeData( id=id, seed=seed, - total_timesteps=total_timestep, + total_steps=total_step, observations=space.sample(), actions=space.sample(), rewards=rewards, @@ -50,7 +50,7 @@ def test_episode_data(space: gym.Space): pattern = r"EpisodeData\(" pattern += r"id=\d+, " pattern += r"seed=\d+, " - pattern += r"total_timesteps=100, " + pattern += r"total_steps=100, " pattern += r"observations=.+, " pattern += r"actions=.+, " pattern += r"rewards=.+, " diff --git a/tests/dataset/test_minari_storage.py b/tests/dataset/test_minari_storage.py index 63a08570..a3656ce7 100644 --- a/tests/dataset/test_minari_storage.py +++ b/tests/dataset/test_minari_storage.py @@ -200,7 +200,7 @@ def test_episode_metadata(tmp_dataset_dir): ], ) def test_minari_get_dataset_size_from_collector_env(dataset_id, env_id): - """Test get_dataset_size method for dataset made using create_dataset_from_collector_env method.""" + """Test 
get_dataset_size method for dataset made with DataCollector environment.""" # delete the test dataset if it already exists local_datasets = minari.list_local_datasets() if dataset_id in local_datasets: diff --git a/tests/utils/test_dataset_creation.py b/tests/utils/test_dataset_creation.py index 6dfc7afc..40a2ce2d 100644 --- a/tests/utils/test_dataset_creation.py +++ b/tests/utils/test_dataset_creation.py @@ -131,9 +131,8 @@ def test_record_infos_collector_env(info_override): env.reset() - dataset = minari.create_dataset_from_collector_env( + dataset = env.create_dataset( dataset_id=dataset_id, - collector_env=env, algorithm_name="random_policy", code_permalink=CODELINK, author="WillDudley",