diff --git a/rllib/BUILD b/rllib/BUILD index c68aa11223cb..5e4100da87b2 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -339,6 +339,19 @@ py_test( args = ["--dir=tuned_examples/cql"] ) +py_test( + name = "learning_tests_pendulum_cql", + main = "tuned_examples/cql/pendulum_cql.py", + tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], + size = "large", + srcs = ["tuned_examples/cql/pendulum_cql.py"], + # Include the Parquet offline-data directory as well. + data = [ + "tests/data/pendulum/pendulum-v1_enormous", + ], + args = ["--as-test", "--enable-new-api-stack"] +) + # DQN # CartPole py_test( diff --git a/rllib/algorithms/bc/tests/test_bc.py b/rllib/algorithms/bc/tests/test_bc.py index b1b879accd00..28d0d310a6d6 100644 --- a/rllib/algorithms/bc/tests/test_bc.py +++ b/rllib/algorithms/bc/tests/test_bc.py @@ -24,7 +24,8 @@ def test_bc_compilation_and_learning_from_offline_file(self): data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") - data_path = "local://" / base_path.joinpath(data_path) + data_path = "local://" / base_path / data_path + print(f"data_path={data_path}") # Define the BC config. 
diff --git a/rllib/algorithms/cql/cql.py b/rllib/algorithms/cql/cql.py index c6613ff8c11f..001987d20176 100644 --- a/rllib/algorithms/cql/cql.py +++ b/rllib/algorithms/cql/cql.py @@ -1,7 +1,14 @@ import logging -from typing import Optional, Type +from typing import Optional, Type, Union from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) +from ray.rllib.core.learner.learner import Learner from ray.rllib.algorithms.cql.cql_tf_policy import CQLTFPolicy from ray.rllib.algorithms.cql.cql_torch_policy import CQLTorchPolicy from ray.rllib.algorithms.sac.sac import ( @@ -23,15 +30,23 @@ ) from ray.rllib.utils.framework import try_import_tf, try_import_tfp from ray.rllib.utils.metrics import ( + ALL_MODULES, + LEARNER_RESULTS, + LEARNER_UPDATE_TIMER, LAST_TARGET_UPDATE_TS, NUM_AGENT_STEPS_SAMPLED, NUM_AGENT_STEPS_TRAINED, NUM_ENV_STEPS_SAMPLED, NUM_ENV_STEPS_TRAINED, + NUM_ENV_STEPS_TRAINED_LIFETIME, + NUM_MODULE_STEPS_TRAINED, + NUM_MODULE_STEPS_TRAINED_LIFETIME, NUM_TARGET_UPDATES, + OFFLINE_SAMPLING_TIMER, TARGET_NET_UPDATE_TIMER, SYNCH_WORKER_WEIGHTS_TIMER, SAMPLE_TIMER, + TIMERS, ) from ray.rllib.utils.typing import ResultDict @@ -122,6 +137,40 @@ def training( return self + @override(SACConfig) + def get_default_learner_class(self) -> Union[Type["Learner"], str]: + if self.framework_str == "torch": + from ray.rllib.algorithms.cql.torch.cql_torch_learner import CQLTorchLearner + + return CQLTorchLearner + else: + raise ValueError( + f"The framework {self.framework_str} is not supported. " + "Use `'torch'` instead." 
+ ) + + @override(AlgorithmConfig) + def build_learner_connector( + self, + input_observation_space, + input_action_space, + device=None, + ): + pipeline = super().build_learner_connector( + input_observation_space=input_observation_space, + input_action_space=input_action_space, + device=device, + ) + + # Prepend the "add-NEXT_OBS-from-episodes-to-train-batch" connector piece (right + # after the corresponding "add-OBS-..." default piece). + pipeline.insert_after( + AddObservationsFromEpisodesToBatch, + AddNextObservationsFromEpisodesToTrainBatch(), + ) + + return pipeline + @override(SACConfig) def validate(self) -> None: # First check, whether old `timesteps_per_iteration` is used. @@ -150,6 +199,20 @@ def validate(self) -> None: ) try_import_tfp(error=True) + # Assert that for a local learner the number of iterations is 1. Note, + # this is needed because we have no iterators, but instead a single + # batch returned directly from the `OfflineData.sample` method. + if ( + self.num_learners == 0 + and not self.dataset_num_iters_per_learner + and self.enable_rl_module_and_learner + ): + raise ValueError( + "When using a single local learner the number of iterations " + "per learner, `dataset_num_iters_per_learner` has to be 1. " + "Set this hyperparameter in the `AlgorithmConfig.offline_data`." + ) + class CQL(SAC): """CQL (derived from SAC).""" @@ -171,6 +234,78 @@ def get_default_policy_class( @override(SAC) def training_step(self) -> ResultDict: + if self.config.enable_env_runner_and_connector_v2: + return self._training_step_new_api_stack() + elif self.config.enable_rl_module_and_learner: + raise ValueError( + "Hybrid API stack is not supported. Either set " + "`enable_rl_module_and_learner=True` and " + "`enable_env_runner_and_connector_v2=True` or set both " + "attributed to `False`." + ) + else: + return self._training_step_old_api_stack() + + def _training_step_new_api_stack(self) -> ResultDict: + + # Sampling from offline data. 
+ with self.metrics.log_time((TIMERS, OFFLINE_SAMPLING_TIMER)): + # Return an iterator in case we are using remote learners. + batch = self.offline_data.sample( + num_samples=self.config.train_batch_size_per_learner, + num_shards=self.config.num_learners, + return_iterator=self.config.num_learners > 1, + ) + + # Updating the policy. + with self.metrics.log_time((TIMERS, LEARNER_UPDATE_TIMER)): + # TODO (simon, sven): Check, if we should execute directly s.th. like + # update_from_iterator. + learner_results = self.learner_group.update_from_batch( + batch, + minibatch_size=self.config.train_batch_size_per_learner, + num_iters=self.config.dataset_num_iters_per_learner, + ) + + # Log training results. + self.metrics.merge_and_log_n_dicts(learner_results, key=LEARNER_RESULTS) + self.metrics.log_value( + NUM_ENV_STEPS_TRAINED_LIFETIME, + self.metrics.peek( + (LEARNER_RESULTS, ALL_MODULES, NUM_ENV_STEPS_TRAINED) + ), + reduce="sum", + ) + self.metrics.log_dict( + { + (LEARNER_RESULTS, mid, NUM_MODULE_STEPS_TRAINED_LIFETIME): ( + stats[NUM_MODULE_STEPS_TRAINED] + ) + for mid, stats in self.metrics.peek(LEARNER_RESULTS).items() + }, + reduce="sum", + ) + + # Synchronize weights. + # As the results contain for each policy the loss and in addition the + # total loss over all policies is returned, this total loss has to be + # removed. + modules_to_update = set(learner_results[0].keys()) - {ALL_MODULES} + + # Update weights - after learning on the local worker - + # on all remote workers. Note, we only have the local `EnvRunner`, + # but from this `EnvRunner` the evaluation `EnvRunner`s get updated. + with self.metrics.log_time((TIMERS, SYNCH_WORKER_WEIGHTS_TIMER)): + self.env_runner_group.sync_weights( + # Sync weights from learner_group to all EnvRunners. 
+ from_worker_or_learner_group=self.learner_group, + policies=modules_to_update, + inference_only=True, + ) + + return self.metrics.reduce() + + def _training_step_old_api_stack(self) -> ResultDict: # Collect SampleBatches from sample workers. with self._timers[SAMPLE_TIMER]: train_batch = synchronous_parallel_sample(worker_set=self.env_runner_group) diff --git a/rllib/algorithms/cql/torch/__init__.py b/rllib/algorithms/cql/torch/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py index 25f43e9a020e..213c51b3496b 100644 --- a/rllib/algorithms/dqn/dqn.py +++ b/rllib/algorithms/dqn/dqn.py @@ -18,6 +18,12 @@ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided from ray.rllib.algorithms.dqn.dqn_tf_policy import DQNTFPolicy from ray.rllib.algorithms.dqn.dqn_torch_policy import DQNTorchPolicy +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) from ray.rllib.core.learner import Learner from ray.rllib.core.rl_module.rl_module import RLModuleSpec from ray.rllib.execution.rollout_ops import ( @@ -545,6 +551,28 @@ def get_default_learner_class(self) -> Union[Type["Learner"], str]: "Use `config.framework('torch')` instead." ) + @override(AlgorithmConfig) + def build_learner_connector( + self, + input_observation_space, + input_action_space, + device=None, + ): + pipeline = super().build_learner_connector( + input_observation_space=input_observation_space, + input_action_space=input_action_space, + device=device, + ) + + # Prepend the "add-NEXT_OBS-from-episodes-to-train-batch" connector piece (right + # after the corresponding "add-OBS-..." default piece). 
+ pipeline.insert_after( + AddObservationsFromEpisodesToBatch, + AddNextObservationsFromEpisodesToTrainBatch(), + ) + + return pipeline + def calculate_rr_weights(config: AlgorithmConfig) -> List[float]: """Calculate the round robin weights for the rollout and train steps""" diff --git a/rllib/algorithms/dqn/dqn_rainbow_learner.py b/rllib/algorithms/dqn/dqn_rainbow_learner.py index b09174ab2c90..eeaceab10509 100644 --- a/rllib/algorithms/dqn/dqn_rainbow_learner.py +++ b/rllib/algorithms/dqn/dqn_rainbow_learner.py @@ -6,12 +6,6 @@ from ray.rllib.core.rl_module.apis.target_network_api import TargetNetworkAPI from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec from ray.rllib.core.rl_module.rl_module import RLModuleSpec -from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( - AddObservationsFromEpisodesToBatch, -) -from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa - AddNextObservationsFromEpisodesToTrainBatch, -) from ray.rllib.utils.annotations import ( override, OverrideToImplementCustomLogic_CallToSuperRecommended, @@ -55,14 +49,6 @@ def build(self) -> None: ) ) - # Prepend the "add-NEXT_OBS-from-episodes-to-train-batch" connector piece (right - # after the corresponding "add-OBS-..." default piece). 
- if self.config.add_default_connectors_to_learner_pipeline: - self._learner_connector.insert_after( - AddObservationsFromEpisodesToBatch, - AddNextObservationsFromEpisodesToTrainBatch(), - ) - @override(Learner) def add_module( self, diff --git a/rllib/algorithms/marwil/marwil.py b/rllib/algorithms/marwil/marwil.py index 9ba3e937b7e7..902e7deca322 100644 --- a/rllib/algorithms/marwil/marwil.py +++ b/rllib/algorithms/marwil/marwil.py @@ -6,6 +6,12 @@ from ray.rllib.algorithms.marwil.marwil_offline_prelearner import ( MARWILOfflinePreLearner, ) +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) from ray.rllib.core.learner.learner import Learner from ray.rllib.core.rl_module.rl_module import RLModuleSpec from ray.rllib.execution.rollout_ops import ( @@ -264,6 +270,28 @@ def build( ) return super().build(env, logger_creator) + @override(AlgorithmConfig) + def build_learner_connector( + self, + input_observation_space, + input_action_space, + device=None, + ): + pipeline = super().build_learner_connector( + input_observation_space=input_observation_space, + input_action_space=input_action_space, + device=device, + ) + + # Prepend the "add-NEXT_OBS-from-episodes-to-train-batch" connector piece (right + # after the corresponding "add-OBS-..." default piece). + pipeline.insert_after( + AddObservationsFromEpisodesToBatch, + AddNextObservationsFromEpisodesToTrainBatch(), + ) + + return pipeline + @override(AlgorithmConfig) def validate(self) -> None: # Call super's validation method. 
@@ -321,7 +349,7 @@ def get_default_policy_class( @override(Algorithm) def training_step(self) -> ResultDict: if self.config.enable_env_runner_and_connector_v2: - return self._training_step_new_stack() + return self._training_step_new_api_stack() elif self.config.enable_rl_module_and_learner: raise ValueError( "`enable_rl_module_and_learner=True`. Hybrid stack is not " @@ -331,9 +359,9 @@ def training_step(self) -> ResultDict: "and `enable_env_runner_and_connector_v2` to `True`." ) else: - return self._training_step_old_stack() + return self._training_step_old_api_stack() - def _training_step_new_stack(self) -> ResultDict: + def _training_step_new_api_stack(self) -> ResultDict: """Implements training logic for the new stack Note, this includes so far training with the `OfflineData` @@ -351,7 +379,7 @@ class (multi-/single-learner setup) and evaluation on batch = self.offline_data.sample( num_samples=self.config.train_batch_size_per_learner, num_shards=self.config.num_learners, - return_iterator=True if self.config.num_learners > 1 else False, + return_iterator=self.config.num_learners > 1, ) with self.metrics.log_time((TIMERS, LEARNER_UPDATE_TIMER)): @@ -400,7 +428,7 @@ class (multi-/single-learner setup) and evaluation on return self.metrics.reduce() - def _training_step_old_stack(self) -> ResultDict: + def _training_step_old_api_stack(self) -> ResultDict: # Collect SampleBatches from sample workers. 
with self._timers[SAMPLE_TIMER]: train_batch = synchronous_parallel_sample(worker_set=self.env_runner_group) diff --git a/rllib/algorithms/marwil/marwil_learner.py b/rllib/algorithms/marwil/marwil_learner.py index 3f919eebb499..f4a01fb1a603 100644 --- a/rllib/algorithms/marwil/marwil_learner.py +++ b/rllib/algorithms/marwil/marwil_learner.py @@ -1,11 +1,5 @@ from typing import Dict -from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( - AddObservationsFromEpisodesToBatch, -) -from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa - AddNextObservationsFromEpisodesToTrainBatch, -) from ray.rllib.core.learner.learner import Learner from ray.rllib.utils.annotations import override from ray.rllib.utils.lambda_defaultdict import LambdaDefaultDict @@ -34,17 +28,6 @@ def build(self) -> None: ) ) - # Prepend a NEXT_OBS from episodes to train batch connector piece (right - # after the observation default piece). - if ( - self.config.add_default_connectors_to_learner_pipeline - and self.config.enable_env_runner_and_connector_v2 - ): - self._learner_connector.insert_after( - AddObservationsFromEpisodesToBatch, - AddNextObservationsFromEpisodesToTrainBatch(), - ) - @override(Learner) def remove_module(self, module_id: ModuleID) -> None: super().remove_module(module_id) diff --git a/rllib/algorithms/marwil/tests/test_marwil.py b/rllib/algorithms/marwil/tests/test_marwil.py index 4fdfea0626ff..69a53be46a90 100644 --- a/rllib/algorithms/marwil/tests/test_marwil.py +++ b/rllib/algorithms/marwil/tests/test_marwil.py @@ -41,7 +41,7 @@ def test_marwil_compilation_and_learning_from_offline_file(self): data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[3] print(f"base_path={base_path}") - data_path = "local://" + base_path.joinpath(data_path).as_posix() + data_path = "local://" / base_path / data_path print(f"data_path={data_path}") config = ( @@ -57,7 +57,7 @@ def 
test_marwil_compilation_and_learning_from_offline_file(self): evaluation_duration=5, evaluation_parallel_to_training=True, ) - .offline_data(input_=[data_path]) + .offline_data(input_=[data_path.as_posix()]) .training( lr=0.0008, train_batch_size_per_learner=2000, diff --git a/rllib/algorithms/sac/sac.py b/rllib/algorithms/sac/sac.py index 2f3bc8d11489..c54a67de0ab8 100644 --- a/rllib/algorithms/sac/sac.py +++ b/rllib/algorithms/sac/sac.py @@ -4,6 +4,12 @@ from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided from ray.rllib.algorithms.dqn.dqn import DQN from ray.rllib.algorithms.sac.sac_tf_policy import SACTFPolicy +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) from ray.rllib.core.learner import Learner from ray.rllib.core.rl_module.rl_module import RLModuleSpec from ray.rllib.policy.policy import Policy @@ -349,14 +355,30 @@ def validate(self) -> None: # Validate that we use the corresponding `EpisodeReplayBuffer` when using # episodes. # TODO (sven, simon): Implement the multi-agent case for replay buffers. - if self.enable_env_runner_and_connector_v2 and self.replay_buffer_config[ - "type" - ] not in [ - "EpisodeReplayBuffer", - "PrioritizedEpisodeReplayBuffer", - "MultiAgentEpisodeReplayBuffer", - "MultiAgentPrioritizedEpisodeReplayBuffer", - ]: + if ( + self.enable_env_runner_and_connector_v2 + and self.replay_buffer_config["type"] + not in [ + "EpisodeReplayBuffer", + "PrioritizedEpisodeReplayBuffer", + "MultiAgentEpisodeReplayBuffer", + "MultiAgentPrioritizedEpisodeReplayBuffer", + ] + and not ( + # TODO (simon): Set up an indicator `is_offline_new_stack` that + # includes all these variable checks. 
+ self.input_ + and ( + isinstance(self.input_, str) + or ( + isinstance(self.input_, list) + and isinstance(self.input_[0], str) + ) + ) + and self.input_ != "sampler" + and self.enable_rl_module_and_learner + ) + ): raise ValueError( "When using the new `EnvRunner API` the replay buffer must be of type " "`EpisodeReplayBuffer`." @@ -399,6 +421,28 @@ def get_default_learner_class(self) -> Union[Type["Learner"], str]: f"The framework {self.framework_str} is not supported. " "Use `torch`." ) + @override(AlgorithmConfig) + def build_learner_connector( + self, + input_observation_space, + input_action_space, + device=None, + ): + pipeline = super().build_learner_connector( + input_observation_space=input_observation_space, + input_action_space=input_action_space, + device=device, + ) + + # Prepend the "add-NEXT_OBS-from-episodes-to-train-batch" connector piece (right + # after the corresponding "add-OBS-..." default piece). + pipeline.insert_after( + AddObservationsFromEpisodesToBatch, + AddNextObservationsFromEpisodesToTrainBatch(), + ) + + return pipeline + @property def _model_config_auto_includes(self): return super()._model_config_auto_includes | {"twin_q": self.twin_q} diff --git a/rllib/algorithms/sac/torch/sac_torch_learner.py b/rllib/algorithms/sac/torch/sac_torch_learner.py index 52e9b9ec8dda..aed5f21b909e 100644 --- a/rllib/algorithms/sac/torch/sac_torch_learner.py +++ b/rllib/algorithms/sac/torch/sac_torch_learner.py @@ -279,7 +279,7 @@ def compute_loss_for_module( @override(DQNRainbowTorchLearner) def compute_gradients( - self, loss_per_module: Dict[str, TensorType], **kwargs + self, loss_per_module: Dict[ModuleID, TensorType], **kwargs ) -> ParamDict: grads = {} for module_id in set(loss_per_module.keys()) - {ALL_MODULES}: diff --git a/rllib/core/learner/learner.py b/rllib/core/learner/learner.py index 47c4d9aa7a5b..ae0a57a76ffa 100644 --- a/rllib/core/learner/learner.py +++ b/rllib/core/learner/learner.py @@ -442,7 +442,7 @@ def 
configure_optimizers_for_module( @OverrideToImplementCustomLogic @abc.abstractmethod def compute_gradients( - self, loss_per_module: Dict[str, TensorType], **kwargs + self, loss_per_module: Dict[ModuleID, TensorType], **kwargs ) -> ParamDict: """Computes the gradients based on the given losses. diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000000_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000000_000000.parquet new file mode 100644 index 000000000000..ab4304da54e9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000000_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000001_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000001_000000.parquet new file mode 100644 index 000000000000..c018812d3970 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000001_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000002_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000002_000000.parquet new file mode 100644 index 000000000000..7c5260ce2663 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000002_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000003_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000003_000000.parquet new file mode 100644 index 000000000000..652f4925222a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000003_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000004_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000004_000000.parquet new file mode 100644 index 000000000000..9dd762ea3314 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000004_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000005_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000005_000000.parquet new file mode 100644 index 000000000000..b9bd4ad4c91d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000005_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000006_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000006_000000.parquet new file mode 100644 index 000000000000..28b7d30eac57 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000006_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000007_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000007_000000.parquet new file mode 100644 index 000000000000..f9998d1758be Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000007_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000008_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000008_000000.parquet new file mode 100644 index 000000000000..040ccceab421 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000008_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000009_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000009_000000.parquet new file mode 100644 index 000000000000..9e2c6340f461 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000009_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000010_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000010_000000.parquet new file mode 100644 index 000000000000..2e6b06edd674 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000010_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000011_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000011_000000.parquet new file mode 
100644 index 000000000000..7243396d2c57 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000011_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000012_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000012_000000.parquet new file mode 100644 index 000000000000..34042d8fcfd4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000012_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000013_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000013_000000.parquet new file mode 100644 index 000000000000..49835666e377 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000013_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000014_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000014_000000.parquet new file mode 100644 index 000000000000..9a84552b86dc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000014_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000015_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000015_000000.parquet new file mode 100644 index 000000000000..2d6abd728dd6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000015_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000016_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000016_000000.parquet new file mode 100644 index 000000000000..591b1cc906b3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000016_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000017_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000017_000000.parquet new file mode 100644 index 000000000000..69af15f75e0a Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000017_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000018_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000018_000000.parquet new file mode 100644 index 000000000000..8aca0f6b3c1d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000018_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000019_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000019_000000.parquet new file mode 100644 index 000000000000..5973b7b768c3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000019_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000020_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000020_000000.parquet new file mode 100644 index 000000000000..d81dc104d2fd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000020_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000021_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000021_000000.parquet new file mode 100644 index 000000000000..49dcfa0b494f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000021_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000022_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000022_000000.parquet new file mode 100644 index 000000000000..6dea53ff4d23 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000022_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000023_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000023_000000.parquet new file mode 100644 index 000000000000..d725752ba816 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000023_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000024_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000024_000000.parquet new file mode 100644 index 000000000000..082646eee31d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000024_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000025_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000025_000000.parquet new file mode 100644 index 000000000000..2acd1db67245 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000025_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000026_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000026_000000.parquet new file mode 100644 index 000000000000..4dee401864a2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000026_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000027_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000027_000000.parquet new file mode 100644 index 000000000000..b3538b352534 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000027_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000028_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000028_000000.parquet new file mode 100644 index 000000000000..4238b1850fbc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000028_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000029_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000029_000000.parquet new file mode 100644 index 000000000000..c5617ab3bebc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000029_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000030_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000030_000000.parquet new file mode 100644 index 000000000000..097acb82cdc2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000030_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000031_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000031_000000.parquet new file mode 100644 index 000000000000..76cd5d041c13 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000031_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000032_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000032_000000.parquet new file mode 100644 index 000000000000..28c4791f5c97 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000032_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000033_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000033_000000.parquet new file mode 100644 index 000000000000..08df1affe6bf Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000033_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000034_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000034_000000.parquet new file mode 100644 index 000000000000..2722d2320231 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000034_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000035_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000035_000000.parquet new file mode 100644 index 000000000000..6c8545d2fcfa Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000035_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000036_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000036_000000.parquet new file mode 
100644 index 000000000000..b0d3db67de8d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000036_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000037_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000037_000000.parquet new file mode 100644 index 000000000000..92c8f70bc757 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000037_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000038_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000038_000000.parquet new file mode 100644 index 000000000000..2a6df117028d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000038_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000039_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000039_000000.parquet new file mode 100644 index 000000000000..c45d188987f3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000039_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000040_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000040_000000.parquet new file mode 100644 index 000000000000..3087697c745c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000040_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000041_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000041_000000.parquet new file mode 100644 index 000000000000..fc546a540c94 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000041_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000042_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000042_000000.parquet new file mode 100644 index 000000000000..8b04f88bf235 Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000042_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000043_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000043_000000.parquet new file mode 100644 index 000000000000..fe739938795b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000043_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000044_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000044_000000.parquet new file mode 100644 index 000000000000..0e7668370c7a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000044_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000045_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000045_000000.parquet new file mode 100644 index 000000000000..bce9ae83dae6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000045_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000046_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000046_000000.parquet new file mode 100644 index 000000000000..05009fa107fb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000046_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000047_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000047_000000.parquet new file mode 100644 index 000000000000..89966685b87c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000047_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000048_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000048_000000.parquet new file mode 100644 index 000000000000..f7ec4f8fc664 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000048_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000049_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000049_000000.parquet new file mode 100644 index 000000000000..00da6e0a8012 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000049_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000050_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000050_000000.parquet new file mode 100644 index 000000000000..3401c9492644 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000050_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000051_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000051_000000.parquet new file mode 100644 index 000000000000..6ec543671139 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000051_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000052_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000052_000000.parquet new file mode 100644 index 000000000000..cb418096a0fd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000052_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000053_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000053_000000.parquet new file mode 100644 index 000000000000..2d4404e1f3e0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000053_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000054_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000054_000000.parquet new file mode 100644 index 000000000000..077de91175c1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000054_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000055_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000055_000000.parquet new file mode 100644 index 000000000000..dcd861b3677a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000055_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000056_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000056_000000.parquet new file mode 100644 index 000000000000..c057dcafac08 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000056_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000057_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000057_000000.parquet new file mode 100644 index 000000000000..d64fab2470f8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000057_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000058_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000058_000000.parquet new file mode 100644 index 000000000000..888b195152c5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000058_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000059_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000059_000000.parquet new file mode 100644 index 000000000000..23843491685e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000059_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000060_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000060_000000.parquet new file mode 100644 index 000000000000..be75f499d7b2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000060_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000061_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000061_000000.parquet new file mode 
100644 index 000000000000..e104283ee8ec Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000061_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000062_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000062_000000.parquet new file mode 100644 index 000000000000..0a48a8236537 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000062_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000063_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000063_000000.parquet new file mode 100644 index 000000000000..3c381d1c4772 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000063_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000064_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000064_000000.parquet new file mode 100644 index 000000000000..d284e68ce0a0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000064_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000065_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000065_000000.parquet new file mode 100644 index 000000000000..013e757f1307 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000065_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000066_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000066_000000.parquet new file mode 100644 index 000000000000..5851d848df01 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000066_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000067_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000067_000000.parquet new file mode 100644 index 000000000000..10f43787d2cd Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000067_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000068_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000068_000000.parquet new file mode 100644 index 000000000000..b758eba51e4a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000068_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000069_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000069_000000.parquet new file mode 100644 index 000000000000..4b455721c656 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000069_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000070_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000070_000000.parquet new file mode 100644 index 000000000000..f92052471210 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000070_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000071_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000071_000000.parquet new file mode 100644 index 000000000000..868469a0d026 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000071_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000072_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000072_000000.parquet new file mode 100644 index 000000000000..618f5d24973e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000072_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000073_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000073_000000.parquet new file mode 100644 index 000000000000..3e04590e35cc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000073_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000074_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000074_000000.parquet new file mode 100644 index 000000000000..6cc4486f6d65 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000074_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000075_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000075_000000.parquet new file mode 100644 index 000000000000..090b461c5fa2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000075_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000076_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000076_000000.parquet new file mode 100644 index 000000000000..4d6f4964e58c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000076_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000077_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000077_000000.parquet new file mode 100644 index 000000000000..abb5b5ef2635 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000077_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000078_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000078_000000.parquet new file mode 100644 index 000000000000..cc23e347cdaf Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000078_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000079_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000079_000000.parquet new file mode 100644 index 000000000000..24189e94fc0d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000079_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000080_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000080_000000.parquet new file mode 100644 index 000000000000..44c390e539be Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000080_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000081_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000081_000000.parquet new file mode 100644 index 000000000000..59932246942a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000081_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000082_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000082_000000.parquet new file mode 100644 index 000000000000..0902ec5ab34d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000082_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000083_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000083_000000.parquet new file mode 100644 index 000000000000..4396cb0340dd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000083_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000084_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000084_000000.parquet new file mode 100644 index 000000000000..075b9819f0ce Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000084_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000085_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000085_000000.parquet new file mode 100644 index 000000000000..f22ccfdfe625 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000085_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000086_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000086_000000.parquet new file mode 
100644 index 000000000000..60102a857506 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000086_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000087_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000087_000000.parquet new file mode 100644 index 000000000000..d4bba381895a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000087_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000088_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000088_000000.parquet new file mode 100644 index 000000000000..e58c457d66b2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000088_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000089_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000089_000000.parquet new file mode 100644 index 000000000000..08dcdb9da1e0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000089_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000090_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000090_000000.parquet new file mode 100644 index 000000000000..3e3ac185d7c8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000090_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000091_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000091_000000.parquet new file mode 100644 index 000000000000..f12a144c86a7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000091_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000092_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000092_000000.parquet new file mode 100644 index 000000000000..419901c3c6bf Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000092_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000093_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000093_000000.parquet new file mode 100644 index 000000000000..b49af72df8d1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000093_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000094_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000094_000000.parquet new file mode 100644 index 000000000000..1e463493427b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000094_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000095_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000095_000000.parquet new file mode 100644 index 000000000000..d49ae4e16d44 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000095_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000096_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000096_000000.parquet new file mode 100644 index 000000000000..fd3f9eced002 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000096_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000097_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000097_000000.parquet new file mode 100644 index 000000000000..1e14a389e10f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000097_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000098_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000098_000000.parquet new file mode 100644 index 000000000000..df4636379626 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000098_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000099_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000099_000000.parquet new file mode 100644 index 000000000000..d0ba5dc95010 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000099_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000100_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000100_000000.parquet new file mode 100644 index 000000000000..66bddaac3534 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000100_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000101_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000101_000000.parquet new file mode 100644 index 000000000000..95bbf124ad78 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000101_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000102_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000102_000000.parquet new file mode 100644 index 000000000000..b69c3941a055 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000102_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000103_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000103_000000.parquet new file mode 100644 index 000000000000..617b5937c395 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000103_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000104_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000104_000000.parquet new file mode 100644 index 000000000000..4fd28983b1df Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000104_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000105_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000105_000000.parquet new file mode 100644 index 000000000000..c8d84c3ff38e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000105_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000106_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000106_000000.parquet new file mode 100644 index 000000000000..f8bf99482db9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000106_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000107_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000107_000000.parquet new file mode 100644 index 000000000000..ae2b2956736c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000107_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000108_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000108_000000.parquet new file mode 100644 index 000000000000..7a0fb5faad16 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000108_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000109_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000109_000000.parquet new file mode 100644 index 000000000000..b65f38b28591 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000109_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000110_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000110_000000.parquet new file mode 100644 index 000000000000..d438acb53957 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000110_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000111_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000111_000000.parquet new file mode 
100644 index 000000000000..7a4898427500 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000111_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000112_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000112_000000.parquet new file mode 100644 index 000000000000..cb7ad36de1e0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000112_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000113_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000113_000000.parquet new file mode 100644 index 000000000000..2b65c1e22454 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000113_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000114_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000114_000000.parquet new file mode 100644 index 000000000000..aa849ec02c47 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000114_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000115_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000115_000000.parquet new file mode 100644 index 000000000000..650d52942b18 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000115_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000116_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000116_000000.parquet new file mode 100644 index 000000000000..dda8aa8665d0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000116_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000117_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000117_000000.parquet new file mode 100644 index 000000000000..efaa5befd9db Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000117_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000118_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000118_000000.parquet new file mode 100644 index 000000000000..657c90acb73d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000118_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000119_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000119_000000.parquet new file mode 100644 index 000000000000..9c4d8f1f3155 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000119_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000120_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000120_000000.parquet new file mode 100644 index 000000000000..c0d7994f91ec Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000120_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000121_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000121_000000.parquet new file mode 100644 index 000000000000..ba51c65ec86e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000121_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000122_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000122_000000.parquet new file mode 100644 index 000000000000..a40e1fa726fc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000122_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000123_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000123_000000.parquet new file mode 100644 index 000000000000..b2f261466f49 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000123_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000124_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000124_000000.parquet new file mode 100644 index 000000000000..ac38d030ce55 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000124_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000125_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000125_000000.parquet new file mode 100644 index 000000000000..4f9734d44c71 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000125_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000126_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000126_000000.parquet new file mode 100644 index 000000000000..5230fdb82f69 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000126_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000127_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000127_000000.parquet new file mode 100644 index 000000000000..c767678cda9b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000127_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000128_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000128_000000.parquet new file mode 100644 index 000000000000..b0c4ec5f1612 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000128_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000129_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000129_000000.parquet new file mode 100644 index 000000000000..f3e6b215ca4e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000129_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000130_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000130_000000.parquet new file mode 100644 index 000000000000..b0fada534447 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000130_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000131_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000131_000000.parquet new file mode 100644 index 000000000000..a96b49920159 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000131_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000132_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000132_000000.parquet new file mode 100644 index 000000000000..75df25262707 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000132_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000133_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000133_000000.parquet new file mode 100644 index 000000000000..74d14f700624 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000133_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000134_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000134_000000.parquet new file mode 100644 index 000000000000..dd18066e201a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000134_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000135_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000135_000000.parquet new file mode 100644 index 000000000000..8d841e14b07d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000135_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000136_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000136_000000.parquet new file mode 
100644 index 000000000000..cd6b35a23df9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000136_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000137_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000137_000000.parquet new file mode 100644 index 000000000000..4163be4db8ae Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000137_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000138_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000138_000000.parquet new file mode 100644 index 000000000000..65ac4a4031a5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000138_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000139_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000139_000000.parquet new file mode 100644 index 000000000000..117e670fac82 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000139_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000140_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000140_000000.parquet new file mode 100644 index 000000000000..960d72ef69ed Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000140_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000141_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000141_000000.parquet new file mode 100644 index 000000000000..7cb85ac7bd37 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000141_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000142_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000142_000000.parquet new file mode 100644 index 000000000000..739f690d7d39 Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000142_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000143_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000143_000000.parquet new file mode 100644 index 000000000000..578f95554b1a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000143_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000144_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000144_000000.parquet new file mode 100644 index 000000000000..6135b6e39381 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000144_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000145_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000145_000000.parquet new file mode 100644 index 000000000000..250ce72d5850 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000145_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000146_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000146_000000.parquet new file mode 100644 index 000000000000..678e47bb2545 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000146_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000147_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000147_000000.parquet new file mode 100644 index 000000000000..f6881d9f4acb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000147_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000148_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000148_000000.parquet new file mode 100644 index 000000000000..d7794684b619 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000148_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000149_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000149_000000.parquet new file mode 100644 index 000000000000..37e18e90bcdb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000149_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000150_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000150_000000.parquet new file mode 100644 index 000000000000..6184363234dc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000150_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000151_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000151_000000.parquet new file mode 100644 index 000000000000..5185044836dc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000151_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000152_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000152_000000.parquet new file mode 100644 index 000000000000..922af626d2e5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000152_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000153_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000153_000000.parquet new file mode 100644 index 000000000000..1dc61f862cb2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000153_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000154_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000154_000000.parquet new file mode 100644 index 000000000000..599308f820b2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000154_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000155_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000155_000000.parquet new file mode 100644 index 000000000000..0e91216db536 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000155_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000156_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000156_000000.parquet new file mode 100644 index 000000000000..191de0444f1f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000156_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000157_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000157_000000.parquet new file mode 100644 index 000000000000..98572c7ad7c7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000157_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000158_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000158_000000.parquet new file mode 100644 index 000000000000..4b14f078176f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000158_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000159_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000159_000000.parquet new file mode 100644 index 000000000000..8f82f399a80b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000159_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000160_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000160_000000.parquet new file mode 100644 index 000000000000..bf8abddfb7bc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000160_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000161_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000161_000000.parquet new file mode 
100644 index 000000000000..9d02e03431c1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000161_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000162_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000162_000000.parquet new file mode 100644 index 000000000000..b363f425faa1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000162_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000163_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000163_000000.parquet new file mode 100644 index 000000000000..1e019603474e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000163_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000164_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000164_000000.parquet new file mode 100644 index 000000000000..77f4574a0982 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000164_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000165_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000165_000000.parquet new file mode 100644 index 000000000000..7c7f6a89a9ea Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000165_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000166_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000166_000000.parquet new file mode 100644 index 000000000000..d2bfb1961fd5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000166_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000167_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000167_000000.parquet new file mode 100644 index 000000000000..7635662fa055 Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000167_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000168_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000168_000000.parquet new file mode 100644 index 000000000000..988e6700b167 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000168_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000169_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000169_000000.parquet new file mode 100644 index 000000000000..1b2c11fdd72a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000169_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000170_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000170_000000.parquet new file mode 100644 index 000000000000..2bd7f0ffba7c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000170_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000171_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000171_000000.parquet new file mode 100644 index 000000000000..88350af4d211 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000171_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000172_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000172_000000.parquet new file mode 100644 index 000000000000..dd7b36cc12c0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000172_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000173_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000173_000000.parquet new file mode 100644 index 000000000000..d9940a09868c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000173_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000174_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000174_000000.parquet new file mode 100644 index 000000000000..b7aaa6548dae Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000174_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000175_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000175_000000.parquet new file mode 100644 index 000000000000..55dc8ba933ed Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000175_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000176_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000176_000000.parquet new file mode 100644 index 000000000000..177eadd4de6b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000176_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000177_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000177_000000.parquet new file mode 100644 index 000000000000..2ab9488e8208 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000177_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000178_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000178_000000.parquet new file mode 100644 index 000000000000..50f806f56040 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000178_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000179_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000179_000000.parquet new file mode 100644 index 000000000000..205fce3cab08 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000179_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000180_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000180_000000.parquet new file mode 100644 index 000000000000..95cc145bde71 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000180_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000181_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000181_000000.parquet new file mode 100644 index 000000000000..09b5cdb094d9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000181_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000182_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000182_000000.parquet new file mode 100644 index 000000000000..27a33517a4f1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000182_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000183_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000183_000000.parquet new file mode 100644 index 000000000000..0a8bbd62bb57 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000183_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000184_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000184_000000.parquet new file mode 100644 index 000000000000..990342d9e1f0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000184_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000185_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000185_000000.parquet new file mode 100644 index 000000000000..4323887cd421 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000185_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000186_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000186_000000.parquet new file mode 
100644 index 000000000000..bc644a10e9d0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000186_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000187_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000187_000000.parquet new file mode 100644 index 000000000000..6da256c1c180 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000187_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000188_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000188_000000.parquet new file mode 100644 index 000000000000..a67195ca51e2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000188_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000189_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000189_000000.parquet new file mode 100644 index 000000000000..e2a1da1e9ee4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000189_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000190_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000190_000000.parquet new file mode 100644 index 000000000000..e60413fb428d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000190_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000191_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000191_000000.parquet new file mode 100644 index 000000000000..6a0dc11e8dd7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000191_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000192_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000192_000000.parquet new file mode 100644 index 000000000000..696334d4f755 Binary files /dev/null and 
b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000192_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000193_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000193_000000.parquet new file mode 100644 index 000000000000..a63ee70c5cd7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000193_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000194_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000194_000000.parquet new file mode 100644 index 000000000000..e04ce9d361ae Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000194_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000195_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000195_000000.parquet new file mode 100644 index 000000000000..50b007525793 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000195_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000196_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000196_000000.parquet new file mode 100644 index 000000000000..9b0fe5f168ee Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000196_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000197_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000197_000000.parquet new file mode 100644 index 000000000000..ab6053c63f7e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000197_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000198_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000198_000000.parquet new file mode 100644 index 000000000000..99943cac2408 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000198_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000199_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000199_000000.parquet new file mode 100644 index 000000000000..38c9f6303b8e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_enormous/1_000199_000000.parquet differ diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/tuned_examples/bc/cartpole_bc.py index 0f6616636227..a6f06b746f53 100644 --- a/rllib/tuned_examples/bc/cartpole_bc.py +++ b/rllib/tuned_examples/bc/cartpole_bc.py @@ -25,7 +25,7 @@ data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[2] print(f"base_path={base_path}") -data_path = "local://" + base_path.joinpath(data_path).as_posix() +data_path = "local://" / base_path / data_path print(f"data_path={data_path}") # Define the BC config. @@ -48,7 +48,7 @@ # configured. The read method needs at least as many blocks # as remote learners. .offline_data( - input_=[data_path], + input_=[data_path.as_posix()], # Define the number of reading blocks, these should be larger than 1 # and aligned with the data size. input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 2)}, diff --git a/rllib/tuned_examples/cql/pendulum_cql.py b/rllib/tuned_examples/cql/pendulum_cql.py new file mode 100644 index 000000000000..8d5f47be2780 --- /dev/null +++ b/rllib/tuned_examples/cql/pendulum_cql.py @@ -0,0 +1,66 @@ +from pathlib import Path + +from ray.rllib.algorithms.cql.cql import CQLConfig +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EPISODE_RETURN_MEAN, + EVALUATION_RESULTS, + NUM_ENV_STEPS_SAMPLED_LIFETIME, +) +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) + +parser = add_rllib_example_script_args() +# Use `parser` to add your own custom command line options to this script +# and (if needed) use their values to set up `config` below. 
+args = parser.parse_args() + +assert ( + args.env == "Pendulum-v1" or args.env is None +), "This tuned example works only with `Pendulum-v1`." + + +base_path = Path(__file__).parents[2] +data_path = base_path / "tests/data/pendulum/pendulum-v1_enormous" + +config = ( + CQLConfig() + .environment("Pendulum-v1") + .api_stack( + enable_env_runner_and_connector_v2=True, + enable_rl_module_and_learner=True, + ) + .offline_data( + input_=[data_path.as_posix()], + actions_in_input_normalized=True, + dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None, + ) + .training( + bc_iters=100, + train_batch_size_per_learner=2000, + twin_q=False, + ) + .reporting( + min_time_s_per_iteration=10, + metrics_num_episodes_for_smoothing=5, + ) + .evaluation( + evaluation_interval=1, + evaluation_num_env_runners=2, + evaluation_duration=10, + evaluation_config={ + "explore": False, + }, + ) +) + + +stop = { + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": -700.0, + NUM_ENV_STEPS_SAMPLED_LIFETIME: 800000, +} + +if __name__ == "__main__": + run_rllib_example_script_experiment(config, args, stop=stop) diff --git a/rllib/tuned_examples/marwil/cartpole_marwil.py b/rllib/tuned_examples/marwil/cartpole_marwil.py index 0d4886b4a908..17f8cdb87605 100644 --- a/rllib/tuned_examples/marwil/cartpole_marwil.py +++ b/rllib/tuned_examples/marwil/cartpole_marwil.py @@ -25,7 +25,7 @@ data_path = "tests/data/cartpole/cartpole-v1_large" base_path = Path(__file__).parents[2] print(f"base_path={base_path}") -data_path = "local://" + base_path.joinpath(data_path).as_posix() +data_path = "local://" / base_path / data_path print(f"data_path={data_path}") # Define the MARWIL config. @@ -47,7 +47,7 @@ # configured. The read method needs at least as many blocks # as remote learners. .offline_data( - input_=[data_path], + input_=[data_path.as_posix()], input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 1)}, prelearner_module_synch_period=20, )