ray-project · sven1977 · Oct 29, 2021 · Oct 27, 2021 · Oct 27, 2021 · Oct 27, 2021
@@ -157,7 +157,7 @@ def to_argv(config):
 def create_trial_from_spec(spec, output_path, parser, **trial_kwargs):
     """Creates a Trial object from parsing the spec.
 
-    Arguments:
+    Args:
         spec (dict): A resolved experiment specification. Arguments should
             The args here should correspond to the command line flags
             in ray.tune.config_parser.

@@ -2006,13 +2006,14 @@ py_test(
     args = ["--as-test", "--framework=torch", "--stop-reward=150", "--num-cpus=4"]
 )
 
-py_test(
-    name = "examples/bare_metal_policy_with_custom_view_reqs",
-    main = "examples/bare_metal_policy_with_custom_view_reqs.py",
-    tags = ["team:ml", "examples", "examples_B"],
-    size = "small",
-    srcs = ["examples/bare_metal_policy_with_custom_view_reqs.py"],
-)
+# Disabled because this test times out.
+#py_test(
+#    name = "examples/bare_metal_policy_with_custom_view_reqs",
+#    main = "examples/bare_metal_policy_with_custom_view_reqs.py",
+#    tags = ["team:ml", "examples", "examples_B"],
+#    size = "small",
+#    srcs = ["examples/bare_metal_policy_with_custom_view_reqs.py"],
+#)
 
 py_test(
     name = "examples/batch_norm_model_ppo_tf",

@@ -46,10 +46,11 @@ def on_episode_start(self,
         """Callback run on the rollout worker before each episode starts.
 
         Args:
-            worker (RolloutWorker): Reference to the current rollout worker.
-            base_env (BaseEnv): BaseEnv running the episode. The underlying
-                env object can be gotten by calling base_env.get_unwrapped().
-            policies (dict): Mapping of policy id to policy objects. In single
+            worker: Reference to the current rollout worker.
+            base_env: BaseEnv running the episode. The underlying
+                sub environment objects can be retrieved by calling
+                `base_env.get_sub_environments()`.
+            policies: Mapping of policy id to policy objects. In single
                 agent mode there will only be a single "default" policy.
             episode (MultiAgentEpisode): Episode object which contains episode
                 state. You can use the `episode.user_data` dict to store
@@ -80,7 +81,8 @@ def on_episode_step(self,
         Args:
             worker (RolloutWorker): Reference to the current rollout worker.
             base_env (BaseEnv): BaseEnv running the episode. The underlying
-                env object can be gotten by calling base_env.get_unwrapped().
+                sub environment objects can be retrieved by calling
+                `base_env.get_sub_environments()`.
             policies (Optional[Dict[PolicyID, Policy]]): Mapping of policy id
                 to policy objects. In single agent mode there will only be a
                 single "default_policy".
@@ -112,7 +114,8 @@ def on_episode_end(self,
         Args:
             worker (RolloutWorker): Reference to the current rollout worker.
             base_env (BaseEnv): BaseEnv running the episode. The underlying
-                env object can be gotten by calling base_env.get_unwrapped().
+                sub environment objects can be retrieved by calling
+                `base_env.get_sub_environments()`.
             policies (Dict[PolicyID, Policy]): Mapping of policy id to policy
                 objects. In single agent mode there will only be a single
                 "default_policy".

@@ -33,7 +33,7 @@ class AlphaZeroDefaultCallbacks(DefaultCallbacks):
 
     def on_episode_start(self, worker, base_env, policies, episode, **kwargs):
         # save env state when an episode starts
-        env = base_env.get_unwrapped()[0]
+        env = base_env.get_sub_environments()[0]
         state = env.get_state()
         episode.user_data["initial_state"] = state