[RLlib] IMPALA on new API stack (w/ EnvRunner- and ConnectorV2 APIs). #42085

Merged
Commits
409 commits
e34c1ff
wip
sven1977 Dec 11, 2023
ccdd4e3
wip
sven1977 Dec 11, 2023
c43f6d4
- Changed ConnectorV2 API
sven1977 Dec 11, 2023
d91576c
merge
sven1977 Dec 11, 2023
72620f9
multi-GPU torch DDP fix
sven1977 Dec 11, 2023
d3a40ea
wip
sven1977 Dec 12, 2023
d5e2150
wip
sven1977 Dec 13, 2023
cea44b9
wip
sven1977 Dec 13, 2023
45dede9
wip
sven1977 Dec 13, 2023
05a01d5
Learns Atari Pong in ~6min on 8GPUs and 96CPUs on new stack w/ EnvRun…
sven1977 Dec 13, 2023
3c335aa
Merge branch 'master' into replace_learner_hps_with_algo_config
sven1977 Dec 13, 2023
0452070
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 13, 2023
0213870
wip
sven1977 Dec 13, 2023
728bdec
Merge remote-tracking branch 'origin/replace_learner_hps_with_algo_co…
sven1977 Dec 13, 2023
a15bd29
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Dec 13, 2023
5509640
wip
sven1977 Dec 13, 2023
31a6e5c
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 14, 2023
765f252
wip
sven1977 Dec 14, 2023
4da7b8c
wip
sven1977 Dec 14, 2023
6b7978a
LINT
sven1977 Dec 14, 2023
0a5380e
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Dec 14, 2023
8fc5056
wip
sven1977 Dec 14, 2023
d88fea8
Merge branch 'master' into replace_learner_hps_with_algo_config
sven1977 Dec 14, 2023
9e5ce8f
wip
sven1977 Dec 14, 2023
7e081ef
Merge remote-tracking branch 'origin/replace_learner_hps_with_algo_co…
sven1977 Dec 14, 2023
f942698
LINT
sven1977 Dec 14, 2023
9bfbef6
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 14, 2023
c2317cc
wip
sven1977 Dec 14, 2023
5b9556d
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 14, 2023
0286340
fix
sven1977 Dec 15, 2023
6e868be
wip
sven1977 Dec 15, 2023
55170b0
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Dec 15, 2023
1ce5658
- new framestacking connectorV2 example script runs properly
sven1977 Dec 15, 2023
4f18ce6
- fix new framestacking connectorV2 example script runs properly (w/o…
sven1977 Dec 15, 2023
73cbc63
wip
sven1977 Dec 15, 2023
292f0bb
fix
sven1977 Dec 15, 2023
a0364e1
- Learns Pong Atari on 1 GPU with the new frame stacking connector.
sven1977 Dec 15, 2023
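For readers unfamiliar with the technique named in the frame-stacking commits above, here is a rough, illustrative NumPy sketch of frame stacking in general (not the ConnectorV2 implementation from this PR; names and shapes are made up):

```python
from collections import deque
import numpy as np

# Keep the last N frames and concatenate them along the channel axis.
N = 4
frames = deque(maxlen=N)

def stack(obs):
    frames.append(obs)
    while len(frames) < N:      # pad with the first frame at episode start
        frames.append(obs)
    return np.concatenate(list(frames), axis=-1)

obs = np.zeros((84, 84, 1), dtype=np.uint8)
print(stack(obs).shape)         # (84, 84, 4)
```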
594a0f0
wip
sven1977 Dec 15, 2023
44281be
lower lr in example script a little
sven1977 Dec 15, 2023
764c18b
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 15, 2023
aff98fe
Merge branch 'master' of https://github.com/ray-project/ray into repl…
sven1977 Dec 16, 2023
a7a1a5b
take out strangely failing test case
sven1977 Dec 16, 2023
b9e9914
LINT
sven1977 Dec 16, 2023
2e67f5d
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Dec 16, 2023
1d9cdf9
Merge branch 'replace_learner_hps_with_algo_config' into env_runner_s…
sven1977 Dec 16, 2023
1b29d53
LINT
sven1977 Dec 16, 2023
1abb907
merge
sven1977 Dec 16, 2023
dd86623
wip
sven1977 Dec 16, 2023
add5832
wip
sven1977 Dec 16, 2023
5e4564d
- still learning Atari Pong in ~6min on 8GPUs and 95 workers (just li…
sven1977 Dec 16, 2023
870cb45
wip: MeanStdFilter connectorV2 stuff
sven1977 Dec 18, 2023
347bec1
wip: MeanStdFilter connectorV2 stuff
sven1977 Dec 18, 2023
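The MeanStdFilter mentioned in the two commits above normalizes observations with running statistics. A self-contained sketch of that general idea (illustrative only, not the ConnectorV2 code; the update uses the standard parallel-variance combine):

```python
import numpy as np

class RunningMeanStd:
    """Running mean/std filter: maintain statistics, normalize observations."""

    def __init__(self, shape):
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)
        self.count = 1e-4

    def update(self, x):
        batch_mean, batch_var, n = x.mean(axis=0), x.var(axis=0), x.shape[0]
        delta = batch_mean - self.mean
        total = self.count + n
        m2 = self.var * self.count + batch_var * n + delta**2 * self.count * n / total
        self.mean = self.mean + delta * n / total
        self.var = m2 / total
        self.count = total

    def normalize(self, x):
        return (x - self.mean) / np.sqrt(self.var + 1e-8)

rms = RunningMeanStd(shape=(4,))
batch = np.random.randn(32, 4) * 5.0 + 2.0
rms.update(batch)
print(rms.normalize(batch).mean(axis=0))  # close to zero after the update
```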
b3095c3
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Dec 18, 2023
37327e7
wip
sven1977 Dec 19, 2023
7e807ff
merge
sven1977 Dec 21, 2023
e139c24
wip
sven1977 Dec 22, 2023
bedd4fa
merge
sven1977 Dec 22, 2023
d0f40ba
IMPALA learns CartPole-v1 :)
sven1977 Dec 23, 2023
ee64c86
fix bug in SampleBatch slice (INFOs dropped from original sample batc…
sven1977 Dec 24, 2023
63c286e
wip
sven1977 Dec 26, 2023
48658ff
wip
sven1977 Dec 27, 2023
3941611
New stack throughput w/o learning step (8 env runners)
sven1977 Dec 27, 2023
2322ea3
wip
sven1977 Dec 28, 2023
b44133b
fixes
sven1977 Dec 28, 2023
ff79dd2
wip
sven1977 Jan 2, 2024
0fe9e06
merge
sven1977 Jan 5, 2024
90daa55
CartPole-v1 learning w/ 2 CPU learners
sven1977 Jan 8, 2024
1693d52
wip
sven1977 Jan 9, 2024
a1c3050
wip
sven1977 Jan 9, 2024
e22b0e3
wip
sven1977 Jan 10, 2024
b34d330
wip
sven1977 Jan 10, 2024
43f3826
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jan 10, 2024
59e4ed6
CartPole-v1 learnt properly:
sven1977 Jan 10, 2024
be39b1e
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Jan 10, 2024
8f6ac5c
APPO learns CartPole-v1 much better and faster than old stack.
sven1977 Jan 11, 2024
97dcb45
wip
sven1977 Jan 11, 2024
3bfd5d2
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Jan 11, 2024
3825475
fixes
sven1977 Jan 11, 2024
2ca006d
fixes
sven1977 Jan 11, 2024
252f7a3
fixes
sven1977 Jan 11, 2024
e7bc45b
fixes
sven1977 Jan 11, 2024
e3ba317
fixes
sven1977 Jan 11, 2024
854b87a
merge
sven1977 Jan 11, 2024
b3e7b85
fixes
sven1977 Jan 11, 2024
938d02f
CartPole learning broke completely. Commit (and the merge one before …
sven1977 Jan 11, 2024
948c4a3
CartPole learning broke completely. Still bisecting
sven1977 Jan 11, 2024
b9fec59
fix
sven1977 Jan 11, 2024
369070a
CartPole-v1 learning again!
sven1977 Jan 11, 2024
d56a11d
wip
sven1977 Jan 11, 2024
8d4ba0a
fixes
sven1977 Jan 12, 2024
ca3782f
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Jan 12, 2024
90c91ba
fixes
sven1977 Jan 12, 2024
0039b13
fixes
sven1977 Jan 12, 2024
4605dcb
fixes
sven1977 Jan 12, 2024
3583e80
Merge branch 'master' of https://github.com/ray-project/ray into env_…
sven1977 Jan 12, 2024
af25485
fixes
sven1977 Jan 12, 2024
85c5e87
Merge branch 'env_runner_support_connectors_06_small_changes_on_env_r…
sven1977 Jan 12, 2024
c0071be
wip
sven1977 Jan 12, 2024
005414d
merge
sven1977 Jan 15, 2024
b9ecfc8
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jan 18, 2024
2c88209
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Feb 28, 2024
5488859
wip
sven1977 Feb 28, 2024
7788110
wip
sven1977 Feb 29, 2024
9e068de
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Feb 29, 2024
83c2af1
wip
sven1977 Mar 1, 2024
1296812
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 1, 2024
dfca0a6
wip
sven1977 Mar 1, 2024
efddbb0
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 4, 2024
5f1aeaa
wip
sven1977 Mar 4, 2024
0c3c92d
merge
sven1977 Mar 11, 2024
c5ece2c
CartPole-v1 Impala learning faster than old API stack
sven1977 Mar 12, 2024
0af40c9
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 12, 2024
c3432ae
wip
sven1977 Mar 12, 2024
7fd73fa
fix
sven1977 Mar 12, 2024
1703291
LINT
sven1977 Mar 13, 2024
ac6d613
Merge branch 'master' of https://github.com/ray-project/ray into depr…
sven1977 Mar 13, 2024
7ef5bd9
change max-in-flight to 1 on both samplers and learners
sven1977 Mar 13, 2024
26d868a
wip
sven1977 Mar 13, 2024
bbc9f2a
wip
sven1977 Mar 15, 2024
b02d34d
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 15, 2024
1078fa3
add learner block for >1 GPUs (only for max-in-flight=1 thus far!)
sven1977 Mar 15, 2024
7534b74
wip
sven1977 Mar 16, 2024
25cdfb4
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 16, 2024
0368ed8
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 20, 2024
eafef4a
wip
sven1977 Mar 20, 2024
f3bdc75
wip
sven1977 Mar 20, 2024
758d0da
wip
sven1977 Mar 20, 2024
5dd5ed6
wip
sven1977 Mar 20, 2024
96fcfd2
wip
sven1977 Mar 20, 2024
83962c2
wip
sven1977 Mar 20, 2024
b8249c2
wip
sven1977 Mar 20, 2024
de97a25
wip
sven1977 Mar 20, 2024
7a448d9
wip
sven1977 Mar 20, 2024
d464c51
wip
sven1977 Mar 20, 2024
89be8bf
wip
sven1977 Mar 20, 2024
3e4a898
wip
sven1977 Mar 20, 2024
f586553
wip
sven1977 Mar 20, 2024
7a88bfc
wip
sven1977 Mar 20, 2024
fa2c9dc
wip
sven1977 Mar 20, 2024
c12dba4
wip
sven1977 Mar 20, 2024
55faaa1
Reactivate training.
sven1977 Mar 21, 2024
d1d4c70
wip
sven1977 Mar 21, 2024
3588bb6
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Mar 21, 2024
8527933
merge
sven1977 May 3, 2024
c0b1963
wip
sven1977 May 3, 2024
f8ea373
wip
sven1977 May 4, 2024
6db6784
wip
sven1977 May 4, 2024
bffaebb
wip
sven1977 May 4, 2024
239e85e
Merge branch 'fix_async_vector_envs_in_sa_env_runner' into appo_on_ne…
sven1977 May 4, 2024
ddac014
IMPALA CartPole on 2 CPU Learners learning.
sven1977 May 6, 2024
4dc2d6b
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 May 6, 2024
c6f1138
cleanups
sven1977 May 6, 2024
fd99b97
remove additional_update from IMPALA (thus far w/o replacement) -> ne…
sven1977 May 7, 2024
3b8f327
added functionality for additional_update to `update_from_..` in IMPA…
sven1977 May 7, 2024
087e12f
Fix async update mechanism in LearnerGroup to NOT be restricted to ha…
sven1977 May 7, 2024
6687c0e
debug
sven1977 May 7, 2024
62a5dcb
add more timer stats
sven1977 May 7, 2024
3f42f8a
Fix metrics bug with tensor in the results dict arriving at Algo. -> …
sven1977 May 7, 2024
5f73cba
fix
sven1977 May 7, 2024
f088717
wip
sven1977 May 7, 2024
f62072b
minor fixes:
sven1977 May 7, 2024
32cf3f4
more enhancements (after finding out that GPU loader thread is a bott…
sven1977 May 7, 2024
87a6d9a
- Use pin_memory() on all tensors created on CPU (before .to("cuda") …
sven1977 May 8, 2024
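The commit above refers to the usual PyTorch pinned-memory pattern. A minimal sketch of that pattern (not code from this PR; the tensor shape is a stand-in):

```python
import torch

# Pin CPU tensors so the host-to-device copy can run asynchronously.
batch_cpu = torch.randn(1024, 84, 84, 4)       # stand-in for a sample batch
batch_pinned = batch_cpu.pin_memory()          # page-locked host memory
if torch.cuda.is_available():
    # non_blocking=True only overlaps with compute when the source is pinned.
    batch_gpu = batch_pinned.to("cuda", non_blocking=True)
```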
2c3ec52
- Fixes to make pinned memory pre-loading (non-blocking) work.
sven1977 May 8, 2024
a561d77
set default num threads back to 16
sven1977 May 8, 2024
eb7768e
debug bypassing LearnerConnector
sven1977 May 8, 2024
293b43b
better metrics and a smaller CNN model for impala-pong-fast.
sven1977 May 8, 2024
1d9c126
fix
sven1977 May 8, 2024
06b20a6
fix
sven1977 May 8, 2024
bb6c67e
fix
sven1977 May 8, 2024
97dd291
fix
sven1977 May 8, 2024
80a2a88
fix
sven1977 May 8, 2024
c12e554
- HACK-FIX too-low env-steps-trained counts. We currently only return…
sven1977 May 8, 2024
216dfa0
Make learner queue LIFO and limit its size (via config learner_queue_…
sven1977 May 8, 2024
4667d16
- Make learner queue deque.
sven1977 May 9, 2024
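A rough sketch of the bounded, newest-first queue behavior the two commits above describe (variable names are hypothetical; this is not the RLlib learner queue):

```python
from collections import deque

# Bounded queue: appending past maxlen silently drops the oldest entry;
# popping from the right consumes the newest batch first (LIFO).
learner_queue = deque(maxlen=4)

for batch_id in range(6):
    learner_queue.append({"batch": batch_id})

newest = learner_queue.pop()
print(newest, len(learner_queue))  # {'batch': 5} 3
```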
3ed18ec
- Fix bug in Metrics.log_n_dicts (if key does NOT exist yet in self, …
sven1977 May 10, 2024
3acd8e7
debug changes
sven1977 May 10, 2024
858798a
fixed: NUM_ENV_STEPS_SAMPLED_LIFETIME >> NUM_ENV_STEPS_TRAINED_LIFETI…
sven1977 May 10, 2024
f62d48a
- Seems to be linearly scalable now on GPU-axis wrt `NUM_ENV_STEPS_TR…
sven1977 May 10, 2024
8e14efc
merge
sven1977 May 10, 2024
3dc44de
LINT
sven1977 May 10, 2024
a354739
fix
sven1977 May 10, 2024
1847796
fix
sven1977 May 10, 2024
cf7482b
fix
sven1977 May 10, 2024
23fd5aa
naming fixes: envs_per_worker -> envs_per_env_runner
sven1977 May 10, 2024
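If the renamed setting is exposed the way the commit message suggests, usage looks roughly like this (method and keyword names are assumptions; check the AlgorithmConfig of your Ray version):

```python
from ray.rllib.algorithms.impala import ImpalaConfig

config = (
    ImpalaConfig()
    .environment("CartPole-v1")
    .env_runners(num_envs_per_env_runner=2)  # formerly: num_envs_per_worker
)
```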
2934ac3
- IMPALA: make loss based on mean again (instead of sum).
sven1977 May 12, 2024
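A toy illustration of the reduction change mentioned above (made-up numbers; this is not the IMPALA loss itself):

```python
import torch

per_timestep_loss = torch.tensor([0.2, 0.4, 0.6, 0.8])
loss_sum = per_timestep_loss.sum()    # scales with rollout/batch length
loss_mean = per_timestep_loss.mean()  # invariant to rollout/batch length
print(loss_sum.item(), loss_mean.item())  # 2.0 0.5
```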
c23f0bd
wip
sven1977 May 12, 2024
23d9578
wip
sven1977 May 12, 2024
0f5d9da
fixes
sven1977 May 13, 2024
f483dcf
Merge branch 'remove_all_rl_module_checking' into appo_on_new_api_sta…
sven1977 May 13, 2024
ec427e3
limit `.reduce()` calls at end of training_step (if no new results ar…
sven1977 May 13, 2024
82c4d97
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 May 13, 2024
b3fdaff
merge
sven1977 May 13, 2024
a0c34d2
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 May 14, 2024
5ed5784
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 May 14, 2024
16ec8ad
wip
sven1977 May 15, 2024
0af3228
wip
sven1977 May 15, 2024
bbd5ec2
Merge branch 'master' of https://github.com/ray-project/ray into clea…
sven1977 May 15, 2024
b15a6fe
merge
sven1977 Jun 4, 2024
2c604ea
merge
sven1977 Jun 4, 2024
712e3eb
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 4, 2024
d4f83a6
fixes
sven1977 Jun 4, 2024
092ddaa
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 5, 2024
6a3611d
wip
sven1977 Jun 5, 2024
6137312
wip
sven1977 Jun 5, 2024
6dcd3fc
num_gpu=0 -> lr=0 fix
sven1977 Jun 5, 2024
2ec939e
cleanup
sven1977 Jun 5, 2024
fec1e39
wip
sven1977 Jun 6, 2024
375c7ba
Merge branch 'master' of https://github.com/ray-project/ray into clea…
sven1977 Jun 6, 2024
df4192b
wip
sven1977 Jun 6, 2024
1597425
merge
sven1977 Jun 6, 2024
65f32d2
Merge branch 'master' of https://github.com/ray-project/ray into clea…
sven1977 Jun 6, 2024
b8b459f
LINT and test case and docstring
sven1977 Jun 6, 2024
682da4d
SAEnvRunner bug fix overriding an already provided model_config_dict …
sven1977 Jun 6, 2024
2b8f2d0
Merge branch 'cleanup_examples_folder_10_custom_rl_module.py' into ap…
sven1977 Jun 6, 2024
eef28b8
SAEnvRunner bug fix overriding an already provided model_config_dict …
sven1977 Jun 6, 2024
926eff4
finetune tiny CNN Atari (Pong) example
sven1977 Jun 6, 2024
239091b
better init of last logits layer in tiny CNN
sven1977 Jun 6, 2024
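The commit above does not say which initializer is used; as one common way to realize a "better init of the last logits layer", a small-gain init keeps the initial policy close to uniform. Purely illustrative sketch with made-up layer sizes:

```python
import torch.nn as nn

logits_layer = nn.Linear(256, 6)  # e.g., 6 discrete Atari actions; sizes made up
nn.init.orthogonal_(logits_layer.weight, gain=0.01)  # small gain on the final layer
nn.init.zeros_(logits_layer.bias)
```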
2f5b37d
wip
sven1977 Jun 7, 2024
a22d119
further fine tune
sven1977 Jun 7, 2024
5e191f6
fix
sven1977 Jun 7, 2024
2b72814
LINT
sven1977 Jun 7, 2024
4e7033d
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 7, 2024
fc9c0e1
LINT
sven1977 Jun 7, 2024
f1c4cb8
wip
sven1977 Jun 7, 2024
32c1b96
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 14, 2024
31aaa8b
wip
sven1977 Jun 14, 2024
293b417
wip
sven1977 Jun 14, 2024
3fd3b52
wip
sven1977 Jun 14, 2024
03cdbc5
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 17, 2024
6eb2547
wip
sven1977 Jun 17, 2024
3863383
wip
sven1977 Jun 17, 2024
097ea43
wip
sven1977 Jun 17, 2024
44224ce
wip
sven1977 Jun 17, 2024
1a3e054
wip
sven1977 Jun 17, 2024
6a417ff
wip
sven1977 Jun 18, 2024
f666e3a
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 18, 2024
1cb2a32
wip
sven1977 Jun 18, 2024
67476ee
wip
sven1977 Jun 18, 2024
73ed1f9
wip
sven1977 Jun 18, 2024
5efd05e
wip
sven1977 Jun 18, 2024
36eaaf9
wip
sven1977 Jun 18, 2024
1b2a24b
LINT
sven1977 Jun 18, 2024
20007eb
wip
sven1977 Jun 18, 2024
91e16f9
wip
sven1977 Jun 18, 2024
b799260
wip
sven1977 Jun 18, 2024
954d38e
wip
sven1977 Jun 18, 2024
499e4ea
Merge branch 'master' of https://github.com/ray-project/ray into appo…
sven1977 Jun 19, 2024
fb48956
wip
sven1977 Jun 19, 2024
1eb7205
wip
sven1977 Jun 19, 2024
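The diffs below gate new behavior on config._enable_new_api_stack. For orientation, a minimal, assumption-laden sketch of building IMPALA with that flag enabled (ImpalaConfig and the experimental() call are taken from public RLlib APIs of that era; exact spellings may differ by Ray version, and this is not code from the PR):

```python
from ray.rllib.algorithms.impala import ImpalaConfig

config = (
    ImpalaConfig()
    .environment("CartPole-v1")
    # Flag name as referenced in the diff below; how it is toggled may differ
    # between Ray versions.
    .experimental(_enable_new_api_stack=True)
    .training(lr=0.0005)
)
algo = config.build()  # this PR also renames build() to build_algorithm()
result = algo.train()
print(sorted(result.keys())[:5])  # metric names differ between the two stacks
```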

ci/lint/format.sh (2 changes: 1 addition & 1 deletion)
@@ -108,7 +108,7 @@ else
echo "WARNING: clang-format is not installed!"
fi

if command -v java >/dev/null; then
if 0; then #command -v java >/dev/null; then
if [ ! -f "$GOOGLE_JAVA_FORMAT_JAR" ]; then
echo "Java code format tool google-java-format.jar is not installed, start to install it."
wget https://github.com/google/google-java-format/releases/download/google-java-format-1.7/google-java-format-1.7-all-deps.jar -O "$GOOGLE_JAVA_FORMAT_JAR"

python/ray/train/_internal/worker_group.py (2 changes: 1 addition & 1 deletion)
@@ -87,7 +87,7 @@ def construct_metadata() -> WorkerMetadata:
node_id = ray.get_runtime_context().get_node_id()
node_ip = ray.util.get_node_ip_address()
hostname = socket.gethostname()
accelerator_ids = ray.get_runtime_context().get_accelerator_ids()
accelerator_ids = ray.get_runtime_context().get_resource_ids()
pid = os.getpid()

return WorkerMetadata(

rllib/BUILD (30 changes: 20 additions & 10 deletions)
@@ -150,6 +150,16 @@ py_test(
# --------------------------------------------------------------------

# APPO
py_test(
name = "learning_tests_cartpole_appo_w_env_runner",
main = "tests/run_regression_tests.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
size = "large",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/appo/cartpole_appo_envrunner.py"],
args = ["--dir=tuned_examples/appo/"]
)

py_test(
name = "learning_tests_cartpole_appo_w_rl_modules_and_learner",
main = "tests/run_regression_tests.py",
@@ -301,15 +311,15 @@
)

# IMPALA
# py_test(
# name = "learning_tests_cartpole_impala",
# main = "tests/run_regression_tests.py",
# tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
# size = "large",
# srcs = ["tests/run_regression_tests.py"],
# data = ["tuned_examples/impala/cartpole-impala.yaml"],
# args = ["--dir=tuned_examples/impala"]
# )
py_test(
name = "learning_tests_cartpole_impala_w_env_runner",
main = "tests/run_regression_tests.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
size = "large",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/impala/cartpole_impala_envrunner.py"],
args = ["--dir=tuned_examples/impala/"]
)

py_test(
name = "learning_tests_cartpole_separate_losses_impala",
@@ -590,7 +600,7 @@
srcs = ["algorithms/dreamerv3/tests/test_dreamerv3.py"]
)

# Impala
# IMPALA
py_test(
name = "test_impala",
tags = ["team:rllib", "algorithms_dir"],

rllib/algorithms/algorithm.py (70 changes: 52 additions & 18 deletions)
@@ -638,7 +638,7 @@ def setup(self, config: AlgorithmConfig) -> None:
)

# Ensure remote workers are initially in sync with the local worker.
self.workers.sync_weights()
#self.workers.sync_weights()

# Compile, validate, and freeze an evaluation config.
self.evaluation_config = self.config.get_evaluation_config_object()
@@ -722,7 +722,6 @@ def setup(self, config: AlgorithmConfig) -> None:
# Need to add back method_type in case Algorithm is restored from checkpoint
method_config["type"] = method_type

self.learner_group = None
if self.config._enable_new_api_stack:
local_worker = self.workers.local_worker()
env = spaces = None
@@ -781,11 +780,16 @@ def setup(self, config: AlgorithmConfig) -> None:
lambda w: w.set_is_policy_to_train(policies_to_train),
healthy_only=True,
)

# Sync the weights from the learner group to the rollout workers.
weights = self.learner_group.get_weights()
local_worker.set_weights(weights)
self.workers.sync_weights()
# Sync the weights from the learner group to the rollout workers.
weights = self.learner_group.get_weights()
local_worker.set_weights(weights)
self.workers.sync_weights()
# New stack/EnvRunner APIs: Use get/set_state (no more get/set_weights).
else:
# Sync the weights from the learner group to the rollout workers.
weights = self.learner_group.get_weights()
local_worker.set_state({"rl_module": weights})
self.workers.sync_weights()

# Run `on_algorithm_init` callback after initialization is done.
self.callbacks.on_algorithm_init(algorithm=self)
@@ -876,11 +880,13 @@ def step(self) -> ResultDict:
config=self.config,
)

episodes_this_iter = collect_episodes(
self.workers,
self._remote_worker_ids_for_metrics(),
timeout_seconds=self.config.metrics_episode_collection_timeout_s,
)
episodes_this_iter = results.pop("_episodes_this_iter", None)
if episodes_this_iter is None:
episodes_this_iter = collect_episodes(
self.workers,
self._remote_worker_ids_for_metrics(),
timeout_seconds=self.config.metrics_episode_collection_timeout_s,
)
results = self._compile_iteration_results(
episodes_this_iter=episodes_this_iter,
step_ctx=train_iter_ctx,
@@ -1386,9 +1392,15 @@ def _evaluate_async_with_env_runner(
with self._timers[SYNCH_ENV_CONNECTOR_STATES_TIMER]:
# Merge connector states from all EnvRunners and broadcast updated
# states back to all EnvRunners.
self.evaluation_workers.sync_env_runner_states(
from_worker=self.workers.local_worker(),
env_steps_sampled=self._counters[NUM_ENV_STEPS_SAMPLED],
self.evaluation_workers.broadcast_state(
state={
self.workers.local_worker().get_state(components=[
NUM_ENV_STEPS_SAMPLED,
"env_to_module_connector",
"module_to_env_connector",
])
},
local_worker=True,
)

if self.evaluation_workers is None and (
@@ -3182,19 +3194,41 @@ def _run_one_training_iteration(self) -> Tuple[ResultDict, "TrainIterCtx"]:
if self.config.get("framework") == "tf2" and not tf.executing_eagerly():
tf1.enable_eager_execution()

results = None
results = {}
training_step_results = {}
episodes_this_iter = []
# Create a step context ...
with TrainIterCtx(algo=self) as train_iter_ctx:
# .. so we can query it whether we should stop the iteration loop (e.g.
# when we have reached `min_time_s_per_iteration`).
while not train_iter_ctx.should_stop(results):
while not train_iter_ctx.should_stop(training_step_results):
# Try to train one step.
with self._timers[TRAINING_ITERATION_TIMER]:
results = self.training_step()
# TODO (sven): Add capability to reduce results over different
# iterations.
training_step_results = self.training_step()

# Collect returned episode metrics from each `training_step` call,
# so nothing gets lost (in this mode, we do NOT call get_metrics()
# here automatically, it has already been done by the
# `training_step` method).
if "_episodes_this_training_step" in training_step_results:
episodes_this_iter.extend(
training_step_results.pop("_episodes_this_training_step")
)

if training_step_results:
results = training_step_results

# With training step done. Try to bring failed workers back.
self.restore_workers(self.workers)

# Publish all episodes collected in this entire iteration (consisting of n
# `training_step` calls) to let the algo know, we do NOT have to call
# `get_metrics` anymore on all EnvRunners (already done inside `training_step`).
if episodes_this_iter:
results["_episodes_this_iter"] = episodes_this_iter

return results, train_iter_ctx

def _run_one_evaluation(
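A simplified, non-RLlib illustration of the episode-metrics plumbing added in the hunk above: each training_step() may attach the episodes it already collected, the iteration loop aggregates them, and step() can then skip a second collect_episodes() pass. Function and key names mirror the diff, but the code is a standalone sketch:

```python
def run_one_training_iteration(training_step, should_stop):
    results, training_step_results, episodes_this_iter = {}, {}, []
    while not should_stop(training_step_results):
        training_step_results = training_step()
        # Episodes already collected inside training_step() are pulled out ...
        episodes_this_iter.extend(
            training_step_results.pop("_episodes_this_training_step", [])
        )
        if training_step_results:
            results = training_step_results
    # ... and republished once for the whole iteration.
    if episodes_this_iter:
        results["_episodes_this_iter"] = episodes_this_iter
    return results

calls = iter([{"loss": 0.5, "_episodes_this_training_step": ["ep1"]}])
print(run_one_training_iteration(lambda: next(calls), lambda r: bool(r)))
# -> {'loss': 0.5, '_episodes_this_iter': ['ep1']}
```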

rllib/algorithms/algorithm_config.py (6 changes: 5 additions & 1 deletion)
@@ -812,7 +812,7 @@ def validate(self) -> None:
# Check to-be-deprecated settings (however that are still in use).
self._validate_to_be_deprecated_settings()

def build(
def build_algorithm(
self,
env: Optional[Union[str, EnvType]] = None,
logger_creator: Optional[Callable[[], Logger]] = None,
@@ -4295,6 +4295,10 @@ def _resolve_tf_settings(self, _tf1, _tfv):
"speed as with static-graph mode."
)

@Deprecated(new="AlgorithmConfig.build_algorithm()", error=False)
def build(self, *args, **kwargs):
return self.build_algorithm(*args, **kwargs)

@property
@Deprecated(
old="AlgorithmConfig.multiagent['[some key]']",
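As a usage note for the rename in the last hunk, a hedged sketch (PPOConfig is just an example config class; the deprecated alias behaves as the @Deprecated(error=False) wrapper above suggests):

```python
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment("CartPole-v1")

algo = config.build_algorithm()  # new name introduced in this PR
# algo = config.build()          # still works; forwards with a deprecation notice
```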