[RLlib] Enhance add_policy test case. #28405

Merged
4 commits merged on Sep 12, 2022
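For context, the test enhanced in this PR exercises RLlib's `Algorithm.add_policy()` / `Algorithm.remove_policy()` API end to end. Below is a minimal, hedged sketch of that flow; the `PGConfig` setup, environment, worker counts, and policy IDs are illustrative assumptions for this note, not code taken from the PR:

# Illustrative sketch only (assumes Ray ~2.x RLlib APIs); the environment and
# config values below are made up for this example.
from ray.rllib.algorithms.pg import PGConfig
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole

config = (
    PGConfig()
    .environment(env=MultiAgentCartPole, env_config={"num_agents": 4})
    .rollouts(num_rollout_workers=2)
    .evaluation(evaluation_num_workers=1)
    .multi_agent(
        policies={"p0"},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "p0",
    )
)
algo = config.build()

# Add a second policy while the algorithm is live; a plain list is enough for
# `policies_to_train` (see the annotation change in algorithm.py below).
algo.add_policy(
    policy_id="p1",
    policy_cls=type(algo.get_policy("p0")),
    policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "p1",
    policies_to_train=["p0", "p1"],
)

# Remove it again, remapping all agents back to "p0".
algo.remove_policy(
    "p1",
    policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "p0",
    policies_to_train=["p0"],
)
algo.stop()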
5 changes: 4 additions & 1 deletion rllib/algorithms/algorithm.py
@@ -1640,7 +1640,10 @@ def remove_policy(
         *,
         policy_mapping_fn: Optional[Callable[[AgentID], PolicyID]] = None,
         policies_to_train: Optional[
-            Union[Set[PolicyID], Callable[[PolicyID, Optional[SampleBatchType]], bool]]
+            Union[
+                Container[PolicyID],
+                Callable[[PolicyID, Optional[SampleBatchType]], bool],
+            ]
         ] = None,
         evaluation_workers: bool = True,
     ) -> None:
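The hunk above widens `policies_to_train` from `Set[PolicyID]` to any `Container[PolicyID]` while keeping the callable alternative. A hedged illustration of the two accepted forms, assuming a running multi-agent Algorithm `algo` with a policy "p0" as in the sketch near the top ("p1" is an illustrative policy ID):

# Container[PolicyID]: a plain list (or set, tuple, ...) is now a valid value.
algo.add_policy(
    policy_id="p1",
    policy_cls=type(algo.get_policy("p0")),
    policies_to_train=["p0", "p1"],
)

# Callable[[PolicyID, Optional[SampleBatchType]], bool]: decide per policy
# (and, optionally, per train batch) whether it should be trained.
algo.remove_policy(
    "p1",
    policies_to_train=lambda pid, batch=None: pid == "p0",
)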
58 changes: 52 additions & 6 deletions rllib/algorithms/tests/test_algorithm.py
@@ -104,6 +104,24 @@ def new_mapping_fn(agent_id, episode, worker, **kwargs):
                 # Change the list of policies to train.
                 policies_to_train=[f"p{i}", f"p{i-1}"],
             )
+            # Make sure new policy is part of remote workers in the
+            # worker set and the eval worker set.
+            assert pid in (
+                ray.get(
+                    algo.workers.remote_workers()[0].apply.remote(
+                        lambda w: list(w.policy_map.keys())
+                    )
+                )
+            )
+            assert pid in (
+                ray.get(
+                    algo.evaluation_workers.remote_workers()[0].apply.remote(
+                        lambda w: list(w.policy_map.keys())
+                    )
+                )
+            )
+            # Assert new policy is part of local worker (eval worker set does NOT
+            # have a local worker, only the main WorkerSet does).
             pol_map = algo.workers.local_worker().policy_map
             self.assertTrue(new_pol is not pol0)
             for j in range(i + 1):
@@ -117,12 +135,14 @@ def new_mapping_fn(agent_id, episode, worker, **kwargs):
             test = pg.PG(config=config)
             test.restore(checkpoint)

-            # Make sure evaluation worker also gets the restored policy.
-            def _has_policy(w):
-                return w.get_policy("p0") is not None
+            # Make sure evaluation worker also got the restored, added policy.
+            def _has_policies(w):
+                return (
+                    w.get_policy("p0") is not None and w.get_policy(pid) is not None
+                )

             self.assertTrue(
-                all(test.evaluation_workers.foreach_worker(_has_policy))
+                all(test.evaluation_workers.foreach_worker(_has_policies))
             )

             # Make sure algorithm can continue training the restored policy.
@@ -137,13 +157,39 @@ def _has_policy(w):

         # Delete all added policies again from Algorithm.
         for i in range(2, 0, -1):
+            pid = f"p{i}"
             algo.remove_policy(
-                f"p{i}",
+                pid,
                 # Note that the complete signature of a policy_mapping_fn
                 # is: `agent_id, episode, worker, **kwargs`.
-                policy_mapping_fn=lambda aid, eps, **kwargs: f"p{i - 1}",
+                policy_mapping_fn=(
+                    lambda agent_id, worker, episode, **kwargs: f"p{i - 1}"
+                ),
                 # Update list of policies to train.
                 policies_to_train=[f"p{i - 1}"],
             )
+            # Make sure removed policy is no longer part of remote workers in the
+            # worker set and the eval worker set.
+            assert pid not in (
+                ray.get(
+                    algo.workers.remote_workers()[0].apply.remote(
+                        lambda w: list(w.policy_map.keys())
+                    )
+                )
+            )
+            assert pid not in (
+                ray.get(
+                    algo.evaluation_workers.remote_workers()[0].apply.remote(
+                        lambda w: list(w.policy_map.keys())
+                    )
+                )
+            )
+            # Assert removed policy is no longer part of local worker
+            # (eval worker set does NOT have a local worker, only the main WorkerSet
+            # does).
+            pol_map = algo.workers.local_worker().policy_map
+            self.assertTrue(pid not in pol_map)
+            self.assertTrue(len(pol_map) == i)

         algo.stop()

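Finally, a hedged sketch of the checkpoint round trip that the middle hunk of the test verifies: after `add_policy()`, a saved checkpoint should carry the added policy into a freshly built Algorithm, including its evaluation workers. Here `config` and `algo` are the illustrative objects from the sketch near the top of this page, with an added policy "p1" assumed to still be registered:

# Illustrative sketch; save()/restore() follow the path-style checkpoint API
# used by the test itself.
checkpoint = algo.save()

restored = config.build()          # built from the original, "p0"-only config
restored.restore(checkpoint)

def _has_policies(w):
    # Both the original and the added policy should be present on every
    # evaluation worker after the restore.
    return w.get_policy("p0") is not None and w.get_policy("p1") is not None

assert all(restored.evaluation_workers.foreach_worker(_has_policies))
restored.stop()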