From 1fa8c87336063c3f4c9d1d1a749d1a8f65e82da0 Mon Sep 17 00:00:00 2001 From: Kourosh Hakhamaneshi Date: Thu, 15 Dec 2022 17:10:35 -0800 Subject: [PATCH 1/2] Convert PolicySpec to a readable format when converting to_dict() Signed-off-by: Kourosh Hakhamaneshi --- rllib/algorithms/algorithm_config.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index b66593f0ce48..c7134b431902 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -490,7 +490,20 @@ def to_dict(self) -> AlgorithmConfigDict: # Setup legacy multi-agent sub-dict: config["multiagent"] = {} for k in self.multiagent.keys(): - config["multiagent"][k] = config.pop(k) + # convert policies dict to something human-readable + if k == "policies": + policies_dict = {} + for policy_id, policy_spec in self.multiagent[k].items(): + if isinstance(policy_spec, PolicySpec): + policies_dict[policy_id] = ( + policy_spec.policy_class, + policy_spec.observation_space, + policy_spec.action_space, + policy_spec.config, + ) + config["multiagent"][k] = policies_dict + else: + config["multiagent"][k] = config.pop(k) # Switch out deprecated vs new config keys. 
config["callbacks"] = config.pop("callbacks_class", DefaultCallbacks) From ef40855df8288b48ac74f19304319c7045e0acb8 Mon Sep 17 00:00:00 2001 From: Kourosh Hakhamaneshi Date: Thu, 15 Dec 2022 17:20:35 -0800 Subject: [PATCH 2/2] Keep non-PolicySpec policy entries and only convert dict-typed policies in to_dict() Signed-off-by: Kourosh Hakhamaneshi --- rllib/algorithms/algorithm_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index c7134b431902..fb8ff9767ae3 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -491,7 +491,7 @@ def to_dict(self) -> AlgorithmConfigDict: config["multiagent"] = {} for k in self.multiagent.keys(): # convert policies dict to something human-readable - if k == "policies": + if k == "policies" and isinstance(self.multiagent[k], dict): policies_dict = {} for policy_id, policy_spec in self.multiagent[k].items(): if isinstance(policy_spec, PolicySpec): @@ -501,6 +501,8 @@ def to_dict(self) -> AlgorithmConfigDict: policy_spec.action_space, policy_spec.config, ) + else: + policies_dict[policy_id] = policy_spec config["multiagent"][k] = policies_dict else: config["multiagent"][k] = config.pop(k)