From 1fa8c87336063c3f4c9d1d1a749d1a8f65e82da0 Mon Sep 17 00:00:00 2001
From: Kourosh Hakhamaneshi <kourosh@anyscale.com>
Date: Thu, 15 Dec 2022 17:10:35 -0800
Subject: [PATCH 1/2] Convert PolicySpec to a readable format when converting
 to_dict()

Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
---
 rllib/algorithms/algorithm_config.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index b66593f0ce48..c7134b431902 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -490,7 +490,20 @@ def to_dict(self) -> AlgorithmConfigDict:
         # Setup legacy multi-agent sub-dict:
         config["multiagent"] = {}
         for k in self.multiagent.keys():
-            config["multiagent"][k] = config.pop(k)
+            # convert policies dict to something human-readable
+            if k == "policies":
+                policies_dict = {}
+                for policy_id, policy_spec in self.multiagent[k].items():
+                    if isinstance(policy_spec, PolicySpec):
+                        policies_dict[policy_id] = (
+                            policy_spec.policy_class,
+                            policy_spec.observation_space,
+                            policy_spec.action_space,
+                            policy_spec.config,
+                        )
+                config["multiagent"][k] = policies_dict
+            else:
+                config["multiagent"][k] = config.pop(k)
 
         # Switch out deprecated vs new config keys.
         config["callbacks"] = config.pop("callbacks_class", DefaultCallbacks)

From ef40855df8288b48ac74f19304319c7045e0acb8 Mon Sep 17 00:00:00 2001
From: Kourosh Hakhamaneshi <kourosh@anyscale.com>
Date: Thu, 15 Dec 2022 17:20:35 -0800
Subject: [PATCH 2/2] Keep non-PolicySpec entries and non-dict policies in to_dict()

Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
---
 rllib/algorithms/algorithm_config.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index c7134b431902..fb8ff9767ae3 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -491,7 +491,7 @@ def to_dict(self) -> AlgorithmConfigDict:
         config["multiagent"] = {}
         for k in self.multiagent.keys():
             # convert policies dict to something human-readable
-            if k == "policies":
+            if k == "policies" and isinstance(self.multiagent[k], dict):
                 policies_dict = {}
                 for policy_id, policy_spec in self.multiagent[k].items():
                     if isinstance(policy_spec, PolicySpec):
@@ -501,6 +501,8 @@ def to_dict(self) -> AlgorithmConfigDict:
                             policy_spec.action_space,
                             policy_spec.config,
                         )
+                    else:
+                        policies_dict[policy_id] = policy_spec
                 config["multiagent"][k] = policies_dict
             else:
                 config["multiagent"][k] = config.pop(k)