diff --git a/rllib/algorithms/ddpg/tests/test_ddpg.py b/rllib/algorithms/ddpg/tests/test_ddpg.py
index 4045dcf78fcf..c15caf296c91 100644
--- a/rllib/algorithms/ddpg/tests/test_ddpg.py
+++ b/rllib/algorithms/ddpg/tests/test_ddpg.py
@@ -1,7 +1,6 @@
 import numpy as np
 import re
 import unittest
-from tempfile import TemporaryDirectory
 
 import ray
 import ray.rllib.algorithms.ddpg as ddpg
@@ -64,23 +63,6 @@ def test_ddpg_compilation(self):
             check(a, 500)
             trainer.stop()
 
-    def test_ddpg_checkpoint_save_and_restore(self):
-        """Test whether a DDPGTrainer can save and load checkpoints."""
-        config = ddpg.DEFAULT_CONFIG.copy()
-        config["num_workers"] = 1
-        config["num_envs_per_worker"] = 2
-        config["replay_buffer_config"]["learning_starts"] = 0
-        config["exploration_config"]["random_timesteps"] = 100
-
-        # Test against all frameworks.
-        for _ in framework_iterator(config, with_eager_tracing=True):
-            trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v1")
-            trainer.train()
-            with TemporaryDirectory() as temp_dir:
-                checkpoint = trainer.save(temp_dir)
-                trainer.restore(checkpoint)
-            trainer.stop()
-
     def test_ddpg_exploration_and_with_random_prerun(self):
         """Tests DDPG's Exploration (w/ random actions for n timesteps)."""
 
diff --git a/rllib/algorithms/maddpg/__init__.py b/rllib/algorithms/maddpg/__init__.py
index 2ae788f1ebd6..4de518de130a 100644
--- a/rllib/algorithms/maddpg/__init__.py
+++ b/rllib/algorithms/maddpg/__init__.py
@@ -1,3 +1,7 @@
-from ray.rllib.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG
+from ray.rllib.algorithms.maddpg.maddpg import (
+    MADDPGConfig,
+    MADDPGTrainer,
+    DEFAULT_CONFIG,
+)
 
-__all__ = ["MADDPGTrainer", "DEFAULT_CONFIG"]
+__all__ = ["MADDPGConfig", "MADDPGTrainer", "DEFAULT_CONFIG"]
diff --git a/rllib/algorithms/maddpg/maddpg.py b/rllib/algorithms/maddpg/maddpg.py
index e63321586169..53fb81eccf9c 100644
--- a/rllib/algorithms/maddpg/maddpg.py
+++ b/rllib/algorithms/maddpg/maddpg.py
@@ -10,115 +10,241 @@
 """
 
 import logging
-from typing import Type
+from typing import List, Optional, Type
 
+from ray.rllib.agents.trainer_config import TrainerConfig
 from ray.rllib.algorithms.dqn.dqn import DQNTrainer
 from ray.rllib.algorithms.maddpg.maddpg_tf_policy import MADDPGTFPolicy
-from ray.rllib.agents.trainer import with_common_config
 from ray.rllib.policy.policy import Policy
 from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch
-from ray.rllib.utils.annotations import override
+from ray.rllib.utils.annotations import Deprecated, override
 from ray.rllib.utils.typing import TrainerConfigDict
 from ray.rllib.utils.deprecation import DEPRECATED_VALUE
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-# fmt: off
-# __sphinx_doc_begin__
-DEFAULT_CONFIG = with_common_config({
-    # === Framework to run the algorithm ===
-    "framework": "tf",
-
-    # === Settings for each individual policy ===
-    # ID of the agent controlled by this policy
-    "agent_id": None,
-    # Use a local critic for this policy.
-    "use_local_critic": False,
-
-    # === Evaluation ===
-    # Evaluation interval
-    "evaluation_interval": None,
-    # Number of episodes to run per evaluation period.
-    "evaluation_duration": 10,
-
-    # === Model ===
-    # Apply a state preprocessor with spec given by the "model" config option
-    # (like other RL algorithms). This is mostly useful if you have a weird
-    # observation shape, like an image. Disabled by default.
-    "use_state_preprocessor": False,
-    # Postprocess the policy network model output with these hidden layers. If
-    # use_state_preprocessor is False, then these will be the *only* hidden
-    # layers in the network.
-    "actor_hiddens": [64, 64],
-    # Hidden layers activation of the postprocessing stage of the policy
-    # network
-    "actor_hidden_activation": "relu",
-    # Postprocess the critic network model output with these hidden layers;
-    # again, if use_state_preprocessor is True, then the state will be
-    # preprocessed by the model specified with the "model" config option first.
-    "critic_hiddens": [64, 64],
-    # Hidden layers activation of the postprocessing state of the critic.
-    "critic_hidden_activation": "relu",
-    # N-step Q learning
-    "n_step": 1,
-    # Algorithm for good policies.
-    "good_policy": "maddpg",
-    # Algorithm for adversary policies.
-    "adv_policy": "maddpg",
-
-    # === Replay buffer ===
-    "replay_buffer_config": {
-        "type": "MultiAgentReplayBuffer",
-        # Specify prioritized replay by supplying a buffer type that supports
-        # prioritization, for example: MultiAgentPrioritizedReplayBuffer.
-        "prioritized_replay": DEPRECATED_VALUE,
-        "capacity": int(1e6),
-        # How many steps of the model to sample before learning starts.
-        "learning_starts": 1024 * 25,
-        # Force lockstep replay mode for MADDPG.
-        "replay_mode": "lockstep",
-    },
-    # Observation compression. Note that compression makes simulation slow in
-    # MPE.
-    "compress_observations": False,
-    # If set, this will fix the ratio of replayed from a buffer and learned on
-    # timesteps to sampled from an environment and stored in the replay buffer
-    # timesteps. Otherwise, the replay will proceed at the native ratio
-    # determined by (train_batch_size / rollout_fragment_length).
-    "training_intensity": None,
-
-    # === Optimization ===
-    # Learning rate for the critic (Q-function) optimizer.
-    "critic_lr": 1e-2,
-    # Learning rate for the actor (policy) optimizer.
-    "actor_lr": 1e-2,
-    # Update the target network every `target_network_update_freq` sample steps.
-    "target_network_update_freq": 0,
-    # Update the target by \tau * policy + (1-\tau) * target_policy
-    "tau": 0.01,
-    # Weights for feature regularization for the actor
-    "actor_feature_reg": 0.001,
-    # If not None, clip gradients during optimization at this value
-    "grad_norm_clipping": 0.5,
-    # Update the replay buffer with this many samples at once. Note that this
-    # setting applies per-worker if num_workers > 1.
-    "rollout_fragment_length": 100,
-    # Size of a batched sampled from replay buffer for training. Note that
-    # if async_updates is set, then each worker returns gradients for a
-    # batch of this size.
-    "train_batch_size": 1024,
-
-    # === Parallelism ===
-    # Number of workers for collecting samples with. This only makes sense
-    # to increase if your environment is particularly slow to sample, or if
-    # you're using the Async or Ape-X optimizers.
-    "num_workers": 1,
-    # Prevent iterations from going lower than this time span
-    "min_time_s_per_reporting": 0,
-})
-# __sphinx_doc_end__
-# fmt: on
+
+class MADDPGConfig(TrainerConfig):
+    """Defines a configuration class from which a MADDPGTrainer can be built.
+
+    Example:
+        >>> from ray.rllib.algorithms.maddpg.maddpg import MADDPGConfig
+        >>> config = MADDPGConfig()
+        >>> print(config.replay_buffer_config)
+        >>> config.replay_buffer_config.update(
+        >>>     {
+        >>>         "capacity": 100000,
+        >>>         "prioritized_replay_alpha": 0.8,
+        >>>         "prioritized_replay_beta": 0.45,
+        >>>         "prioritized_replay_eps": 2e-6,
+        >>>     }
+        >>> )
+        >>> config.training(replay_buffer_config=config.replay_buffer_config)\
+        >>>       .resources(num_gpus=0)\
+        >>>       .rollouts(num_rollout_workers=4)\
+        >>>       .environment("CartPole-v1")
+        >>> trainer = config.build()
+        >>> while True:
+        >>>     trainer.train()
+
+    Example:
+        >>> from ray.rllib.algorithms.maddpg.maddpg import MADDPGConfig
+        >>> from ray import tune
+        >>> config = MADDPGConfig()
+        >>> config.training(n_step=tune.grid_search([3, 5]))
+        >>> config.environment(env="CartPole-v1")
+        >>> tune.run(
+        >>>     "MADDPG",
+        >>>     stop={"episode_reward_mean": 200},
+        >>>     config=config.to_dict()
+        >>> )
+    """
+
+    def __init__(self, trainer_class=None):
+        """Initializes a MADDPGConfig instance."""
+        super().__init__(trainer_class=trainer_class or MADDPGTrainer)
+
+        # fmt: off
+        # __sphinx_doc_begin__
+        # MADDPG specific config settings:
+        self.agent_id = None
+        self.use_local_critic = False
+        self.use_state_preprocessor = False
+        self.actor_hiddens = [64, 64]
+        self.actor_hidden_activation = "relu"
+        self.critic_hiddens = [64, 64]
+        self.critic_hidden_activation = "relu"
+        self.n_step = 1
+        self.good_policy = "maddpg"
+        self.adv_policy = "maddpg"
+        self.replay_buffer_config = {
+            "type": "MultiAgentReplayBuffer",
+            # Specify prioritized replay by supplying a buffer type that supports
+            # prioritization, for example: MultiAgentPrioritizedReplayBuffer.
+            "prioritized_replay": DEPRECATED_VALUE,
+            "capacity": int(1e6),
+            # How many steps of the model to sample before learning starts.
+            "learning_starts": 1024 * 25,
+            # Force lockstep replay mode for MADDPG.
+            "replay_mode": "lockstep",
+        }
+        self.training_intensity = None
+        self.critic_lr = 1e-2
+        self.actor_lr = 1e-2
+        self.target_network_update_freq = 0
+        self.tau = 0.01
+        self.actor_feature_reg = 0.001
+        self.grad_norm_clipping = 0.5
+
+        # Changes to Trainer's default:
+        self.rollout_fragment_length = 100
+        self.train_batch_size = 1024
+        self.num_workers = 1
+        self.min_time_s_per_reporting = 0
+        # fmt: on
+        # __sphinx_doc_end__
+
+    @override(TrainerConfig)
+    def training(
+        self,
+        *,
+        agent_id: Optional[str] = None,
+        use_local_critic: Optional[bool] = None,
+        use_state_preprocessor: Optional[bool] = None,
+        actor_hiddens: Optional[List[int]] = None,
+        actor_hidden_activation: Optional[str] = None,
+        critic_hiddens: Optional[List[int]] = None,
+        critic_hidden_activation: Optional[str] = None,
+        n_step: Optional[int] = None,
+        good_policy: Optional[str] = None,
+        adv_policy: Optional[str] = None,
+        replay_buffer_config: Optional[dict] = None,
+        training_intensity: Optional[float] = None,
+        critic_lr: Optional[float] = None,
+        actor_lr: Optional[float] = None,
+        target_network_update_freq: Optional[int] = None,
+        tau: Optional[float] = None,
+        actor_feature_reg: Optional[float] = None,
+        grad_norm_clipping: Optional[float] = None,
+        **kwargs,
+    ) -> "MADDPGConfig":
+        """Sets the training related configuration.
+
+        Args:
+            agent_id: ID of the agent controlled by this policy.
+            use_local_critic: Use a local critic for this policy.
+            use_state_preprocessor: Apply a state preprocessor with spec given by the
+                "model" config option (like other RL algorithms). This is mostly useful
+                if you have a weird observation shape, like an image. Disabled by
+                default.
+            actor_hiddens: Postprocess the policy network model output with these hidden
+                layers. If `use_state_preprocessor` is False, then these will be the
+                *only* hidden layers in the network.
+            actor_hidden_activation: Hidden layers activation of the postprocessing
+                stage of the policy network.
+            critic_hiddens: Postprocess the critic network model output with these
+                hidden layers; again, if use_state_preprocessor is True, then the state
+                will be preprocessed by the model specified with the "model" config
+                option first.
+            critic_hidden_activation: Hidden layers activation of the postprocessing
+                stage of the critic.
+            n_step: N-step for Q-learning.
+            good_policy: Algorithm for good policies.
+            adv_policy: Algorithm for adversary policies.
+            replay_buffer_config: Replay buffer config.
+                Examples:
+                {
+                "_enable_replay_buffer_api": True,
+                "type": "MultiAgentReplayBuffer",
+                "learning_starts": 1000,
+                "capacity": 50000,
+                "replay_sequence_length": 1,
+                }
+                - OR -
+                {
+                "_enable_replay_buffer_api": True,
+                "type": "MultiAgentPrioritizedReplayBuffer",
+                "capacity": 50000,
+                "prioritized_replay_alpha": 0.6,
+                "prioritized_replay_beta": 0.4,
+                "prioritized_replay_eps": 1e-6,
+                "replay_sequence_length": 1,
+                }
+                - Where -
+                prioritized_replay_alpha: Alpha parameter controls the degree of
+                prioritization in the buffer. In other words, when a buffer sample
+                has a higher temporal-difference error, how much more likely it is
+                to be drawn and used to update the parametrized Q-network. 0.0
+                corresponds to uniform probability. Setting this much above 1.0 may
+                quickly make the sampling distribution heavily "pointy" with low
+                entropy.
+                prioritized_replay_beta: Beta parameter controls the degree of
+                importance sampling which suppresses the influence of gradient updates
+                from samples that have higher probability of being sampled via alpha
+                parameter and the temporal-difference error.
+                prioritized_replay_eps: Epsilon parameter sets the baseline probability
+                for sampling so that when the temporal-difference error of a sample is
+                zero, there is still a chance of drawing the sample.
+            training_intensity: If set, this will fix the ratio of replayed from a
+                buffer and learned on timesteps to sampled from an environment and
+                stored in the replay buffer timesteps. Otherwise, the replay will
+                proceed at the native ratio determined by
+                `(train_batch_size / rollout_fragment_length)`.
+            critic_lr: Learning rate for the critic (Q-function) optimizer.
+            actor_lr: Learning rate for the actor (policy) optimizer.
+            target_network_update_freq: Update the target network every
+                `target_network_update_freq` sample steps.
+            tau: Update the target by tau * policy + (1 - tau) * target_policy.
+            actor_feature_reg: Weights for feature regularization for the actor.
+            grad_norm_clipping: If not None, clip gradients during optimization at this
+                value.
+
+        Returns:
+            This updated TrainerConfig object.
+        """
+
+        # Pass kwargs onto super's `training()` method.
+        super().training(**kwargs)
+
+        if agent_id is not None:
+            self.agent_id = agent_id
+        if use_local_critic is not None:
+            self.use_local_critic = use_local_critic
+        if use_state_preprocessor is not None:
+            self.use_state_preprocessor = use_state_preprocessor
+        if actor_hiddens is not None:
+            self.actor_hiddens = actor_hiddens
+        if actor_hidden_activation is not None:
+            self.actor_hidden_activation = actor_hidden_activation
+        if critic_hiddens is not None:
+            self.critic_hiddens = critic_hiddens
+        if critic_hidden_activation is not None:
+            self.critic_hidden_activation = critic_hidden_activation
+        if n_step is not None:
+            self.n_step = n_step
+        if good_policy is not None:
+            self.good_policy = good_policy
+        if adv_policy is not None:
+            self.adv_policy = adv_policy
+        if replay_buffer_config is not None:
+            self.replay_buffer_config = replay_buffer_config
+        if training_intensity is not None:
+            self.training_intensity = training_intensity
+        if critic_lr is not None:
+            self.critic_lr = critic_lr
+        if actor_lr is not None:
+            self.actor_lr = actor_lr
+        if target_network_update_freq is not None:
+            self.target_network_update_freq = target_network_update_freq
+        if tau is not None:
+            self.tau = tau
+        if actor_feature_reg is not None:
+            self.actor_feature_reg = actor_feature_reg
+        if grad_norm_clipping is not None:
+            self.grad_norm_clipping = grad_norm_clipping
+
+        return self
 
 
 def before_learn_on_batch(multi_agent_batch, policies, train_batch_size):
@@ -152,7 +278,7 @@ class MADDPGTrainer(DQNTrainer):
     @classmethod
     @override(DQNTrainer)
     def get_default_config(cls) -> TrainerConfigDict:
-        return DEFAULT_CONFIG
+        return MADDPGConfig().to_dict()
 
     @override(DQNTrainer)
     def validate_config(self, config: TrainerConfigDict) -> None:
@@ -175,3 +301,20 @@ def f(batch, workers, config):
     @override(DQNTrainer)
     def get_default_policy_class(self, config: TrainerConfigDict) -> Type[Policy]:
         return MADDPGTFPolicy
+
+
+# Deprecated: Use ray.rllib.algorithms.maddpg.MADDPGConfig instead!
+class _deprecated_default_config(dict):
+    def __init__(self):
+        super().__init__(MADDPGConfig().to_dict())
+
+    @Deprecated(
+        old="ray.rllib.algorithms.maddpg.maddpg.DEFAULT_CONFIG",
+        new="ray.rllib.algorithms.maddpg.maddpg.MADDPGConfig(...)",
+        error=False,
+    )
+    def __getitem__(self, item):
+        return super().__getitem__(item)
+
+
+DEFAULT_CONFIG = _deprecated_default_config()
diff --git a/rllib/algorithms/maddpg/tests/test_maddpg.py b/rllib/algorithms/maddpg/tests/test_maddpg.py
index c6181f7822be..f6e24645a546 100644
--- a/rllib/algorithms/maddpg/tests/test_maddpg.py
+++ b/rllib/algorithms/maddpg/tests/test_maddpg.py
@@ -21,28 +21,32 @@ def tearDownClass(cls) -> None:
 
     def test_maddpg_compilation(self):
         """Test whether an MADDPGTrainer can be built with all frameworks."""
-        config = maddpg.DEFAULT_CONFIG.copy()
-        config["env"] = TwoStepGame
-        config["env_config"] = {
-            "actions_are_logits": True,
-        }
-        config["multiagent"] = {
-            "policies": {
-                "pol1": PolicySpec(
-                    config={"agent_id": 0},
-                ),
-                "pol2": PolicySpec(
-                    config={"agent_id": 1},
-                ),
-            },
-            "policy_mapping_fn": (lambda aid, **kwargs: "pol2" if aid else "pol1"),
-        }
+        config = (
+            maddpg.MADDPGConfig()
+            .environment(
+                env=TwoStepGame,
+                env_config={
+                    "actions_are_logits": True,
+                },
+            )
+            .multi_agent(
+                policies={
+                    "pol1": PolicySpec(
+                        config={"agent_id": 0},
+                    ),
+                    "pol2": PolicySpec(
+                        config={"agent_id": 1},
+                    ),
+                },
+                policy_mapping_fn=lambda aid, **kwargs: "pol2" if aid else "pol1",
+            )
+        )
         num_iterations = 1
 
         # Only working for tf right now.
         for _ in framework_iterator(config, frameworks="tf"):
-            trainer = maddpg.MADDPGTrainer(config)
+            trainer = config.build()
             for i in range(num_iterations):
                 results = trainer.train()
                 check_train_results(results)
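
For reference, below is a minimal standalone sketch of how the MADDPGConfig API introduced in this diff could be used, mirroring test_maddpg_compilation above. It is not part of the patch; it assumes the RLlib version this PR targets, the TwoStepGame example env, and the tf framework (the only one the test exercises).

# Sketch: exercising the new MADDPGConfig fluent API (not part of the patch).
import ray
import ray.rllib.algorithms.maddpg as maddpg
from ray.rllib.examples.env.two_step_game import TwoStepGame
from ray.rllib.policy.policy import PolicySpec

ray.init()

# Build the config the same way the updated test does: environment, framework,
# and the two-policy multi-agent setup, with one agent per policy.
config = (
    maddpg.MADDPGConfig()
    .environment(
        env=TwoStepGame,
        env_config={"actions_are_logits": True},
    )
    .framework("tf")
    .multi_agent(
        policies={
            "pol1": PolicySpec(config={"agent_id": 0}),
            "pol2": PolicySpec(config={"agent_id": 1}),
        },
        policy_mapping_fn=lambda aid, **kwargs: "pol2" if aid else "pol1",
    )
)

# The config object replaces the old `maddpg.DEFAULT_CONFIG.copy()` dict and
# builds the trainer directly.
trainer = config.build()
print(trainer.train())
trainer.stop()
ray.shutdown()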