diff --git a/release/rllib_tests/learning_tests/hard_learning_tests.yaml b/release/rllib_tests/learning_tests/hard_learning_tests.yaml index b7eeb8cb574f..f7c6a242fecd 100644 --- a/release/rllib_tests/learning_tests/hard_learning_tests.yaml +++ b/release/rllib_tests/learning_tests/hard_learning_tests.yaml @@ -1,4 +1,3 @@ - a2c-breakoutnoframeskip-v4: env: BreakoutNoFrameskip-v4 run: A2C @@ -163,56 +162,57 @@ appo-pongnoframeskip-v4: # evaluation_config: # input: sampler -# cql-halfcheetahbulletenv-v0: -# env: HalfCheetahBulletEnv-v0 -# run: CQL -# pass_criteria: -# episode_reward_mean: 400.0 -# timesteps_total: 10000000 -# stop: -# time_total_s: 3600 -# config: -# # Use input produced by expert SAC algo. -# input: ["~/halfcheetah_expert_sac.zip"] -# actions_in_input_normalized: true -# -# soft_horizon: False -# horizon: 1000 -# Q_model: -# fcnet_activation: relu -# fcnet_hiddens: [256, 256, 256] -# policy_model: -# fcnet_activation: relu -# fcnet_hiddens: [256, 256, 256] -# tau: 0.005 -# target_entropy: auto -# no_done_at_end: false -# n_step: 3 -# rollout_fragment_length: 1 -# prioritized_replay: false -# train_batch_size: 256 -# target_network_update_freq: 0 -# timesteps_per_iteration: 1000 -# learning_starts: 256 -# optimization: -# actor_learning_rate: 0.0001 -# critic_learning_rate: 0.0003 -# entropy_learning_rate: 0.0001 -# num_workers: 0 -# num_gpus: 1 -# metrics_smoothing_episodes: 5 -# -# # CQL Configs -# min_q_weight: 5.0 -# bc_iters: 20000 -# temperature: 1.0 -# num_actions: 10 -# lagrangian: False -# -# # Switch on online evaluation. -# evaluation_interval: 3 -# evaluation_config: -# input: sampler +cql-halfcheetahbulletenv-v0: + env: HalfCheetahBulletEnv-v0 + run: CQL + pass_criteria: + episode_reward_mean: 400.0 + timesteps_total: 10000000 + stop: + time_total_s: 3600 + config: + # Use input produced by expert SAC algo. + input: ["~/halfcheetah_expert_sac.zip"] + actions_in_input_normalized: true + + soft_horizon: False + horizon: 1000 + Q_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256, 256] + policy_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256, 256] + tau: 0.005 + target_entropy: auto + no_done_at_end: false + n_step: 3 + rollout_fragment_length: 1 + prioritized_replay: false + train_batch_size: 256 + target_network_update_freq: 0 + timesteps_per_iteration: 1000 + learning_starts: 256 + optimization: + actor_learning_rate: 0.0001 + critic_learning_rate: 0.0003 + entropy_learning_rate: 0.0001 + num_workers: 0 + num_gpus: 1 + metrics_smoothing_episodes: 5 + + # CQL Configs + min_q_weight: 5.0 + bc_iters: 20000 + temperature: 1.0 + num_actions: 10 + lagrangian: False + + # Switch on online evaluation. + evaluation_interval: 3 + evaluation_config: + input: sampler + always_attach_evaluation_results: True ddpg-hopperbulletenv-v0: env: HopperBulletEnv-v0 diff --git a/release/rllib_tests/performance_tests/performance_tests.yaml b/release/rllib_tests/performance_tests/performance_tests.yaml index fdc0aa3d045e..51de3a7181c3 100644 --- a/release/rllib_tests/performance_tests/performance_tests.yaml +++ b/release/rllib_tests/performance_tests/performance_tests.yaml @@ -52,55 +52,55 @@ appo-pongnoframeskip-v4: model: dim: 42 -# Bring cql test back after we make sure it learns. -#cql-halfcheetahbulletenv-v0: -# env: HalfCheetahBulletEnv-v0 -# run: CQL -# frameworks: [ "tf", "tf2", "torch" ] -# stop: -# time_total_s: 1800 -# config: -# # Use input produced by expert SAC algo. 
-# input: ["~/halfcheetah_expert_sac.zip"] -# actions_in_input_normalized: true -# -# soft_horizon: False -# horizon: 1000 -# Q_model: -# fcnet_activation: relu -# fcnet_hiddens: [256, 256, 256] -# policy_model: -# fcnet_activation: relu -# fcnet_hiddens: [256, 256, 256] -# tau: 0.005 -# target_entropy: auto -# no_done_at_end: false -# n_step: 3 -# rollout_fragment_length: 1 -# prioritized_replay: false -# train_batch_size: 256 -# target_network_update_freq: 0 -# timesteps_per_iteration: 1000 -# learning_starts: 256 -# optimization: -# actor_learning_rate: 0.0001 -# critic_learning_rate: 0.0003 -# entropy_learning_rate: 0.0001 -# num_workers: 0 -# num_gpus: 1 -# metrics_smoothing_episodes: 5 -# -# # CQL Configs -# min_q_weight: 5.0 -# bc_iters: 20000 -# temperature: 1.0 -# num_actions: 10 -# lagrangian: False -# -# # Switch on online evaluation. -# evaluation_interval: 3 -# evaluation_config: -# input: sampler +cql-halfcheetahbulletenv-v0: + env: HalfCheetahBulletEnv-v0 + run: CQL + frameworks: [ "tf", "tf2", "torch" ] + stop: + time_total_s: 1800 + config: + # Use input produced by expert SAC algo. + input: ["~/halfcheetah_expert_sac.zip"] + actions_in_input_normalized: true + + soft_horizon: False + horizon: 1000 + Q_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256, 256] + policy_model: + fcnet_activation: relu + fcnet_hiddens: [256, 256, 256] + tau: 0.005 + target_entropy: auto + no_done_at_end: false + n_step: 3 + rollout_fragment_length: 1 + prioritized_replay: false + train_batch_size: 256 + target_network_update_freq: 0 + timesteps_per_iteration: 1000 + learning_starts: 256 + optimization: + actor_learning_rate: 0.0001 + critic_learning_rate: 0.0003 + entropy_learning_rate: 0.0001 + num_workers: 0 + num_gpus: 1 + metrics_smoothing_episodes: 5 + + # CQL Configs + min_q_weight: 5.0 + bc_iters: 20000 + temperature: 1.0 + num_actions: 10 + lagrangian: False + + # Switch on online evaluation. + evaluation_interval: 3 + evaluation_config: + input: sampler + always_attach_evaluation_results: True sac-halfcheetahbulletenv-v0: env: HalfCheetahBulletEnv-v0 diff --git a/rllib/agents/tests/test_trainer.py b/rllib/agents/tests/test_trainer.py index 479d7cae1d90..09b4314d0dc3 100644 --- a/rllib/agents/tests/test_trainer.py +++ b/rllib/agents/tests/test_trainer.py @@ -161,6 +161,39 @@ def test_evaluation_option(self): self.assertTrue("episode_reward_mean" in r1["evaluation"]) self.assertNotEqual(r1["evaluation"], r3["evaluation"]) + def test_evaluation_option_always_attach_eval_metrics(self): + config = dqn.DEFAULT_CONFIG.copy() + config.update({ + "env": "CartPole-v0", + "evaluation_interval": 2, + "evaluation_duration": 2, + "evaluation_duration_unit": "episodes", + "evaluation_config": { + "gamma": 0.98, + }, + "always_attach_evaluation_results": True, + # Use a custom callback that asserts that we are running the + # configured exact number of episodes per evaluation. + "callbacks": AssertEvalCallback, + }) + + for _ in framework_iterator(config, frameworks=("tf", "torch")): + trainer = dqn.DQNTrainer(config=config) + # Should always see latest available eval results. + r0 = trainer.train() + r1 = trainer.train() + r2 = trainer.train() + r3 = trainer.train() + trainer.stop() + + # Eval results are not available at step 0. + # But step 3 should still have it, even though no eval was + # run during that step. 
+ self.assertFalse("evaluation" in r0) + self.assertTrue("evaluation" in r1) + self.assertTrue("evaluation" in r2) + self.assertTrue("evaluation" in r3) + def test_evaluation_wo_evaluation_worker_set(self): config = a3c.DEFAULT_CONFIG.copy() config.update({ diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py index b4b8736c61f1..9a52fbb62a3f 100644 --- a/rllib/agents/trainer.py +++ b/rllib/agents/trainer.py @@ -321,6 +321,11 @@ # The Trainer guarantees all eval workers have the latest policy state # before this function is called. "custom_eval_function": None, + # Make sure the latest available evaluation results are always attached to + # a step result dict. + # This may be useful if Tune or some other meta controller needs access + # to evaluation metrics all the time. + "always_attach_evaluation_results": False, # === Advanced Rollout Settings === # Use a background thread for sampling (slightly off-policy, usually not @@ -986,7 +991,6 @@ def auto_duration_fn(unit, num_eval_workers, eval_cfg, num_units_done): # No parallelism. if not self.config["evaluation_parallel_to_training"]: step_results = next(self.train_exec_impl) - # Kick off evaluation-loop (and parallel train() call, # if requested). # Parallel eval + training. @@ -997,24 +1001,25 @@ def auto_duration_fn(unit, num_eval_workers, eval_cfg, num_units_done): # Automatically determine duration of the evaluation. if self.config["evaluation_duration"] == "auto": unit = self.config["evaluation_duration_unit"] - - evaluation_metrics = self.evaluate( + self.evaluate( duration_fn=functools.partial( auto_duration_fn, unit, self.config[ "evaluation_num_workers"], self.config[ "evaluation_config"])) else: - evaluation_metrics = self.evaluate() + self.evaluate() # Collect the training results from the future. step_results = train_future.result() # Sequential: train (already done above), then eval. else: - evaluation_metrics = self.evaluate() + self.evaluate() - # Add evaluation results to train results. - assert isinstance(evaluation_metrics, dict), \ + if (evaluate_this_iter + or self.config["always_attach_evaluation_results"]): + # Attach latest available evaluation results to train results. + assert isinstance(self.evaluation_metrics, dict), \ "Trainer.evaluate() needs to return a dict." - step_results.update(evaluation_metrics) + step_results.update(self.evaluation_metrics) # Check `env_task_fn` for possible update of the env's task. if self.config["env_task_fn"] is not None: @@ -1176,9 +1181,13 @@ def duration_fn(num_units_done): self.evaluation_workers.remote_workers()) metrics["timesteps_this_iter"] = num_ts_run - self.evaluation_metrics = metrics + # Evaluation does not run for every step. + # Save evaluation metrics on trainer, so it can be attached to + # subsequent step results as latest evaluation result. + self.evaluation_metrics = {"evaluation": metrics} - return {"evaluation": metrics} + # Also return the results here for convenience. + return self.evaluation_metrics @DeveloperAPI @staticmethod
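A minimal usage sketch (not part of the patch) of the new "always_attach_evaluation_results" flag, mirroring the test added above in rllib/agents/tests/test_trainer.py. The environment, evaluation interval, and number of iterations below are arbitrary example values, and the behavior described in the comments is what the patched Trainer is expected to do.

    import ray
    from ray.rllib.agents import dqn

    config = dqn.DEFAULT_CONFIG.copy()
    config.update({
        "env": "CartPole-v0",
        # Run evaluation only on every 2nd training iteration ...
        "evaluation_interval": 2,
        "evaluation_duration": 2,
        "evaluation_duration_unit": "episodes",
        # ... but keep the latest available evaluation metrics attached to
        # every subsequent step result (the behavior added by this patch).
        "always_attach_evaluation_results": True,
    })

    ray.init()
    trainer = dqn.DQNTrainer(config=config)
    for i in range(4):
        result = trainer.train()
        # From the first evaluated iteration onward, "evaluation" should be
        # present in every result dict, holding the most recent eval metrics,
        # even on iterations where no evaluation actually ran.
        if "evaluation" in result:
            print(i, result["evaluation"]["episode_reward_mean"])
    trainer.stop()
    ray.shutdown()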