Global optimisation (#5)

* Implemented global optimisation functionality
AarhusCosmology · Apr 30, 2024 · f829ec0 · f829ec0
1 parent dd18419
commit f829ec0
Show file tree

Hide file tree

Showing 17 changed files with 338 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -49,6 +49,12 @@ profile_dict = load_profile('path_to_my_run')
 ```
 whence `profile_dict` is a dictionary with the best-fitting parameter values at each point in the profile, along with the associated likelihood values. 
 
+#### Global optimisation
+
+The efficient optimiser of PROSPECT can also be used to find global bestfits of your cosmology. An example for such an input .yaml file is found in the `input/example_toy/example_global_optimisation_toy.yaml` file. Starting from a .yaml file for a profile likelihood, you only need to change the `jobtype` input of the `run` section to 'global_optimisation' (rather than the usual 'profile'). All of the settings of the optimiser defined in the `profile` section are then used to carry out global optimisation of your model, giving an output folder similar in structure to that obtained from profile likelihoods runs. 
+
+*Note that when running with `jobtype: 'global_optimisation'`, the inputs `parameter`, `values`, `start_bin_fraction` as well as `plot_profile`, `detailed_plot`, `plot_Delta_chi2` and `confidence_levels` of the `profile` section are ignored, but the rest of the input, such as the schedules and initialisation procedures, are still used to define the settings of the optimiser.*
+
 #### Reoptimising
 
 Not satisfied with your profile? You can always queue new `OptimiseTask`s with the reoptimising feature. Add extra tasks from the terminal by running

diff --git a/input/example_toy/example_global_optimisation_toy.yaml b/input/example_toy/example_global_optimisation_toy.yaml
@@ -0,0 +1,45 @@
+io:
+     jobname: 'example_toy'
+     write: True
+     # relative or absolute path to output directory
+     dir: 'output/example_toy'
+     overwrite_dir: False
+     snapshot_interval: 10.0
+
+run:
+     jobtype: 'global_optimisation'
+     # Choose either 'serial', 'threaded' or 'mpi'
+     # (If 'mpi': Remember to run with `mpiexec -n N prospect example.yaml`)
+     mode: 'serial'
+
+kernel:
+     type: 'analytical'
+     conf: ''
+     # This kernel loads a pre-generated 20d random Gaussian
+     # To generate your own, run with 'kernel_random_gauss.yaml' instead
+     param: 'input/example_toy/kernel_settings/kernel_load_random_gauss.yaml'
+
+profile:
+    optimiser: 'simulated annealing'
+
+    # Temperature changes from the endpoints of the range in 
+    # the amount of iterations given by 'max_iterations'
+    temperature_schedule: 'exponential'
+    temperature_range: [0.1, 0.001]
+    max_iterations: 20
+
+    # Adjusts step size at each iteration to lie in the interval chosen
+    step_size_schedule: 'adaptive'
+    step_size_adaptive_interval: [0.19, 0.21]
+    # Amplitude of the adjustment at each iteration
+    step_size_adaptive_multiplier: 0.3
+    step_size_adaptive_initial: 0.1
+
+    # Choose either 'steps_per_iteration' or 'minutes_per_iteration'
+    steps_per_iteration: 250
+    # Amount of times to independently optimise each point in the profile
+    repetitions: 3
+
+    start_from_mcmc: 'input/example_toy/mcmc/test'
+
+    plot_schedule: True
diff --git a/prospect/communication.py b/prospect/communication.py
@@ -9,7 +9,9 @@
 from dataclasses import dataclass
 from prospect.input import Configuration
 from prospect.tasks.base_task import BaseTask
+from prospect.tasks.base_analyse_task import BaseAnalyseTask
 from prospect.tasks.analyse_profile_task import AnalyseProfileTask
+from prospect.tasks.analyse_global_optimisation_task import AnalyseGlobalOptimisationTask
 from prospect.tasks.initialise import initialise_tasks
 from multiprocessing import Condition
 
@@ -71,6 +73,9 @@ def finalize(self, executor) -> None:
             if self.config.run.jobtype == 'profile':
                 analysis_task = self.get_profile_analysis()
                 analysis_task.run([task for task in self.tasks.done.values() if task.id in analysis_task.required_task_ids])
+            elif self.config.run.jobtype == 'global_optimisation':
+                analysis_task = self.get_global_optimisation_analysis()
+                analysis_task.run([task for task in self.tasks.done.values() if task.id in analysis_task.required_task_ids])
             self.dump_snapshot()
             self.status_update()
 
@@ -142,10 +147,10 @@ def finalize_task(self, future) -> None:
 
         if self.config.io.write:
             if time.time() > self.next_dump_time:
-                if self.config.run.jobtype == 'profile' and finished_task.type != 'AnalyseProfileTask':
-                    if 'AnalyseProfileTask' not in [queued_task.type for queued_task in self.tasks.queued]:
-                        if 'AnalyseProfileTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
-                            self.push_task(self.get_profile_analysis())
+                if finished_task.type != 'AnalyseProfileTask' and finished_task.type != 'AnalyseGlobalOptimisationTask':
+                    analyse_task = self.get_analysis_task()
+                    if analyse_task is not None:
+                        self.push_task(analyse_task)
                 self.dump_snapshot()
                 self.status_update()
 
@@ -202,10 +207,17 @@ def get_write_line(task: Type[BaseTask]) -> str:
                 status_file.write(get_write_line(task_done))
             status_file.write("\n")
 
-    def get_profile_analysis(self) -> AnalyseProfileTask:
+    def get_analysis_task(self) -> Type[BaseAnalyseTask]:
         required_tasks = [id for id, task in self.tasks.done.items() if task.type == 'OptimiseTask' or task.type == 'InitialiseOptimiserTask']
-        return AnalyseProfileTask(self.config, required_tasks)
-
+        if self.config.run.jobtype == 'profile':
+            if 'AnalyseProfileTask' not in [queued_task.type for queued_task in self.tasks.queued]:
+                if 'AnalyseProfileTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
+                    return AnalyseProfileTask(self.config, required_tasks)
+        elif self.config.run.jobtype == 'global_optimisation':
+            if 'AnalyseGlobalOptimisationTask' not in [queued_task.type for queued_task in self.tasks.queued]:
+                if 'AnalyseGlobalOptimisationTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
+                    return AnalyseGlobalOptimisationTask(self.config, required_tasks)
+
 class SerialContext:
     class MockFuture:
         def __init__(self, finished_task: Type[BaseTask]) -> None:

diff --git a/prospect/input.py b/prospect/input.py
@@ -23,6 +23,9 @@ def __init__(self, config_yaml):
                 modules['mcmc'] = import_module('prospect.mcmc')
             elif config_yaml['run']['jobtype'] == 'profile':
                 modules['profile'] = import_module('prospect.profile')
+            elif config_yaml['run']['jobtype'] == 'global_optimisation':
+                # Global optimisation uses profiling inputs for optimisation
+                modules['profile'] = import_module('prospect.profile')
             else:
                 raise ValueError("The given value of 'jobtype' is not recognised. Choose either 'mcmc' or 'profile'.")
         else:

diff --git a/prospect/optimiser.py b/prospect/optimiser.py
@@ -62,7 +62,10 @@ def __init__(self, config, kernel, optimise_settings):
         self.mcmc = MetropolisHastings(config_mcmc, self.kernel, **mcmc_args)
 
     def optimise(self):
-        print(f"Running a simulated annealing iteration of parameter {self.config.profile.parameter}={self.settings['fixed_param_val']} with temperature {self.settings['temperature']}")
+        if self.config.run.jobtype == 'profile':
+            print(f"Running a simulated annealing iteration of parameter {self.config.profile.parameter}={self.settings['fixed_param_val']} with temperature {self.settings['temperature']}")
+        else:
+            print(f"Running a simulated annealing iteration with temperature {self.settings['temperature']}")
         if self.config.profile.steps_per_iteration is not None:
             self.mcmc.run_steps(self.config.profile.steps_per_iteration)
         elif self.config.profile.minutes_per_iteration is not None:
@@ -147,12 +150,13 @@ def get_next_iteration_settings(self):
 
         optimise_settings = {
             'current_best_loglkl': self.bestfit['loglkl'],
-            'fixed_param_val': self.settings['fixed_param_val'],
             'initial_position': self.mcmc.chain.last_position,
             'covmat': self.covmat,
             'temperature': new_temp,
             'temperature_change': self.settings['temperature_change'],
             'step_size': new_step_size,
             'step_size_change': self.settings['step_size_change']
         }
+        if self.config.run.jobtype == 'profile':
+            optimise_settings['fixed_param_val'] = self.settings['fixed_param_val']
         return optimise_settings
diff --git a/prospect/profile.py b/prospect/profile.py
@@ -129,11 +129,22 @@ def load_profile(prospect_folder, direct_txt=False):
 @dataclass
 class Arguments:
     class parameter(InputArgument):
-        val_type = str
+        val_type = str | NoneType
+        def get_default(self, config_yaml: dict[str, Any]):
+            if config_yaml['run']['jobtype'] == 'global_optimisation':
+                # None corresponds to global optimisation
+                return None
+            else:
+                raise ValueError("You need to specify the 'parameter' input of the profile section.")
 
     class values(InputArgument):
         """Parameter values where the profile is evaluated"""
         val_type = list[float] | list[int] | np.ndarray | list
+        def get_default(self, config_yaml: dict[str, Any]):
+            if config_yaml['run']['jobtype'] == 'global_optimisation':
+                return []
+            else:
+                raise ValueError("You need to specify the 'values' input of the profile section.")
 
     class optimiser(InputArgument):
         allowed_values = ['simulated annealing']

diff --git a/prospect/run.py b/prospect/run.py
@@ -94,7 +94,10 @@ def analyse(prospect_folder):
     snapshot = load(prospect_folder)
     if snapshot.config.run.jobtype == 'profile':
         print(f"Analysing {prospect_folder} as a profile likelihood run.")
-        analysis_task = snapshot.get_profile_analysis()
+        analysis_task = snapshot.get_analysis_task()
+    if snapshot.config.run.jobtype == 'global_optimisation':
+        print(f"Analysing {prospect_folder} as a global optimisation likelihood run.")
+        analysis_task = snapshot.get_analysis_task()
 
     elif snapshot.config.run.jobtype == 'mcmc':
         print(f"Analysing {prospect_folder} as an MCMC run.")
@@ -133,7 +136,7 @@ def analyse_from_shell():
 class Arguments:
     class jobtype(InputArgument):
         val_type = str
-        allowed_values = ['mcmc', 'profile']
+        allowed_values = ['mcmc', 'profile', 'global_optimisation']
 
     class mode(InputArgument):
         val_type = str

diff --git a/prospect/tasks/analyse_global_optimisation_task.py b/prospect/tasks/analyse_global_optimisation_task.py
@@ -0,0 +1,136 @@
+from collections import defaultdict
+from copy import deepcopy
+import os 
+import time
+from typing import Type
+import matplotlib
+import matplotlib.pyplot as plt 
+import numpy as np 
+from scipy.interpolate import interp1d
+from scipy import stats
+from prospect.input import Configuration
+from prospect.kernels.initialisation import initialise_kernel
+from prospect.tasks.base_analyse_task import BaseAnalyseTask
+from prospect.tasks.optimise_task import OptimiseTask
+
+# Switch matplotlib backend to one that doesn't pop up figures
+matplotlib.use('Agg')
+
+class AnalyseGlobalOptimisationTask(BaseAnalyseTask):
+    def run(self, tasks: list[OptimiseTask]): 
+        if not tasks:
+            print(f"No tasks to analyse. Ending AnalyseProfileTask.")
+            return
+        print("Running global optimisation analysis script...")
+        kernel = initialise_kernel(self.config.kernel, self.config.io.dir, self.id)
+        optimise_tasks, initialise_tasks = [], []
+        for task in tasks:
+            if task.type == 'OptimiseTask':
+                optimise_tasks.append(task)
+            elif task.type == 'InitialiseOptimiserTask':
+                initialise_tasks.append(task)
+        bestfits = self.sort_tasks(optimise_tasks, initialise_tasks)
+        self.write_results(bestfits, kernel)
+        if self.config.profile.plot_schedule:
+            tic = time.perf_counter()
+            self.plot_schedule(optimise_tasks)
+            print(f"Plotted schedule in {time.perf_counter() - tic:.5} s")
+
+    def sort_tasks(self, optimise_tasks, initialise_tasks):
+        bestfits = {}
+        for idx_rep in range(self.config.profile.repetitions):
+            current_bestfit = {'loglkl': np.inf}
+            for task in optimise_tasks:
+                if task.optimise_settings['repetition_number'] == idx_rep:
+                    if task.optimiser.bestfit['loglkl'] < current_bestfit['loglkl']:
+                        current_bestfit = task.optimiser.bestfit 
+            bestfits[idx_rep] = current_bestfit # store the best in each rep
+            best_rep = list(bestfits.keys())[np.argmin([rep['loglkl'] for rep in bestfits.values()])]
+            bestfits[idx_rep]['best_rep'] = best_rep
+
+            for initialise_task in initialise_tasks:
+                # All reps are all initialised to the same point
+                bestfits[idx_rep]['initial'] = initialise_task.initial_bestfit
+                bestfits[idx_rep]['initial_loglkl'] = initialise_task.initial_loglkl
+                bestfits[idx_rep]['covmat'] = initialise_task.initial_covmat
+                break
+        return bestfits
+
+    def write_results(self, bestfits, kernel) -> None:
+        results_file = os.path.join(self.dir, f'{self.config.profile.parameter}.txt')
+        param_names = [param_name for param_name in kernel.param['varying'] if param_name != self.config.profile.parameter]
+
+        print(f"Writing current optimisation results to {results_file}")
+        with open(results_file,"w") as file:
+            # Write header
+            file.write(f"Repetition number \t -loglkl \t initial_loglkl \t ")
+            file.write(f" \t ".join([str(name) for name in param_names])+"\n")
+            # Write results
+            for idx_rep in range(self.config.profile.repetitions):
+                if bestfits[idx_rep]['loglkl'] == np.inf:
+                    file.write(f"{idx_rep} \t --- no optimisations finished ---\n")
+                else:
+                    file.write(f"{idx_rep} \t {bestfits[idx_rep]['loglkl']:.10e} \t ")
+                    file.write(f"{bestfits[idx_rep]['initial_loglkl']:.10e} \t ")
+                    file.write(" \t ".join([str(np.round(bestfits[idx_rep]['position'][name][0], 10)) for name in param_names])+"\n")
+
+    def plot_schedule(self, optimise_tasks):
+        # Collect nested dict data structure
+        iter_data = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        # Sort optimisetasks by their repetition and iteration numbers
+        for task in optimise_tasks:
+            iter_data[task.optimise_settings['repetition_number']][task.optimise_settings['iteration_number']] = {
+                'acceptance_rate': task.optimiser.bestfit['acceptance_rate'],
+                'temperature': task.optimiser.settings['temperature'],
+                'step_size': task.optimiser.settings['step_size'],
+                'step_size_change': task.optimiser.settings['step_size_change'],
+                'loglkl': task.optimiser.bestfit['loglkl']
+            }
+        plot_adaptive = self.config.profile.step_size_schedule == 'adaptive' and self.config.profile.step_size_adaptive_multiplier == 'adaptive'
+        if plot_adaptive == 'adaptive':
+            fig, ax = plt.subplots(4, 1, figsize=(self.fig['width'], 3*self.fig['height']))
+        else:
+            fig, ax = plt.subplots(3, 1, figsize=(self.fig['width'], 2.5*self.fig['height']))
+        current_longest = {'len': 0}
+
+        for idx_rep in range(self.config.profile.repetitions):
+            if idx_rep in iter_data:
+                iterlist = list(iter_data[idx_rep].keys())
+                order = np.argsort(iterlist)
+                iterlist = np.array(iterlist)[order]
+                loglkls = np.array([it['loglkl'] for it in iter_data[idx_rep].values()])[order]
+                acceptance_rates = np.array([it['acceptance_rate'] for it in iter_data[idx_rep].values()])[order]
+                step_sizes = np.array([it['step_size'] for it in iter_data[idx_rep].values()])[order]
+                ax[0].plot(iterlist, loglkls, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
+                ax[1].plot(iterlist, acceptance_rates, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
+                ax[2].plot(iterlist, step_sizes, '.-')    
+                if plot_adaptive == 'adaptive':
+                    multipliers = np.array([it['step_size_change']['current_multiplier'] for it in iter_data[idx_rep].values()])[order]
+                    ax[3].plot(iterlist, multipliers, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
+                if len(iterlist) > current_longest['len']:
+                    current_longest = {
+                        'len': len(iterlist),
+                        'idx_rep': idx_rep
+                    }
+        iterlim = [-0.1, current_longest['len'] + 0.1]
+        ax[0].set(ylabel='-loglkl', xlim=iterlim, xticks=[], yscale='log')
+        ax[1].set(ylabel='acceptance rate', xlim=iterlim, xticks=[])
+        ax[2].set(ylabel='step size & temp', xlim=iterlim, xticks=[], yscale='log')
+        if plot_adaptive:
+            ax[3].set(xlabel='iteration', ylabel='step multiplier', xlim=iterlim)
+        if current_longest['len'] > 0:
+            # Plot temperature of the run with most iterations
+            iterlist = list(iter_data[current_longest['idx_rep']].keys())
+            order = np.argsort(iterlist)
+            iterlist = np.array(iterlist)[order]
+            temperatures = np.array([it['temperature'] for it in iter_data[current_longest['idx_rep']].values()])[order]
+            ax[2].plot(iterlist, temperatures, 'r.-', label='T')    
+            handles, labels = ax[0].get_legend_handles_labels()
+            if handles and labels:
+                labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
+                ax[0].legend(handles, labels, fontsize=0.5*self.fig['fontsize'])
+            ax[0].legend(handles, labels, fontsize=0.5*self.fig['fontsize'])
+            ax[2].legend()
+        fig.tight_layout()
+        fig.savefig(os.path.join(self.dir, f'optimisation_schedule.pdf'))
+        plt.close()