Skip to content

Commit

Permalink
Global optimisation (#5)
Browse files Browse the repository at this point in the history
* Implemented global optimisation functionality
  • Loading branch information
EBHolm authored Apr 30, 2024
1 parent dd18419 commit f829ec0
Show file tree
Hide file tree
Showing 17 changed files with 338 additions and 67 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ profile_dict = load_profile('path_to_my_run')
```
whence `profile_dict` is a dictionary with the best-fitting parameter values at each point in the profile, along with the associated likelihood values.

#### Global optimisation

The efficient optimiser of PROSPECT can also be used to find global bestfits of your cosmology. An example for such an input .yaml file is found in the `input/example_toy/example_global_optimisation_toy.yaml` file. Starting from a .yaml file for a profile likelihood, you only need to change the `jobtype` input of the `run` section to 'global_optimisation' (rather than the usual 'profile'). All of the settings of the optimiser defined in the `profile` section are then used to carry out global optimisation of your model, giving an output folder similar in structure to that obtained from profile likelihoods runs.

*Note that when running with `jobtype: 'global_optimisation'`, the inputs `parameter`, `values`, `start_bin_fraction` as well as `plot_profile`, `detailed_plot`, `plot_Delta_chi2` and `confidence_levels` of the `profile` section are ignored, but the rest of the input, such as the schedules and initialisation procedures, are still used to define the settings of the optimiser.*

#### Reoptimising

Not satisfied with your profile? You can always queue new `OptimiseTask`s with the reoptimising feature. Add extra tasks from the terminal by running
Expand Down
45 changes: 45 additions & 0 deletions input/example_toy/example_global_optimisation_toy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
io:
jobname: 'example_toy'
write: True
# relative or absolute path to output directory
dir: 'output/example_toy'
overwrite_dir: False
snapshot_interval: 10.0

run:
jobtype: 'global_optimisation'
# Choose either 'serial', 'threaded' or 'mpi'
# (If 'mpi': Remember to run with `mpiexec -n N prospect example.yaml`)
mode: 'serial'

kernel:
type: 'analytical'
conf: ''
# This kernel loads a pre-generated 20d random Gaussian
# To generate your own, run with 'kernel_random_gauss.yaml' instead
param: 'input/example_toy/kernel_settings/kernel_load_random_gauss.yaml'

profile:
optimiser: 'simulated annealing'

# Temperature changes from the endpoints of the range in
# the amount of iterations given by 'max_iterations'
temperature_schedule: 'exponential'
temperature_range: [0.1, 0.001]
max_iterations: 20

# Adjusts step size at each iteration to lie in the interval chosen
step_size_schedule: 'adaptive'
step_size_adaptive_interval: [0.19, 0.21]
# Amplitude of the adjustment at each iteration
step_size_adaptive_multiplier: 0.3
step_size_adaptive_initial: 0.1

# Choose either 'steps_per_iteration' or 'minutes_per_iteration'
steps_per_iteration: 250
# Amount of times to independently optimise each point in the profile
repetitions: 3

start_from_mcmc: 'input/example_toy/mcmc/test'

plot_schedule: True
26 changes: 19 additions & 7 deletions prospect/communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from dataclasses import dataclass
from prospect.input import Configuration
from prospect.tasks.base_task import BaseTask
from prospect.tasks.base_analyse_task import BaseAnalyseTask
from prospect.tasks.analyse_profile_task import AnalyseProfileTask
from prospect.tasks.analyse_global_optimisation_task import AnalyseGlobalOptimisationTask
from prospect.tasks.initialise import initialise_tasks
from multiprocessing import Condition

Expand Down Expand Up @@ -71,6 +73,9 @@ def finalize(self, executor) -> None:
if self.config.run.jobtype == 'profile':
analysis_task = self.get_profile_analysis()
analysis_task.run([task for task in self.tasks.done.values() if task.id in analysis_task.required_task_ids])
elif self.config.run.jobtype == 'global_optimisation':
analysis_task = self.get_global_optimisation_analysis()
analysis_task.run([task for task in self.tasks.done.values() if task.id in analysis_task.required_task_ids])
self.dump_snapshot()
self.status_update()

Expand Down Expand Up @@ -142,10 +147,10 @@ def finalize_task(self, future) -> None:

if self.config.io.write:
if time.time() > self.next_dump_time:
if self.config.run.jobtype == 'profile' and finished_task.type != 'AnalyseProfileTask':
if 'AnalyseProfileTask' not in [queued_task.type for queued_task in self.tasks.queued]:
if 'AnalyseProfileTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
self.push_task(self.get_profile_analysis())
if finished_task.type != 'AnalyseProfileTask' and finished_task.type != 'AnalyseGlobalOptimisationTask':
analyse_task = self.get_analysis_task()
if analyse_task is not None:
self.push_task(analyse_task)
self.dump_snapshot()
self.status_update()

Expand Down Expand Up @@ -202,10 +207,17 @@ def get_write_line(task: Type[BaseTask]) -> str:
status_file.write(get_write_line(task_done))
status_file.write("\n")

def get_profile_analysis(self) -> AnalyseProfileTask:
def get_analysis_task(self) -> Type[BaseAnalyseTask]:
required_tasks = [id for id, task in self.tasks.done.items() if task.type == 'OptimiseTask' or task.type == 'InitialiseOptimiserTask']
return AnalyseProfileTask(self.config, required_tasks)

if self.config.run.jobtype == 'profile':
if 'AnalyseProfileTask' not in [queued_task.type for queued_task in self.tasks.queued]:
if 'AnalyseProfileTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
return AnalyseProfileTask(self.config, required_tasks)
elif self.config.run.jobtype == 'global_optimisation':
if 'AnalyseGlobalOptimisationTask' not in [queued_task.type for queued_task in self.tasks.queued]:
if 'AnalyseGlobalOptimisationTask' not in [ongoing_task.type for ongoing_task in self.tasks.ongoing.values()]:
return AnalyseGlobalOptimisationTask(self.config, required_tasks)

class SerialContext:
class MockFuture:
def __init__(self, finished_task: Type[BaseTask]) -> None:
Expand Down
3 changes: 3 additions & 0 deletions prospect/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def __init__(self, config_yaml):
modules['mcmc'] = import_module('prospect.mcmc')
elif config_yaml['run']['jobtype'] == 'profile':
modules['profile'] = import_module('prospect.profile')
elif config_yaml['run']['jobtype'] == 'global_optimisation':
# Global optimisation uses profiling inputs for optimisation
modules['profile'] = import_module('prospect.profile')
else:
raise ValueError("The given value of 'jobtype' is not recognised. Choose either 'mcmc' or 'profile'.")
else:
Expand Down
8 changes: 6 additions & 2 deletions prospect/optimiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ def __init__(self, config, kernel, optimise_settings):
self.mcmc = MetropolisHastings(config_mcmc, self.kernel, **mcmc_args)

def optimise(self):
print(f"Running a simulated annealing iteration of parameter {self.config.profile.parameter}={self.settings['fixed_param_val']} with temperature {self.settings['temperature']}")
if self.config.run.jobtype == 'profile':
print(f"Running a simulated annealing iteration of parameter {self.config.profile.parameter}={self.settings['fixed_param_val']} with temperature {self.settings['temperature']}")
else:
print(f"Running a simulated annealing iteration with temperature {self.settings['temperature']}")
if self.config.profile.steps_per_iteration is not None:
self.mcmc.run_steps(self.config.profile.steps_per_iteration)
elif self.config.profile.minutes_per_iteration is not None:
Expand Down Expand Up @@ -147,12 +150,13 @@ def get_next_iteration_settings(self):

optimise_settings = {
'current_best_loglkl': self.bestfit['loglkl'],
'fixed_param_val': self.settings['fixed_param_val'],
'initial_position': self.mcmc.chain.last_position,
'covmat': self.covmat,
'temperature': new_temp,
'temperature_change': self.settings['temperature_change'],
'step_size': new_step_size,
'step_size_change': self.settings['step_size_change']
}
if self.config.run.jobtype == 'profile':
optimise_settings['fixed_param_val'] = self.settings['fixed_param_val']
return optimise_settings
13 changes: 12 additions & 1 deletion prospect/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,22 @@ def load_profile(prospect_folder, direct_txt=False):
@dataclass
class Arguments:
class parameter(InputArgument):
val_type = str
val_type = str | NoneType
def get_default(self, config_yaml: dict[str, Any]):
if config_yaml['run']['jobtype'] == 'global_optimisation':
# None corresponds to global optimisation
return None
else:
raise ValueError("You need to specify the 'parameter' input of the profile section.")

class values(InputArgument):
"""Parameter values where the profile is evaluated"""
val_type = list[float] | list[int] | np.ndarray | list
def get_default(self, config_yaml: dict[str, Any]):
if config_yaml['run']['jobtype'] == 'global_optimisation':
return []
else:
raise ValueError("You need to specify the 'values' input of the profile section.")

class optimiser(InputArgument):
allowed_values = ['simulated annealing']
Expand Down
7 changes: 5 additions & 2 deletions prospect/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ def analyse(prospect_folder):
snapshot = load(prospect_folder)
if snapshot.config.run.jobtype == 'profile':
print(f"Analysing {prospect_folder} as a profile likelihood run.")
analysis_task = snapshot.get_profile_analysis()
analysis_task = snapshot.get_analysis_task()
if snapshot.config.run.jobtype == 'global_optimisation':
print(f"Analysing {prospect_folder} as a global optimisation likelihood run.")
analysis_task = snapshot.get_analysis_task()

elif snapshot.config.run.jobtype == 'mcmc':
print(f"Analysing {prospect_folder} as an MCMC run.")
Expand Down Expand Up @@ -133,7 +136,7 @@ def analyse_from_shell():
class Arguments:
class jobtype(InputArgument):
val_type = str
allowed_values = ['mcmc', 'profile']
allowed_values = ['mcmc', 'profile', 'global_optimisation']

class mode(InputArgument):
val_type = str
Expand Down
136 changes: 136 additions & 0 deletions prospect/tasks/analyse_global_optimisation_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from collections import defaultdict
from copy import deepcopy
import os
import time
from typing import Type
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from scipy import stats
from prospect.input import Configuration
from prospect.kernels.initialisation import initialise_kernel
from prospect.tasks.base_analyse_task import BaseAnalyseTask
from prospect.tasks.optimise_task import OptimiseTask

# Switch matplotlib backend to one that doesn't pop up figures
matplotlib.use('Agg')

class AnalyseGlobalOptimisationTask(BaseAnalyseTask):
def run(self, tasks: list[OptimiseTask]):
if not tasks:
print(f"No tasks to analyse. Ending AnalyseProfileTask.")
return
print("Running global optimisation analysis script...")
kernel = initialise_kernel(self.config.kernel, self.config.io.dir, self.id)
optimise_tasks, initialise_tasks = [], []
for task in tasks:
if task.type == 'OptimiseTask':
optimise_tasks.append(task)
elif task.type == 'InitialiseOptimiserTask':
initialise_tasks.append(task)
bestfits = self.sort_tasks(optimise_tasks, initialise_tasks)
self.write_results(bestfits, kernel)
if self.config.profile.plot_schedule:
tic = time.perf_counter()
self.plot_schedule(optimise_tasks)
print(f"Plotted schedule in {time.perf_counter() - tic:.5} s")

def sort_tasks(self, optimise_tasks, initialise_tasks):
bestfits = {}
for idx_rep in range(self.config.profile.repetitions):
current_bestfit = {'loglkl': np.inf}
for task in optimise_tasks:
if task.optimise_settings['repetition_number'] == idx_rep:
if task.optimiser.bestfit['loglkl'] < current_bestfit['loglkl']:
current_bestfit = task.optimiser.bestfit
bestfits[idx_rep] = current_bestfit # store the best in each rep
best_rep = list(bestfits.keys())[np.argmin([rep['loglkl'] for rep in bestfits.values()])]
bestfits[idx_rep]['best_rep'] = best_rep

for initialise_task in initialise_tasks:
# All reps are all initialised to the same point
bestfits[idx_rep]['initial'] = initialise_task.initial_bestfit
bestfits[idx_rep]['initial_loglkl'] = initialise_task.initial_loglkl
bestfits[idx_rep]['covmat'] = initialise_task.initial_covmat
break
return bestfits

def write_results(self, bestfits, kernel) -> None:
results_file = os.path.join(self.dir, f'{self.config.profile.parameter}.txt')
param_names = [param_name for param_name in kernel.param['varying'] if param_name != self.config.profile.parameter]

print(f"Writing current optimisation results to {results_file}")
with open(results_file,"w") as file:
# Write header
file.write(f"Repetition number \t -loglkl \t initial_loglkl \t ")
file.write(f" \t ".join([str(name) for name in param_names])+"\n")
# Write results
for idx_rep in range(self.config.profile.repetitions):
if bestfits[idx_rep]['loglkl'] == np.inf:
file.write(f"{idx_rep} \t --- no optimisations finished ---\n")
else:
file.write(f"{idx_rep} \t {bestfits[idx_rep]['loglkl']:.10e} \t ")
file.write(f"{bestfits[idx_rep]['initial_loglkl']:.10e} \t ")
file.write(" \t ".join([str(np.round(bestfits[idx_rep]['position'][name][0], 10)) for name in param_names])+"\n")

def plot_schedule(self, optimise_tasks):
# Collect nested dict data structure
iter_data = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
# Sort optimisetasks by their repetition and iteration numbers
for task in optimise_tasks:
iter_data[task.optimise_settings['repetition_number']][task.optimise_settings['iteration_number']] = {
'acceptance_rate': task.optimiser.bestfit['acceptance_rate'],
'temperature': task.optimiser.settings['temperature'],
'step_size': task.optimiser.settings['step_size'],
'step_size_change': task.optimiser.settings['step_size_change'],
'loglkl': task.optimiser.bestfit['loglkl']
}
plot_adaptive = self.config.profile.step_size_schedule == 'adaptive' and self.config.profile.step_size_adaptive_multiplier == 'adaptive'
if plot_adaptive == 'adaptive':
fig, ax = plt.subplots(4, 1, figsize=(self.fig['width'], 3*self.fig['height']))
else:
fig, ax = plt.subplots(3, 1, figsize=(self.fig['width'], 2.5*self.fig['height']))
current_longest = {'len': 0}

for idx_rep in range(self.config.profile.repetitions):
if idx_rep in iter_data:
iterlist = list(iter_data[idx_rep].keys())
order = np.argsort(iterlist)
iterlist = np.array(iterlist)[order]
loglkls = np.array([it['loglkl'] for it in iter_data[idx_rep].values()])[order]
acceptance_rates = np.array([it['acceptance_rate'] for it in iter_data[idx_rep].values()])[order]
step_sizes = np.array([it['step_size'] for it in iter_data[idx_rep].values()])[order]
ax[0].plot(iterlist, loglkls, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
ax[1].plot(iterlist, acceptance_rates, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
ax[2].plot(iterlist, step_sizes, '.-')
if plot_adaptive == 'adaptive':
multipliers = np.array([it['step_size_change']['current_multiplier'] for it in iter_data[idx_rep].values()])[order]
ax[3].plot(iterlist, multipliers, '.-', alpha=0.8, ms=self.fig['rep_ms'][idx_rep])
if len(iterlist) > current_longest['len']:
current_longest = {
'len': len(iterlist),
'idx_rep': idx_rep
}
iterlim = [-0.1, current_longest['len'] + 0.1]
ax[0].set(ylabel='-loglkl', xlim=iterlim, xticks=[], yscale='log')
ax[1].set(ylabel='acceptance rate', xlim=iterlim, xticks=[])
ax[2].set(ylabel='step size & temp', xlim=iterlim, xticks=[], yscale='log')
if plot_adaptive:
ax[3].set(xlabel='iteration', ylabel='step multiplier', xlim=iterlim)
if current_longest['len'] > 0:
# Plot temperature of the run with most iterations
iterlist = list(iter_data[current_longest['idx_rep']].keys())
order = np.argsort(iterlist)
iterlist = np.array(iterlist)[order]
temperatures = np.array([it['temperature'] for it in iter_data[current_longest['idx_rep']].values()])[order]
ax[2].plot(iterlist, temperatures, 'r.-', label='T')
handles, labels = ax[0].get_legend_handles_labels()
if handles and labels:
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
ax[0].legend(handles, labels, fontsize=0.5*self.fig['fontsize'])
ax[0].legend(handles, labels, fontsize=0.5*self.fig['fontsize'])
ax[2].legend()
fig.tight_layout()
fig.savefig(os.path.join(self.dir, f'optimisation_schedule.pdf'))
plt.close()
Loading

0 comments on commit f829ec0

Please sign in to comment.