Added plot and change_plotting_library methods directly to the Model class (wrapping the lower-level model ones); changing to plotly works but throws many warnings.

Added a Laplace Approximation of Leave-One-Out Cross-Validation error (LA_LOO) utility function for possible future work on balancing it against complexity scores.
T-Flet committed Jul 14, 2021
1 parent ea369b9 commit 075d9c5
Showing 6 changed files with 98 additions and 38 deletions.
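For orientation, a minimal usage sketch of the new plotting wrappers (not part of the commit; it assumes a fitted model mod as produced by fit_kex in the updated regressPeriodicKernel test below):

    mod = fit_kex(X, Y, correct_k, 10)  # fit a kernel expression to data, as in the updated test
    mod.change_plotting_library()       # defaults to 'plotly_offline'
    fig = mod.plot()[0]                 # with a plotly backend, plot() returns a 3-tuple whose 1st element is the Figure
    fig.show()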
10 changes: 9 additions & 1 deletion GPy_ABCD/Models/model.py
@@ -1,6 +1,7 @@
import warnings
from copy import deepcopy
from GPy.models import GPRegression
from GPy.plotting import change_plotting_library

from GPy_ABCD.KernelExpressions.all import SumKE
from GPy_ABCD.KernelExpansion.kernelOperations import fit_ker_to_kex_with_params
@@ -36,6 +37,13 @@ def predict(self, X, quantiles = (2.5, 97.5), full_cov = False, Y_metadata = Non
qs = self.model.predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
return {'mean': mean, 'covariance': cov, 'low_quantile': qs[0], 'high_quantile': qs[1]}

def change_plotting_library(self, library = 'plotly_offline'):
'''Wrapper of the namesake GPy.plotting function;
supported values are: 'matplotlib', 'plotly', 'plotly_online', 'plotly_offline' and 'none'.
If 'plotly' is selected, then plot calls return a 3-tuple whose 1st element is the Figure object, which requires a .show() call to be displayed.'''
change_plotting_library(library)

def plot(self): return self.model.plot()


# Model fit objective criteria & related values:
@@ -44,7 +52,7 @@ def _ll(self): return self.model.log_likelihood()
def _n(self): return len(self.model.X) # number of data points
def _k(self): return self.model._size_transformed() # number of estimated parameters, i.e. model degrees of freedom

def _ordered_score_ps(self): return self._ll(), self._n(), self._k()
def _ordered_score_ps(self): return self.model, self._ll(), self._n(), self._k()

def compute_utility(self, score_f):
self.cached_utility_function = score_f(*self._ordered_score_ps())
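Aside (not part of the diff): since _ordered_score_ps now also returns the fitted model, any utility function passed to compute_utility must accept the model as its first argument, matching the updated BIC/AIC/AICc/LA_LOO signatures below. A sketch of a custom criterion under the new signature (the penalty is illustrative only, not from this commit):

    def HQC(m, ll, n, k): return -2 * ll + 2 * k * np.log(np.log(n))  # Hannan-Quinn-style criterion; ignores m, like the standard scores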
3 changes: 2 additions & 1 deletion GPy_ABCD/Models/modelSearch.py
@@ -101,7 +101,8 @@ def score(m): return m.compute_utility(utility_function)
def model_search_rounds(X, Y, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose):
'''
See explore_model_space description and source code for argument explanation and context
This function can be used to continue a previous search: the 5 arguments after Y are the outputs of explore_model_space.
See explore_model_space description and definition for argument explanation and context.
Note: sorted_models is not actually used but is replaced with the new value; it is present as an argument just for consistency
'''
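A sketch of the continuation pattern this docstring describes (hypothetical argument values; the names are the ones used elsewhere in this commit):

    sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y,
        start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
        rounds = 2, beam = 3, restarts = 4, model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
    # Continue the same search for further rounds, feeding the 5 outputs back in after X and Y
    sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = model_search_rounds(X, Y,
        sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
        fit_mods_parallel_processes, production_rules['Default'], BIC,
        rounds = 2, beam = 3, restarts = 4, optimiser = GPy_optimisers[0], verbose = True)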
10 changes: 6 additions & 4 deletions GPy_ABCD/Util/modelUtil.py
@@ -8,9 +8,11 @@

# Standard Utility functions

def BIC(ll, n, k): return -2 * ll + k * np.log(n)
def AIC(ll, n, k): return 2 * (-ll + k)
def AICc(ll, n, k): return 2 * (-ll + k + (k**2 + k) / (n - k - 1)) # Assumes univariate model linear in its parameters and with normally-distributed residuals conditional upon regressors
# TODO: investigate WAIC, WBIC, BPIC and approximations of LOO
def BIC(m, ll, n, k): return -2 * ll + k * np.log(n)
def AIC(m, ll, n, k): return 2 * (-ll + k)
def AICc(m, ll, n, k): return 2 * (-ll + k + (k**2 + k) / (n - k - 1)) # Assumes univariate model linear in its parameters and with normally-distributed residuals conditional upon regressors
def LA_LOO(m, ll, n, k): return np.mean(m.inference_method.LOO(m.kern, m.X, m.Y, m.likelihood, m.posterior)) # See LOO method description for reference


# Allowed GPy optimisers ('lbfgsb' is the default)
@@ -25,7 +27,7 @@ def model_printout(m):
print(m.model.kern)
print(f'Log-Lik: {m.model.log_likelihood()}')
print(f'{m.cached_utility_function_type}: {m.cached_utility_function}')
m.model.plot()
m.plot()
print(m.interpret())


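A caveat worth noting (inferred from GPy's LOO docstring, not stated in this commit): inference_method.LOO presumably returns the per-point leave-one-out log predictive densities, so LA_LOO is a mean log density and improves as it increases, whereas BIC, AIC and AICc improve as they decrease; balancing it against complexity scores would therefore need a sign/scale convention, e.g. a hypothetical lower-is-better wrapper:

    def neg_LA_LOO(m, ll, n, k): return -2 * n * LA_LOO(m, ll, n, k)  # hypothetical deviance-scaled variant of LA_LOO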
8 changes: 7 additions & 1 deletion Tests/regressPeriodicKernel.py
@@ -6,7 +6,7 @@
from GPy_ABCD.Util.modelUtil import *


# X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 )**2, np.linspace(-10, 10, 101), 7, 1)
# X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 ), np.linspace(-10, 10, 101), 7, 1)
X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 )**2 + 10, np.linspace(-10, 10, 101), 7, 1)


@@ -26,6 +26,12 @@
# mod = fit_GPy_kern(X, Y, kernel, 10, optimizer ='lbfgsb')

mod = fit_kex(X, Y, correct_k, 10)


mod.change_plotting_library()
mod.plot()[0].show()


model_printout(mod)

plt.show()
25 changes: 14 additions & 11 deletions Tests/testConsistency.py
@@ -55,16 +55,19 @@ def save_one_run(dataset_name, expected_best, best_mods, all_mods, all_exprs):



if __name__ == '__main__':
# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
# if __name__ == '__main__':
# # np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
#
# n_iterations = 3
# n_runs_stats = []
# for i in range(n_iterations):
# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y,
# start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
# rounds = 2, beam = 3, restarts = 4,
# model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
# n_runs_stats.append(one_run_statistics(best_mods, all_mods, all_exprs, 5))
# print(f'{i+1} runs done')
#
# get_and_save_stats(n_runs_stats)

n_iterations = 3
n_runs_stats = []
for i in range(n_iterations):
sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels=standard_start_kernels, p_rules=production_rules_all,
restarts=3, utility_function='BIC', rounds=2, buffer=2,
dynamic_buffer=True, verbose=True, parallel=True)
n_runs_stats.append(one_run_statistics(sorted_models, tested_models, tested_k_exprs, 5))
print(f'{i+1} runs done')

get_and_save_stats(n_runs_stats)
80 changes: 60 additions & 20 deletions Tests/testData.py
@@ -1,37 +1,77 @@
import pickle
import pprint
import numpy as np
import scipy.io as sio
from matplotlib import pyplot as plt
from datetime import datetime

from GPy_ABCD.Models.modelSearch import *
from GPy_ABCD import config as global_flags
from testConsistency import save_one_run


# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
datasets = ['01-airline', '02-solar', '03-mauna', '04-wheat', '05-temperature', '06-internet', '07-call-centre', '08-radio', '09-gas-production', '10-sulphuric', '11-unemployment', '12-births', '13-wages']
# Only 1, 2, 10 and 11 have published analyses, and their identified formulae are (deciphered from component descriptions):
# 1: LIN + PER * LIN + SE + WN * LIN
# Default Rules: (PER + C + LIN * (PER + C)) * (PER + PER + WN) * (C + LIN)
# 2: C + CW_1643_1716(PER + SE + RQ + WN * LIN + WN * LIN, C + WN)
# Default Rules: C + (PER + C) * (PER + PER + PER + WN)
# 10: PER + SE + CP_64(PER + WN, CW_69_77(SE, SE) + CP_90(C + WN, WN))
# Default Rules: (PER + C) * (PER + LIN + PER * LIN)
# 11: SE + PER + SE + SE + WN
# Default Rules: (PER + PER + PER + C) * (C + LIN)


if __name__ == '__main__':
retrieve_instead = False

datasets_to_test = [1, 2]#, 10, 11]

def run_for_dataset_number(dataset_id):
dataset_name = datasets[dataset_id - 1]
data = sio.loadmat(f'./Data/{dataset_name}.mat')
# print(data.keys())

X = data['X']
Y = data['y']

args_to_save = {'start_kernels': start_kernels['Default'], 'p_rules': production_rules['Default'], 'utility_function': BIC,
'rounds': 5, 'beam': 2, 'restarts': 10, 'model_list_fitter': fit_mods_parallel_processes, 'optimiser': GPy_optimisers[0], 'verbose': True}
best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, **args_to_save)

# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
datasets = ['01-airline', '02-solar', '03-mauna', '04-wheat', '05-temperature', '06-internet', '07-call-centre', '08-radio', '09-gas-production', '10-sulphuric', '11-unemployment', '12-births', '13-wages']
dataset_name = datasets[1-1]
# for mod_depth in all_mods: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')
#
# print()
#
# from matplotlib import pyplot as plt
# for bm in best_mods[:3]: model_printout(bm)
# plt.show()

data = sio.loadmat(f'./Data/{dataset_name}.mat')
# print(data.keys())
with open(f'./Pickles/{dataset_name}_{datetime.now().strftime("%d-%m-%Y_%H-%M-%S")}', 'wb') as f:
pickle.dump({'dataset_name': dataset_name, 'best_mods': best_mods[:10],
'str_of_args': pprint.pformat(args_to_save, width = 40, compact = True),
'global_flags': {
'__INCLUDE_SE_KERNEL': global_flags.__INCLUDE_SE_KERNEL,
'__USE_LIN_KERNEL_HORIZONTAL_OFFSET': global_flags.__USE_LIN_KERNEL_HORIZONTAL_OFFSET,
'__USE_NON_PURELY_PERIODIC_PER_KERNEL': global_flags.__USE_NON_PURELY_PERIODIC_PER_KERNEL,
'__FIX_SIGMOIDAL_KERNELS_SLOPE': global_flags.__FIX_SIGMOIDAL_KERNELS_SLOPE,
'__USE_INDEPENDENT_SIDES_CHANGEWINDOW_KERNEL': global_flags.__USE_INDEPENDENT_SIDES_CHANGEWINDOW_KERNEL
} }, f)

X = data['X']
Y = data['y']
# save_one_run(dataset_name, 'UNKNOWN', best_mods, all_mods, all_exprs)

sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules = production_rules_all,
restarts = 3, utility_function = 'BIC', rounds = 3, buffer = 2, dynamic_buffer = True, verbose = True, parallel = True)

for mod_depth in tested_models: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')
## ACTUAL EXECUTION ##

from matplotlib import pyplot as plt
for bm in sorted_models[:3]:
print(bm.kernel_expression)
print(bm.model.kern)
print(bm.model.log_likelihood())
print(bm.cached_utility_function)
bm.model.plot()
print(bm.interpret())
if not retrieve_instead:
for id in datasets_to_test: run_for_dataset_number(id)
else:
file_names = ['01-airline_19-12-2020_19-22-21', '02-solar_19-12-2020_22-27-53', '10-sulphuric_20-12-2020_04-01-08', '11-unemployment_20-12-2020_12-04-19']
with open(f'./Pickles/{file_names[0]}', 'rb') as f: IMPORTED = pickle.load(f)
# print(IMPORTED)

plt.show()
for bm in IMPORTED['best_mods'][:3]: model_printout(bm)
plt.show()

save_one_run(dataset_name, 'UNKNOWN', sorted_models, tested_models, tested_k_exprs)
