Added plot and change_plotting_library methods directly to the Model class (wrapping the lower-level model ones); changing to plotly works but throws many warnings.

Added a Laplace Approximation of Leave-One-Out Cross-Validation error (LA_LOO) utility function for possible future work on balancing it against complexity scores.
T-Flet committed Jul 14, 2021
1 parent ea369b9 commit 075d9c5
Showing 6 changed files with 98 additions and 38 deletions.
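For orientation, a minimal usage sketch of the new plotting wrappers (not part of the commit; it assumes a fitted model mod as produced by fit_kex in the updated regressPeriodicKernel test below):

    mod = fit_kex(X, Y, correct_k, 10)  # fit a kernel expression to data, as in the updated test
    mod.change_plotting_library()       # defaults to 'plotly_offline'
    fig = mod.plot()[0]                 # with a plotly backend, plot() returns a 3-tuple whose 1st element is the Figure
    fig.show()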
10 changes: 9 additions & 1 deletion GPy_ABCD/Models/model.py
@@ -1,6 +1,7 @@
import warnings
from copy import deepcopy
from GPy.models import GPRegression
from GPy.plotting import change_plotting_library

from GPy_ABCD.KernelExpressions.all import SumKE
from GPy_ABCD.KernelExpansion.kernelOperations import fit_ker_to_kex_with_params
@@ -36,6 +37,13 @@ def predict(self, X, quantiles = (2.5, 97.5), full_cov = False, Y_metadata = Non
qs = self.model.predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
return {'mean': mean, 'covariance': cov, 'low_quantile': qs[0], 'high_quantile': qs[1]}

def change_plotting_library(self, library = 'plotly_offline'):
'''Wrapper of the namesake GPy.plotting function;
supported values are: 'matplotlib', 'plotly', 'plotly_online', 'plotly_offline' and 'none'.
If 'plotly' is selected, then plot calls return a 3-tuple whose 1st element is the Figure object, which requires a .show() call to be displayed.'''
change_plotting_library(library)

def plot(self): return self.model.plot()


# Model fit objective criteria & related values:
@@ -44,7 +52,7 @@ def _ll(self): return self.model.log_likelihood()
def _n(self): return len(self.model.X) # number of data points
def _k(self): return self.model._size_transformed() # number of estimated parameters, i.e. model degrees of freedom

def _ordered_score_ps(self): return self._ll(), self._n(), self._k()
def _ordered_score_ps(self): return self.model, self._ll(), self._n(), self._k()

def compute_utility(self, score_f):
self.cached_utility_function = score_f(*self._ordered_score_ps())
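Aside (not part of the diff): since _ordered_score_ps now also returns the fitted model, any utility function passed to compute_utility must accept the model as its first argument, matching the updated BIC/AIC/AICc/LA_LOO signatures below. A sketch of a custom criterion under the new signature (the penalty is illustrative only, not from this commit):

    def HQC(m, ll, n, k): return -2 * ll + 2 * k * np.log(np.log(n))  # Hannan-Quinn-style criterion; ignores m, like the standard scores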
3 changes: 2 additions & 1 deletion GPy_ABCD/Models/modelSearch.py
@@ -101,7 +101,8 @@ def score(m): return m.compute_utility(utility_function)
def model_search_rounds(X, Y, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose):
'''
See explore_model_space description and source code for argument explanation and context
This function can be used to continue a previous search: the 5 arguments after Y are the outputs of explore_model_space.
See explore_model_space description and definition for argument explanation and context.
Note: sorted_models is not actually used but is replaced with the new value; it is present as an argument just for consistency
'''
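A sketch of the continuation pattern this docstring describes (hypothetical argument values; the names are the ones used elsewhere in this commit):

    sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y,
        start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
        rounds = 2, beam = 3, restarts = 4, model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
    # Continue the same search for further rounds, feeding the 5 outputs back in after X and Y
    sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = model_search_rounds(X, Y,
        sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
        fit_mods_parallel_processes, production_rules['Default'], BIC,
        rounds = 2, beam = 3, restarts = 4, optimiser = GPy_optimisers[0], verbose = True)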
10 changes: 6 additions & 4 deletions GPy_ABCD/Util/modelUtil.py
@@ -8,9 +8,11 @@

# Standard Utility functions

def BIC(ll, n, k): return -2 * ll + k * np.log(n)
def AIC(ll, n, k): return 2 * (-ll + k)
def AICc(ll, n, k): return 2 * (-ll + k + (k**2 + k) / (n - k - 1)) # Assumes univariate model linear in its parameters and with normally-distributed residuals conditional upon regressors
# TODO: investigate WAIC, WBIC, BPIC and approximations of LOO
def BIC(m, ll, n, k): return -2 * ll + k * np.log(n)
def AIC(m, ll, n, k): return 2 * (-ll + k)
def AICc(m, ll, n, k): return 2 * (-ll + k + (k**2 + k) / (n - k - 1)) # Assumes univariate model linear in its parameters and with normally-distributed residuals conditional upon regressors
def LA_LOO(m, ll, n, k): return np.mean(m.inference_method.LOO(m.kern, m.X, m.Y, m.likelihood, m.posterior)) # See LOO method description for reference


# Allowed GPy optimisers ('lbfgsb' is the default)
@@ -25,7 +27,7 @@ def model_printout(m):
print(m.model.kern)
print(f'Log-Lik: {m.model.log_likelihood()}')
print(f'{m.cached_utility_function_type}: {m.cached_utility_function}')
m.model.plot()
m.plot()
print(m.interpret())


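A caveat worth noting (inferred from GPy's LOO docstring, not stated in this commit): inference_method.LOO presumably returns the per-point leave-one-out log predictive densities, so LA_LOO is a mean log density and improves as it increases, whereas BIC, AIC and AICc improve as they decrease; balancing it against complexity scores would therefore need a sign/scale convention, e.g. a hypothetical lower-is-better wrapper:

    def neg_LA_LOO(m, ll, n, k): return -2 * n * LA_LOO(m, ll, n, k)  # hypothetical deviance-scaled variant of LA_LOO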
8 changes: 7 additions & 1 deletion Tests/regressPeriodicKernel.py
@@ -6,7 +6,7 @@
from GPy_ABCD.Util.modelUtil import *


# X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 )**2, np.linspace(-10, 10, 101), 7, 1)
# X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 ), np.linspace(-10, 10, 101), 7, 1)
X, Y = generate_data(lambda x: np.cos( (x - 5) / 2 )**2 + 10, np.linspace(-10, 10, 101), 7, 1)


@@ -26,6 +26,12 @@
# mod = fit_GPy_kern(X, Y, kernel, 10, optimizer ='lbfgsb')

mod = fit_kex(X, Y, correct_k, 10)


mod.change_plotting_library()
mod.plot()[0].show()


model_printout(mod)

plt.show()
25 changes: 14 additions & 11 deletions Tests/testConsistency.py
@@ -55,16 +55,19 @@ def save_one_run(dataset_name, expected_best, best_mods, all_mods, all_exprs):



if __name__ == '__main__':
# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
# if __name__ == '__main__':
# # np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
#
# n_iterations = 3
# n_runs_stats = []
# for i in range(n_iterations):
# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y,
# start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
# rounds = 2, beam = 3, restarts = 4,
# model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
# n_runs_stats.append(one_run_statistics(best_mods, all_mods, all_exprs, 5))
# print(f'{i+1} runs done')
#
# get_and_save_stats(n_runs_stats)

n_iterations = 3
n_runs_stats = []
for i in range(n_iterations):
sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels=standard_start_kernels, p_rules=production_rules_all,
restarts=3, utility_function='BIC', rounds=2, buffer=2,
dynamic_buffer=True, verbose=True, parallel=True)
n_runs_stats.append(one_run_statistics(sorted_models, tested_models, tested_k_exprs, 5))
print(f'{i+1} runs done')

get_and_save_stats(n_runs_stats)
80 changes: 60 additions & 20 deletions Tests/testData.py
@@ -1,37 +1,77 @@
import pickle
import pprint
import numpy as np
import scipy.io as sio
from matplotlib import pyplot as plt
from datetime import datetime

from GPy_ABCD.Models.modelSearch import *
from GPy_ABCD import config as global_flags
from testConsistency import save_one_run


# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
datasets = ['01-airline', '02-solar', '03-mauna', '04-wheat', '05-temperature', '06-internet', '07-call-centre', '08-radio', '09-gas-production', '10-sulphuric', '11-unemployment', '12-births', '13-wages']
# Only 1, 2, 10 and 11 have published analyses, and their identified formulae are (deciphered from component descriptions):
# 1: LIN + PER * LIN + SE + WN * LIN
# Default Rules: (PER + C + LIN * (PER + C)) * (PER + PER + WN) * (C + LIN)
# 2: C + CW_1643_1716(PER + SE + RQ + WN * LIN + WN * LIN, C + WN)
# Default Rules: C + (PER + C) * (PER + PER + PER + WN)
# 10: PER + SE + CP_64(PER + WN, CW_69_77(SE, SE) + CP_90(C + WN, WN))
# Default Rules: (PER + C) * (PER + LIN + PER * LIN)
# 11: SE + PER + SE + SE + WN
# Default Rules: (PER + PER + PER + C) * (C + LIN)


if __name__ == '__main__':
retrieve_instead = False

datasets_to_test = [1, 2]#, 10, 11]

def run_for_dataset_number(dataset_id):
dataset_name = datasets[dataset_id - 1]
data = sio.loadmat(f'./Data/{dataset_name}.mat')
# print(data.keys())

X = data['X']
Y = data['y']

args_to_save = {'start_kernels': start_kernels['Default'], 'p_rules': production_rules['Default'], 'utility_function': BIC,
'rounds': 5, 'beam': 2, 'restarts': 10, 'model_list_fitter': fit_mods_parallel_processes, 'optimiser': GPy_optimisers[0], 'verbose': True}
best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, **args_to_save)

# np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
datasets = ['01-airline', '02-solar', '03-mauna', '04-wheat', '05-temperature', '06-internet', '07-call-centre', '08-radio', '09-gas-production', '10-sulphuric', '11-unemployment', '12-births', '13-wages']
dataset_name = datasets[1-1]
# for mod_depth in all_mods: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')
#
# print()
#
# from matplotlib import pyplot as plt
# for bm in best_mods[:3]: model_printout(bm)
# plt.show()

data = sio.loadmat(f'./Data/{dataset_name}.mat')
# print(data.keys())
with open(f'./Pickles/{dataset_name}_{datetime.now().strftime("%d-%m-%Y_%H-%M-%S")}', 'wb') as f:
pickle.dump({'dataset_name': dataset_name, 'best_mods': best_mods[:10],
'str_of_args': pprint.pformat(args_to_save, width = 40, compact = True),
'global_flags': {
'__INCLUDE_SE_KERNEL': global_flags.__INCLUDE_SE_KERNEL,
'__USE_LIN_KERNEL_HORIZONTAL_OFFSET': global_flags.__USE_LIN_KERNEL_HORIZONTAL_OFFSET,
'__USE_NON_PURELY_PERIODIC_PER_KERNEL': global_flags.__USE_NON_PURELY_PERIODIC_PER_KERNEL,
'__FIX_SIGMOIDAL_KERNELS_SLOPE': global_flags.__FIX_SIGMOIDAL_KERNELS_SLOPE,
'__USE_INDEPENDENT_SIDES_CHANGEWINDOW_KERNEL': global_flags.__USE_INDEPENDENT_SIDES_CHANGEWINDOW_KERNEL
} }, f)

X = data['X']
Y = data['y']
# save_one_run(dataset_name, 'UNKNOWN', best_mods, all_mods, all_exprs)

sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules = production_rules_all,
restarts = 3, utility_function = 'BIC', rounds = 3, buffer = 2, dynamic_buffer = True, verbose = True, parallel = True)

for mod_depth in tested_models: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')
## ACTUAL EXECUTION ##

from matplotlib import pyplot as plt
for bm in sorted_models[:3]:
print(bm.kernel_expression)
print(bm.model.kern)
print(bm.model.log_likelihood())
print(bm.cached_utility_function)
bm.model.plot()
print(bm.interpret())
if not retrieve_instead:
for id in datasets_to_test: run_for_dataset_number(id)
else:
file_names = ['01-airline_19-12-2020_19-22-21', '02-solar_19-12-2020_22-27-53', '10-sulphuric_20-12-2020_04-01-08', '11-unemployment_20-12-2020_12-04-19']
with open(f'./Pickles/{file_names[0]}', 'rb') as f: IMPORTED = pickle.load(f)
# print(IMPORTED)

plt.show()
for bm in IMPORTED['best_mods'][:3]: model_printout(bm)
plt.show()

save_one_run(dataset_name, 'UNKNOWN', sorted_models, tested_models, tested_k_exprs)
