Skip to content

Commit

Permalink
Version bump to 1.1:
Browse files Browse the repository at this point in the history
README updates with accepted paper and more concise example.
Change of arguments in the main function (explore_model_space): buffer was renamed to beam, and it now also accepts lists of integers; dynamic_buffer was consequently removed, and some other arguments were reordered.
Added checkModelSearchREADMEexample.py for easier future README example updates.
Added the KE classes to the default __init__.py exports.
Updated requirements.txt and made setup.py read it to ensure dependency installation.
  • Loading branch information
T-Flet committed Jul 14, 2021
1 parent c5e3a98 commit ea369b9
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 116 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.idea/
Tests/Plots
Tests/Stats
Tests/Pickles


# Byte-compiled / optimized / DLL files
Expand Down
58 changes: 30 additions & 28 deletions GPy_ABCD/Models/modelSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from copy import deepcopy
from operator import attrgetter
from multiprocessing import Pool, cpu_count
from typing import Callable, List, Dict, Tuple
from typing import Callable, List, Dict, Tuple, Union

from GPy_ABCD.KernelExpressions.base import KernelExpression
from GPy_ABCD.KernelExpansion.grammar import start_kernels, production_rules, make_simple_kexs, expand
Expand All @@ -11,11 +11,11 @@


# TODO:
# - group input parameters into dictionaries
# - group input parameters into dictionaries?
# - make utility functions take in the kernel expression (or even full model) so that further criteria may be applied (e.g. presence of specific kernels etc)?
# - focus on documenting end-user and generic developer functions etc in sphinx
# - make the dynamic buffer configurable, or even allow inputting a list of numbers of models to keep per round
# - make an interactive mode which asks whether to go further, retaining how many etc
# - make an interactive/interruptable mode which asks whether to go further, retaining how many etc
# - allow the model lists in each round to be fit in batches, with interactive request to continue (timed response maybe)
# - show a live count of models fitted so far in each round (probably by batches)

Expand All @@ -39,33 +39,32 @@ def fit_mods_parallel_processes(X, Y, k_exprs, restarts = 5, optimiser = GPy_opt


def explore_model_space(X, Y,
start_kernels: Dict[str, List[KernelExpression]] = start_kernels['Default'], p_rules: Dict[str, List[Callable]] = production_rules['Default'], utility_function: Callable = BIC,
rounds: int = 2, buffer: int = 4, dynamic_buffer: bool = True, verbose: bool = True,
restarts: int = 5, model_list_fitter: Callable = fit_mods_parallel_processes, optimiser: str = GPy_optimisers[0]) -> Tuple[List[GPModel], List[List[GPModel]], List[KernelExpression], List[GPModel], List[GPModel]]:
start_kernels: List[Union[str, KernelExpression]] = start_kernels['Default'], p_rules: List[Callable] = production_rules['Default'], utility_function: Callable = BIC,
rounds: int = 2, beam: Union[int, List[int]] = [3, 2, 1], restarts: int = 5,
model_list_fitter: Callable = fit_mods_parallel_processes, optimiser: str = GPy_optimisers[0],
verbose: bool = True) -> Tuple[List[GPModel], List[List[GPModel]], List[KernelExpression], List[GPModel], List[GPModel]]:
'''Perform `rounds` rounds of kernel expansion followed by model fit, starting from the given `start_kernels` and expanding the best `beam` of them with `p_rules` production rules
NOTE: if the default `model_list_fitter` argument `fit_mods_parallel_processes` is used the function should be called from within a :code:`if __name__ == '__main__':` for full OS-agnostic use.
:param start_kernels: the starting kernels
:type start_kernels: Dict[str, List[KernelExpression]]
:param start_kernels: the 0th-round starting kernels
:type start_kernels: List[Union[str, KernelExpression]]
:param p_rules: the production rules applied at each expansion
:type p_rules: Dict[str, List[Callable]]
:type p_rules: List[Callable]
:param utility_function: model-scoring utility function: inputs are log-likelihood (ll), number of data points (n) and number of estimated parameters (k); AIC, AICc and BIC functions exported; arbitrary ones accepted
:type utility_function: Callable
:param rounds: number of rounds of model exploration
:type rounds: Int
:param buffer: number of best fit-models' kernels to expand each round
:type buffer: Int
:param dynamic_buffer: if True: buffer is increased by 2 at the beginning and decreased by 1 in the first two and last two rounds
:type dynamic_buffer: Boolean
:param verbose: produce verbose logs
:type verbose: Boolean
:param beam: number of best fit-models' kernels to expand each round, either an integer or a list of integers; in the latter case, if its length is less than `rounds` then the last value is repeated to make up the required length
:type beam: Union[Int, List[Int]]
:param restarts: number of GPy model-fitting restarts with different parameters
:type restarts: Int
:param model_list_fitter: function handling the fitting of a list of kernels to the same data; this is where parallelisation implementation can be changed
:type model_list_fitter: Callable
:param optimiser: identifying string for the model optimiser function; GPy 1.9.9 optimiser strings (GPy > paramz > optimization > optimization.py): 'lbfgsb', 'org-bfgs', 'fmin_tnc', 'scg', 'simplex', 'adadelta', 'rprop', 'adam'
:type optimiser: str
:param verbose: produce verbose logs
:type verbose: Boolean
:rtype: (sorted_models: [GPModel], tested_models: [[GPModel]], tested_k_exprs: [KernelExpression], expanded: [GPModel], not_expanded: [GPModel])
'''
Expand All @@ -82,39 +81,42 @@ def score(m): return m.compute_utility(utility_function)
expanded = []
tested_k_exprs = deepcopy(start_kexs)

original_buffer = buffer
if dynamic_buffer: buffer += 2 # Higher for the 1st round
if verbose: print(f'(All models are listed by descending {utility_function.__name__})\n\nBest round-{0} models [{len(tested_models[0])} new; {buffer} moving forward]: {print_k_list(not_expanded[:buffer])}')
# Handle beam argument: ensure it is a list of integers of length rounds
if isinstance(beam, list) and all(isinstance(x, int) for x in beam):
beam = beam + ([beam[-1]] * more) if (more := rounds - len(beam)) > 0 else beam[:rounds]
elif isinstance(beam, int): beam = [beam] * rounds
else: raise TypeError(f'The given beam argument ({beam}) is neither an integer nor a list of integers')

if verbose: print(f'(All models are listed by descending {utility_function.__name__})\n\nBest round-{0} models [{len(tested_models[0])} new; {beam[0]} moving forward]: {print_k_list(not_expanded[:beam[0]])}')

sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = model_search_rounds(X, Y,
original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded, model_list_fitter,
p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose, optimiser)
sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose)

if verbose: print(f'\nBest models overall: {print_k_list(sorted_models[:original_buffer])}\n')
if verbose: print(f'\nBest models overall: {print_k_list(sorted_models[:beam[0]])}\n')
return sorted_models, tested_models, tested_k_exprs, expanded, not_expanded


# This function is split from the above both for tidiness and to allow the possibility of continuing a search if desired
def model_search_rounds(X, Y, original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose, optimiser):
def model_search_rounds(X, Y, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose):
'''
See explore_model_space description and source code for argument explanation and context
Note: sorted_models is not actually used but replaced with the new value; present as an argument just for consistency
'''
def score(m): return m.compute_utility(utility_function)

for d in range(1, rounds + 1):
new_k_exprs = [kex for kex in unique(flatten([expand(mod.kernel_expression, p_rules) for mod in not_expanded[:buffer]])) if kex not in tested_k_exprs]
for r, b in zip(range(1, rounds + 1), beam):
new_k_exprs = [kex for kex in unique(flatten([expand(mod.kernel_expression, p_rules) for mod in not_expanded[:b]])) if kex not in tested_k_exprs]
tested_models.append(sorted(model_list_fitter(X, Y, new_k_exprs, restarts, optimiser), key = score)) # tested_models[d]

sorted_models = sorted(flatten(tested_models), key = attrgetter('cached_utility_function')) # Merge-sort would be applicable
expanded += not_expanded[:buffer]
expanded += not_expanded[:b]
not_expanded = diff(sorted_models, expanded) # More efficient than sorting another whole list
tested_k_exprs += new_k_exprs

buffer -= 1 if dynamic_buffer and (d <= 2 or d in range(rounds - 1, rounds + 1)) else 0
if verbose: print(f'Round-{d} models [{len(tested_models[d])} new; {buffer} moving forward]:\n\tBest new: {print_k_list(tested_models[d][:original_buffer])}\n\tBest so far: {print_k_list(sorted_models[:original_buffer])}\n\tBest not-already-expanded: {print_k_list(not_expanded[:buffer])}')
if verbose: print(f'Round-{r} models [{len(tested_models[r])} new; {b} moving forward]:\n\tBest new: {print_k_list(tested_models[r][:beam[0]])}\n\tBest so far: {print_k_list(sorted_models[:beam[0]])}\n\tBest not-already-expanded: {print_k_list(not_expanded[:b])}')

return sorted_models, tested_models, tested_k_exprs, expanded, not_expanded

Expand Down
3 changes: 2 additions & 1 deletion GPy_ABCD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""GPy-ABCD - Basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system"""

__version__ = '1.0.3' # Change it in setup.py too
__version__ = '1.1' # Change it in setup.py too
__author__ = 'Thomas Fletcher <[email protected]>'
# __all__ = []

Expand All @@ -10,5 +10,6 @@
from GPy_ABCD.Util.modelUtil import BIC, AIC, AICc, fit_kex, fit_GPy_kern, model_printout, GPy_optimisers
from GPy_ABCD.KernelExpansion.grammar import start_kernels, production_rules_by_type, production_rules
from GPy_ABCD.KernelExpansion.kernelOperations import base_kerns, base_sigmoids
from GPy_ABCD.KernelExpressions.all import SumKE, ProductKE, ChangeKE


Loading

0 comments on commit ea369b9

Please sign in to comment.