Skip to content

Commit

Permalink
Version bump to 1.1:
Browse files Browse the repository at this point in the history
README updates with accepted paper and more concise example.
Change of arguments in the main function (explore_model_space): buffer was renamed to beam, and it now also accepts lists of integers; dynamic_buffer was consequently removed, and some other arguments were reordered.
Added checkModelSearchREADMEexample.py for easier future README example updates.
Added the KE classes to the default __init__.py exports.
Updated requirements.txt and made setup.py read it to ensure dependency installation.
  • Loading branch information
T-Flet committed Jul 14, 2021
1 parent c5e3a98 commit ea369b9
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 116 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.idea/
Tests/Plots
Tests/Stats
Tests/Pickles


# Byte-compiled / optimized / DLL files
Expand Down
58 changes: 30 additions & 28 deletions GPy_ABCD/Models/modelSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from copy import deepcopy
from operator import attrgetter
from multiprocessing import Pool, cpu_count
from typing import Callable, List, Dict, Tuple
from typing import Callable, List, Dict, Tuple, Union

from GPy_ABCD.KernelExpressions.base import KernelExpression
from GPy_ABCD.KernelExpansion.grammar import start_kernels, production_rules, make_simple_kexs, expand
Expand All @@ -11,11 +11,11 @@


# TODO:
# - group input parameters into dictionaries
# - group input parameters into dictionaries?
# - make utility functions take in the kernel expression (or even full model) so that further criteria may be applied (e.g. presence of specific kernels etc)?
# - focus on documenting end-user and generic developer functions etc in sphinx
# - make the dynamic buffer configurable, or even allow inputting a list of numbers of models to keep per round
# - make an interactive mode which asks whether to go further, retaining how many etc
# - make an interactive/interruptable mode which asks whether to go further, retaining how many etc
# - allow the model lists in each round to be fit in batches, with interactive request to continue (timed response maybe)
# - show a live count of models fitted so far in each round (probably by batches)

Expand All @@ -39,33 +39,32 @@ def fit_mods_parallel_processes(X, Y, k_exprs, restarts = 5, optimiser = GPy_opt


def explore_model_space(X, Y,
start_kernels: Dict[str, List[KernelExpression]] = start_kernels['Default'], p_rules: Dict[str, List[Callable]] = production_rules['Default'], utility_function: Callable = BIC,
rounds: int = 2, buffer: int = 4, dynamic_buffer: bool = True, verbose: bool = True,
restarts: int = 5, model_list_fitter: Callable = fit_mods_parallel_processes, optimiser: str = GPy_optimisers[0]) -> Tuple[List[GPModel], List[List[GPModel]], List[KernelExpression], List[GPModel], List[GPModel]]:
start_kernels: List[Union[str, KernelExpression]] = start_kernels['Default'], p_rules: List[Callable] = production_rules['Default'], utility_function: Callable = BIC,
rounds: int = 2, beam: Union[int, List[int]] = [3, 2, 1], restarts: int = 5,
model_list_fitter: Callable = fit_mods_parallel_processes, optimiser: str = GPy_optimisers[0],
verbose: bool = True) -> Tuple[List[GPModel], List[List[GPModel]], List[KernelExpression], List[GPModel], List[GPModel]]:
'''Perform `rounds` rounds of kernel expansion followed by model fit, starting from the given `start_kernels` and expanding the best `beam` of them with `p_rules` production rules
NOTE: if the default `model_list_fitter` argument `fit_mods_parallel_processes` is used the function should be called from within a :code:`if __name__ == '__main__':` for full OS-agnostic use.
:param start_kernels: the starting kernels
:type start_kernels: Dict[str, List[KernelExpression]]
:param start_kernels: the 0th-round starting kernels
:type start_kernels: List[Union[str, KernelExpression]]
:param p_rules: the production rules applied at each expansion
:type p_rules: Dict[str, List[Callable]]
:type p_rules: List[Callable]
:param utility_function: model-scoring utility function: inputs are log-likelihood (ll), number of data points (n) and number of estimated parameters (k); AIC, AICc and BIC functions exported; arbitrary ones accepted
:type utility_function: Callable
:param rounds: number of rounds of model exploration
:type rounds: Int
:param buffer: number of best fit-models' kernels to expand each round
:type buffer: Int
:param dynamic_buffer: if True: buffer is increased by 2 at the beginning and decreased by 1 in the first two and last two rounds
:type dynamic_buffer: Boolean
:param verbose: produce verbose logs
:type verbose: Boolean
:param beam: number of best fit-models' kernels to expand each round, either an integer or a list of integers; in the latter case, if its length is less than `rounds` then the last value is repeated to make up the required length
:type beam: Union[Int, List[Int]]
:param restarts: number of GPy model-fitting restarts with different parameters
:type restarts: Int
:param model_list_fitter: function handling the fitting of a list of kernels to the same data; this is where parallelisation implementation can be changed
:type model_list_fitter: Callable
:param optimiser: identifying string for the model optimiser function; GPy 1.9.9 optimiser strings (GPy > paramz > optimization > optimization.py): 'lbfgsb', 'org-bfgs', 'fmin_tnc', 'scg', 'simplex', 'adadelta', 'rprop', 'adam'
:type optimiser: str
:param verbose: produce verbose logs
:type verbose: Boolean
:rtype: (sorted_models: [GPModel], tested_models: [[GPModel]], tested_k_exprs: [KernelExpression], expanded: [GPModel], not_expanded: [GPModel])
'''
Expand All @@ -82,39 +81,42 @@ def score(m): return m.compute_utility(utility_function)
expanded = []
tested_k_exprs = deepcopy(start_kexs)

original_buffer = buffer
if dynamic_buffer: buffer += 2 # Higher for the 1st round
if verbose: print(f'(All models are listed by descending {utility_function.__name__})\n\nBest round-{0} models [{len(tested_models[0])} new; {buffer} moving forward]: {print_k_list(not_expanded[:buffer])}')
# Handle beam argument: ensure it is a list of integers of length rounds
if isinstance(beam, list) and all(isinstance(x, int) for x in beam):
beam = beam + ([beam[-1]] * more) if (more := rounds - len(beam)) > 0 else beam[:rounds]
elif isinstance(beam, int): beam = [beam] * rounds
else: raise TypeError(f'The given beam argument ({beam}) is neither an integer nor a list of integers')

if verbose: print(f'(All models are listed by descending {utility_function.__name__})\n\nBest round-{0} models [{len(tested_models[0])} new; {beam[0]} moving forward]: {print_k_list(not_expanded[:beam[0]])}')

sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = model_search_rounds(X, Y,
original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded, model_list_fitter,
p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose, optimiser)
sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose)

if verbose: print(f'\nBest models overall: {print_k_list(sorted_models[:original_buffer])}\n')
if verbose: print(f'\nBest models overall: {print_k_list(sorted_models[:beam[0]])}\n')
return sorted_models, tested_models, tested_k_exprs, expanded, not_expanded


# This function is split from the above both for tidiness and to allow the possibility of continuing a search if desired
def model_search_rounds(X, Y, original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose, optimiser):
def model_search_rounds(X, Y, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
model_list_fitter, p_rules, utility_function, rounds, beam, restarts, optimiser, verbose):
'''
See explore_model_space description and source code for argument explanation and context
Note: sorted_models is not actually used but replaced with the new value; present as an argument just for consistency
'''
def score(m): return m.compute_utility(utility_function)

for d in range(1, rounds + 1):
new_k_exprs = [kex for kex in unique(flatten([expand(mod.kernel_expression, p_rules) for mod in not_expanded[:buffer]])) if kex not in tested_k_exprs]
for r, b in zip(range(1, rounds + 1), beam):
new_k_exprs = [kex for kex in unique(flatten([expand(mod.kernel_expression, p_rules) for mod in not_expanded[:b]])) if kex not in tested_k_exprs]
tested_models.append(sorted(model_list_fitter(X, Y, new_k_exprs, restarts, optimiser), key = score)) # tested_models[d]

sorted_models = sorted(flatten(tested_models), key = attrgetter('cached_utility_function')) # Merge-sort would be applicable
expanded += not_expanded[:buffer]
expanded += not_expanded[:b]
not_expanded = diff(sorted_models, expanded) # More efficient than sorting another whole list
tested_k_exprs += new_k_exprs

buffer -= 1 if dynamic_buffer and (d <= 2 or d in range(rounds - 1, rounds + 1)) else 0
if verbose: print(f'Round-{d} models [{len(tested_models[d])} new; {buffer} moving forward]:\n\tBest new: {print_k_list(tested_models[d][:original_buffer])}\n\tBest so far: {print_k_list(sorted_models[:original_buffer])}\n\tBest not-already-expanded: {print_k_list(not_expanded[:buffer])}')
if verbose: print(f'Round-{r} models [{len(tested_models[r])} new; {b} moving forward]:\n\tBest new: {print_k_list(tested_models[r][:beam[0]])}\n\tBest so far: {print_k_list(sorted_models[:beam[0]])}\n\tBest not-already-expanded: {print_k_list(not_expanded[:b])}')

return sorted_models, tested_models, tested_k_exprs, expanded, not_expanded

Expand Down
3 changes: 2 additions & 1 deletion GPy_ABCD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""GPy-ABCD - Basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system"""

__version__ = '1.0.3' # Change it in setup.py too
__version__ = '1.1' # Change it in setup.py too
__author__ = 'Thomas Fletcher <[email protected]>'
# __all__ = []

Expand All @@ -10,5 +10,6 @@
from GPy_ABCD.Util.modelUtil import BIC, AIC, AICc, fit_kex, fit_GPy_kern, model_printout, GPy_optimisers
from GPy_ABCD.KernelExpansion.grammar import start_kernels, production_rules_by_type, production_rules
from GPy_ABCD.KernelExpansion.kernelOperations import base_kerns, base_sigmoids
from GPy_ABCD.KernelExpressions.all import SumKE, ProductKE, ChangeKE


Loading

0 comments on commit ea369b9

Please sign in to comment.