Skip to content

Commit

Permalink
Version bump to 0.1.5 and greatly expanded the README to begin docume…
Browse files Browse the repository at this point in the history
…nting crucial components for future functionality expansion
  • Loading branch information
T-Flet committed Mar 3, 2020
1 parent b9166e2 commit ef91fd8
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 24 deletions.
2 changes: 1 addition & 1 deletion GPy_ABCD/Models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from GPy_ABCD.Models.modelSearch import explore_model_space, standard_start_kernels, production_rules_all
from GPy_ABCD.Models.modelSearch import GPModel, explore_model_space, model_search_rounds, standard_start_kernels, production_rules_all, fit_one_model, fit_model_list_not_parallel, fit_model_list_parallel
41 changes: 34 additions & 7 deletions GPy_ABCD/Models/modelSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,32 @@ def fit_model_list_parallel(X, Y, k_exprs, restarts = 5):


# start_kernels = make_simple_kexs(['WN']) #for the original ABCD
def explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules = production_rules_all, restarts = 5,
utility_function = 'BIC', rounds = 2, buffer = 4, dynamic_buffer = True, verbose = False, parallel = True):
def explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules = production_rules_all, utility_function = 'BIC',
restarts = 5, rounds = 2, buffer = 4, dynamic_buffer = True, verbose = False, parallel = True):
"""
Perform `rounds` rounds of kernel expansion followed by model fit, starting from the given `start_kernels` and
expanding the best `buffer` of them with the `p_rules` production rules
:param start_kernels: the starting kernels
:type start_kernels: [KernelExpression]
:param p_rules: the production rules applied at each expansion
:type p_rules: [function]
:param utility_function: Name of utility function: AIC, AICc and BIC available so far (will allow function input in future releases)
:type utility_function: String
:param restarts: Number of GPy model-fitting restarts with different parameters
:type restarts: Int
:param rounds: Number of rounds of model exploration
:type rounds: Int
:param buffer: Number of best fit-models' kernels to expand each round
:type buffer: Int
:param dynamic_buffer: If True: buffer is increased by 2 at the beginning and decreased by 1 in the first two and last two rounds
:type dynamic_buffer: Boolean
:param verbose: Produce verbose logs
:type verbose: Boolean
:param parallel: Perform multiple model fits concurrently on all available processors (vs GPy's own parallel argument, which splits a single fit over multiple processors)
:type parallel: Boolean
:rtype: (sorted_models: [GPModel], tested_models: [[GPModel]], tested_k_exprs: [KernelExpression], expanded: [GPModel], not_expanded: [GPModel])
"""
if len(np.shape(X)) == 1: X = np.array(X)[:, None]
if len(np.shape(Y)) == 1: Y = np.array(Y)[:, None]
Expand All @@ -106,16 +128,20 @@ def explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules =

sorted_models, tested_models, tested_k_exprs, expanded, not_expanded = model_search_rounds(X, Y,
original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded, fit_model_list,
p_rules, restarts, utility_function, rounds, buffer, dynamic_buffer, verbose)
p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose)

if verbose: print(f'\nBest models overall: {print_k_list(sorted_models[:original_buffer])}\n')
return sorted_models, tested_models, tested_k_exprs, expanded, not_expanded


# This function is split from the above both for tidyness and to allow the possibility of continuing a search if desired
# This function is split from the above both for tidiness and to allow the possibility of continuing a search if desired
def model_search_rounds(X, Y, original_buffer, sorted_models, tested_models, tested_k_exprs, expanded, not_expanded,
fit_model_list, p_rules, restarts, utility_function, rounds, buffer, dynamic_buffer, verbose):
# Note: sorted_models is not actually used but replaced with the new value; present as an argument just for consistency
fit_model_list, p_rules, utility_function, restarts, rounds, buffer, dynamic_buffer, verbose):
"""
See explore_model_space description and source code for argument explanation and context
Note: sorted_models is not actually used but replaced with the new value; present as an argument just for consistency
"""
for d in range(1, rounds + 1):
new_k_exprs = [kex for kex in unique(flatten([expand(mod.kernel_expression, p_rules) for mod in not_expanded[:buffer]])) if kex not in tested_k_exprs]
tested_models.append(sorted(fit_model_list(X, Y, new_k_exprs, restarts), key = methodcaller(utility_function))) # tested_models[d]
Expand All @@ -138,3 +164,4 @@ def model_search_rounds(X, Y, original_buffer, sorted_models, tested_models, tes
# - make an interactive mode which asks whether to go further, retaining how many etc
# - allow the model lists in each round to be fit in batches, with interactive request to continue (timed response maybe)
# - show an updating count of models having been fitted so far in this round; at least by batches
# - Make the utility_function argument optionally a function taking (ll, n, k) as arguments as in kernelUtil
100 changes: 87 additions & 13 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,42 @@ Automatic construction and natural-language description of nonparametric regress
In: National Conference on Artificial Intelligence, 7/27/2014, pp. 1242-1250.
Available online at https://academic.microsoft.com/paper/1950803081.)

Installation
------------
::

pip install GPy_ABCD

Usage
-----
The main function exported by this package is :code:`explore_model_space`;
note that if the :code:`parallel` argument is :code:`True` then the function should be
called from within an :code:`if __name__ == '__main__':` block

::

import numpy as np
from GPy_ABCD import *

if __name__ == '__main__':
# Example data
X = np.linspace(-10, 10, 101)[:, None]
Y = np.cos( (X - 5) / 2 )**2 * X * 2 + np.random.randn(101, 1)

best_mods, all_mods, all_exprs = find_best_model(X, Y,
# Main function call with suggested arguments
best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y,
start_kernels = standard_start_kernels, p_rules = production_rules_all,
restarts = 5, utility_function = 'BIC', rounds = 2, buffer = 3,
utility_function = 'BIC', restarts = 3, rounds = 2, buffer = 3,
dynamic_buffer = True, verbose = False, parallel = True)

# Typical full output printout

# Typical output exploration printout

for mod_depth in all_mods: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')

print()

# Explore the best 3 models in detail
from matplotlib import pyplot as plt
for bm in best_mods[:3]:
print(bm.kernel_expression)
Expand All @@ -53,24 +69,82 @@ Usage
bm.model.plot()
print(bm.interpret())

# Perform some predictions
predict_X = np.linspace(10, 15, 50)[:, None]
preds = best_mods[0].predict(predict_X)
print(preds)

plt.show()

Note: if the :code:`parallel` argument is :code:`True` then the function should be
called from within an :code:`if __name__ == '__main__':` block

Installation
------------
::
.. figure:: selected_output_example.png
:align: center
:figclass: align-center

pip install GPy_ABCD
Selection of output from the above example

Importable elements from this package (refer to the section below for context):

- The :code:`GPModel` class
- The main function :code:`explore_model_space`
- The :code:`model_search_rounds` function to continue a search from where another left off
- Single and list model fitting functions :code:`fit_one_model`, :code:`fit_model_list_not_parallel` and :code:`fit_model_list_parallel`
- The default start kernels :code:`standard_start_kernels` and production rules :code:`production_rules_all`, along with the same production rules grouped by type in a dictionary :code:`production_rules_by_type`
- The concrete :code:`KernelExpression` subclasses :code:`SumKE`, :code:`ProductKE` and :code:`ChangeKE`
- The frozensets of :code:`base_kerns` and :code:`base_sigmoids`

(The purpose of exporting elements in the last 3 lines is for users to create alternative sets of production
rules and starting kernel lists by mixing kernel expressions and raw strings of base kernels)

Project Structure
-----------------

Read the paper mentioned above for a full picture of what an ABCD system is, but, briefly,
it consists in exploring a space of compositional kernels built from a few carefully selected base ones,
returning the best fitting models using them and generating simple text interpretations of the fits based
on the functional shapes of the final composed covariance kernels and parameter values.

The key pillars of this project's ABCD system implementation structure are the following:

- :code:`Kernels.baseKernels` contains the "mathematical" base kernels (i.e. GPy kernel objects) for the whole machinery

- This script also acts as a general configuration of what the system can use (including a few pre-packaged flags for certain behaviours)
- Some of the base kernels are simply wrapped GPy-provided kernels (White-Noise, Constant and Squared-Exponential)
- The others are either not present in GPy's default arsenal or are improved versions of ones which are (Linear which can identify polynomial roots and purely-Periodic standard-periodic kernel)
- It contains sigmoidal kernels (both base sigmoids and indicator-like ones, i.e. sigmoidal hat/well) which are not used directly in the symbolic expressions but are substituted in by change-type kernels
- It contains (multiple implementations of) change-point and change-window kernels which use the aforementioned sigmoidals
- :code:`KernelExpansion.kernelExpression` contains the "symbolic" kernel classes constituting the nodes with which to build complex kernel expressions in the form of trees

- The non-abstract kernel expression classes are :code:`SumKE`, :code:`ProductKE` and :code:`ChangeKE`
- :code:`SumKE` and :code:`ProductKE` are direct subclasses of the abstract class :code:`SumOrProductKE` and only really differ in how they self-simplify and distribute over the other
- :code:`ChangeKE` could be split into separate change-point and change-window classes, but a single argument difference allows full method overlap
- :code:`SumOrProductKE` and :code:`ChangeKE` are direct subclasses of the abstract base class :code:`KernelExpression`
- The above kernel expression classes have a wide variety of methods providing the following general functionality, in order to keep the rest of the project light on ad-hoc functions:

- They self-simplify when modified through the appropriate methods (they are symbolic expressions after all)
- They can produce GPy kernel objects
- They can line up with and absorb fit model parameters from a matching GPy object
- They can rearrange to a sum-of-products form
- They can generate text interpretations of their sum-of-products form
- :code:`KernelExpansion.grammar` contains the various production rules and default kernel lists used in model space exploration
- :code:`Models.modelSearch` contains the system front-end elements:

- The :code:`GPModel` class, which is where the GPy kernels/models interact with the symbolic kernel expressions
- Functions to fit lists of models (the parallel version uses :code:`multiprocessing`'s :code:`Pool`, but alternative parallel frameworks' versions can be implemented here)
- The :code:`explore_model_space` function, which is the point of it all
- The :code:`model_search_rounds` function, which is used by the above but also meant to continue searching by building on past exploration results

Further Notes
-------------

Requirements
^^^^^^^^^^^^
- The important tests are in pytest scripts, but many other scripts are present and intended as functionality showcases or "tests by inspection"
- Additionally, pytest.ini has two opposite configuration lines intended to be toggled to perform "real" tests vs other "by inspection" tests
- Please feel free to fork and expand this project: it is not the focus of my research, merely a component I need for part of it, so I will not be expanding its functionality in the near future

Python 3.7
Possible expansion directions:

See requirements.txt
- Many "TODO" comments are present throughout the codebase
- Optimising ChangeWindow window-location fitting is an open issue (multiple implementations of change-window and the sigmoidal kernels they rely on have already been tried; see the commented-out declarations in baseKernels.py)
- The periodic kernel could be more stable in non-periodic-data fits (GPy's own as well)
- Making each project layer accept multidimensional data, starting from the GPy kernels (some already do)
- Expanding on the GPy side of things: add more methods to the kernels in order to make use of the full spectrum of GPy features (MCMC etc)
4 changes: 2 additions & 2 deletions Tests/checkModelSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
# from timeit import timeit
# def statement():
# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels=standard_start_kernels, p_rules=production_rules_all,
# restarts=2, utility_function='BIC', rounds=2, buffer=3, dynamic_buffer = True, verbose=True, parallel=True)
# utility_function='BIC', restarts=2, rounds=2, buffer=3, dynamic_buffer = True, verbose=True, parallel=True)
# print(timeit(statement, number = 3))


# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = ['WN'], p_rules = production_rules_all,
# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = test_start_kernels, p_rules = production_rules_all,
# best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = extended_start_kernels, p_rules = production_rules_all,
best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y, start_kernels = standard_start_kernels, p_rules = production_rules_all,
restarts = 3, utility_function = 'BIC', rounds = 2, buffer = 2, dynamic_buffer = True, verbose = True, parallel = True)
utility_function = 'BIC', restarts = 3, rounds = 2, buffer = 2, dynamic_buffer = True, verbose = True, parallel = True)


for mod_depth in all_mods: print(', '.join([str(mod.kernel_expression) for mod in mod_depth]) + f'\n{len(mod_depth)}')
Expand Down
Binary file added selected_output_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def read(filename):

setup(
name="GPy-ABCD",
version="0.1.4",
version="0.1.5",
url="https://github.com/T-Flet/GPy-ABCD",
license='BSD 3-Clause',

Expand Down

0 comments on commit ef91fd8

Please sign in to comment.