Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor main loop of tranquilo #397

Merged
merged 13 commits into from
Nov 12, 2022
2 changes: 2 additions & 0 deletions src/estimagic/optimization/subsolvers/gqtpar.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def gqtpar(model, *, k_easy=0.1, k_hard=0.2, maxiter=200):
"""
hessian_info = HessianInfo()

x_candidate = np.zeros_like(model.linear_terms)

janosg marked this conversation as resolved.
Show resolved Hide resolved
# Small floating point number signaling that for vectors smaller
# than that backward substituition is not reliable.
# See Golub, G. H., Van Loan, C. F. (2013), "Matrix computations", p.165.
Expand Down
32 changes: 32 additions & 0 deletions src/estimagic/optimization/tranquilo/count_points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from functools import partial

"""Functions to count the effective number of points in a given sample."""


def get_counter(counter, bounds):
"""Get a function that counts the effective number of points in a sample.

The resulting function takes the following arguments:
- xs (np.ndarray): A 2d numpy array containing a sample.
- trustregion (TrustRegion): The current trustregion.

Args:
counter (str)
bounds (Bounds)
"""
if isinstance(counter, str):
built_in_counters = {"count_all": count_all}
counter = built_in_counters[counter]
elif not callable(counter):
raise TypeError("counter must be a string or callable.")

out = partial(counter, bounds=bounds)
return out


def count_all(xs, trustregion, bounds): # noqa: U100
return len(xs)


def count_clusters(xs, trustregion, bounds): # noqa: U100
raise NotImplementedError()
11 changes: 9 additions & 2 deletions src/estimagic/optimization/tranquilo/filter_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def get_sample_filter(sample_filter="keep_all"):
built_in_filters = {
"discard_all": discard_all,
"keep_all": keep_all,
"keep_sphere": keep_sphere,
"drop_collinear": drop_collinear,
"drop_pounders": drop_collinear_pounders,
}
Expand All @@ -45,6 +46,12 @@ def keep_all(xs, indices, state):
return xs, indices


def keep_sphere(xs, indices, state):
dists = np.linalg.norm(xs - state.trustregion.center, axis=1)
keep = dists <= state.trustregion.radius
return xs[keep], indices[keep]
janosg marked this conversation as resolved.
Show resolved Hide resolved


def drop_collinear(xs, indices, state):
"""Make sure that the points that are kept are linearly independent."""
raise NotImplementedError()
Expand All @@ -68,8 +75,8 @@ def _drop_collinear_pounders(xs, indices, state):
indices_reverse = indices[::-1]
indexer_reverse = np.arange(n_samples)[::-1]

radius = state.radius
center = state.x
radius = state.trustregion.radius
center = state.trustregion.center
index_center = int(np.where(indices_reverse == state.index)[0])
centered_xs = (xs - center) / radius

Expand Down
16 changes: 12 additions & 4 deletions src/estimagic/optimization/tranquilo/fit_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,10 @@ def get_fitter(fitter, user_options=None, model_info=None):
def _fitter_template(
x,
y,
fitter,
model_info,
options,
weights=None,
fitter=None,
model_info=None,
options=None,
):
"""Fit a model to data.

Expand All @@ -122,9 +123,16 @@ def _fitter_template(
Returns:
VectorModel or ScalarModel: Results container.
"""
n_params = x.shape[1]
n_samples, n_params = x.shape
n_residuals = y.shape[1]

# weight the data in order to get weighted fitting from fitters that do not support
# weights. Inspired by: https://stackoverflow.com/a/52452833
if weights is not None:
_root_weights = np.sqrt(weights).reshape(n_samples, 1)
y = y * _root_weights
x = x * _root_weights

coef = fitter(x, y, model_info, **options)

# results processing
Expand Down
2 changes: 1 addition & 1 deletion src/estimagic/optimization/tranquilo/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def log_d_cutoff_simulator(
sampler = partial(_sampler, trustregion=trustregion)
raw = []
for _ in range(n_simulations):
x = sampler(target_size=n_samples, rng=rng)
x = sampler(n_points=n_samples, rng=rng)
raw.append(log_d_quality_calculator(x, trustregion, bounds))
out = np.nanmean(raw)
return out
Expand Down
41 changes: 41 additions & 0 deletions src/estimagic/optimization/tranquilo/handle_infinity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import numpy as np


def get_infinity_handler(infinity_handler):
if isinstance(infinity_handler, str):
built_in_handlers = {"relative": clip_relative}
infinity_handler = built_in_handlers[infinity_handler]
elif not callable(infinity_handler):
raise TypeError("infinity_handler must be a string or callable.")

return infinity_handler


def clip_relative(fvecs):
"""Clip infinities at a value that is relative to worst finite value.

Args:
fvecs (np.ndarray): 2d numpy array of shape n_samples, n_residuals.


Returns:
np.ndarray: Array of same shape as fvecs with finite values.

"""
_mask = np.isfinite(fvecs)

_mins = np.min(fvecs, axis=0, where=_mask, initial=1e300)
_maxs = np.max(fvecs, axis=0, where=_mask, initial=-1e300)

# abs is necessary because if all values are infinite, the diffs can switch sign
# due to the initial value in the masked min and max
_diff = np.abs(_maxs - _mins)

_pos_penalty = _maxs + 2 * _diff + 1
_neg_penalty = _mins - 2 * _diff - 1
janosg marked this conversation as resolved.
Show resolved Hide resolved

out = np.nan_to_num(
fvecs, nan=_pos_penalty, posinf=_pos_penalty, neginf=_neg_penalty
)

return out
Loading