Commit da61b69
Report the number of evaluations the GA tuner completes
epeters3 committed Apr 22, 2020
1 parent 730c1fd commit da61b69
Showing 2 changed files with 27 additions and 3 deletions.
19 changes: 16 additions & 3 deletions skplumber/tuners/ga.py
@@ -91,25 +91,37 @@ def ga_tune(
     metric: Metric,
     exit_on_pipeline_error: bool = True,
     **flexgakwargs,
-) -> t.Tuple[float, t.Dict[int, t.Dict[str, t.Any]]]:
+) -> t.Tuple[float, t.Dict[int, t.Dict[str, t.Any]], int]:
     """
     Performs genetic algorithm hyperparameter tuning on a copy of
     `pipeline`, returning the best score it could find and the
     hyperparameter configuration for that best score, so the user
     can decide to use it if they want.
+
+    Returns
+    -------
+    optimal_score : float
+        The best evaluator score the optimizer could find.
+    optimal_params : dict
+        The hyperparameter configuration that yielded the optimal score.
+    n_evals : int
+        The number of pipeline evaluations the optimizer completed along
+        the way.
     """
     pipeline_to_tune = copy.deepcopy(pipeline)
+    n_evals = 0  # keep track of how many evaluations were completed

     def objective(*args, **flexga_params) -> float:
         """
         The objective function the genetic algorithm will
         try to maximize.
         """
         params = get_params_from_flexga(flexga_params)
+        pipeline_to_tune.set_params(params)
+        nonlocal n_evals

         try:
             score = evaluator(pipeline_to_tune, X, y, metric)
-            pipeline_to_tune.set_params(params)
         except PipelineRunError as e:
             logger.exception(e)
             if exit_on_pipeline_error:

@@ -118,6 +130,7 @@ def objective(*args, **flexga_params) -> float:
             # TODO: make this `None` or `np.nan` instead.
             score = metric.worst_value

+        n_evals += 1
         # The genetic algorithm tries to maximize
         return -score if metric.opt_dir == OptimizationDirection.MINIMIZE else score

@@ -130,4 +143,4 @@ def objective(*args, **flexga_params) -> float:
     logger.info("tuning complete.")
     logger.info(f"found best pipeline configuration: {pipeline_to_tune}")
     logger.info(f"found best validation score of {optimal_score}")
-    return optimal_score, optimal_params
+    return optimal_score, optimal_params, n_evals
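
For context, here is a minimal sketch of how a caller might consume the new three-value return. It mirrors the test added below; Pipeline, classifiers, make_train_test_evaluator, and f1macro are the helpers that test file uses (their module paths are not shown in this commit), and the iris dataset is purely illustrative:

    # Sketch only: assumes the same module-level imports as
    # tests/test_ga_tune.py for Pipeline, classifiers,
    # make_train_test_evaluator, and f1macro, which this commit
    # does not show.
    from sklearn.datasets import load_iris

    from skplumber.tuners.ga import ga_tune  # path per this diff

    X, y = load_iris(return_X_y=True)  # illustrative classification data

    pipeline = Pipeline()
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()

    best_score, best_params, n_evals = ga_tune(
        pipeline, X, y, evaluate, f1macro, iters=2, population_size=6
    )
    print(f"best score {best_score} after {n_evals} pipeline evaluations")

    # best_params has the {step_index: {param: value}} shape that
    # Pipeline.set_params consumes (see pipeline_to_tune.set_params
    # above), so the winning configuration can be applied directly:
    pipeline.set_params(best_params)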
11 changes: 11 additions & 0 deletions tests/test_ga_tune.py
@@ -40,3 +40,14 @@ def test_can_tune_multiple_primitives(self):
         evaluate = make_train_test_evaluator()
         logger.info(f"baseline score: {evaluate(pipeline, self.X, self.y, f1macro)}")
         ga_tune(pipeline, self.X, self.y, evaluate, f1macro, iters=2)
+
+    def test_returns_correct_number_evals(self):
+        pipeline = Pipeline()
+        pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
+        evaluate = make_train_test_evaluator()
+        n_expected_evals = 12
+
+        _, _, n_evals = ga_tune(
+            pipeline, self.X, self.y, evaluate, f1macro, iters=2, population_size=6
+        )
+        self.assertEqual(n_evals, n_expected_evals)
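
(With population_size=6 and iters=2, the expectation encoded by n_expected_evals = 12 is that flexga evaluates each of the 6 candidate configurations once per generation, i.e. 6 × 2 = 12 completed pipeline evaluations.)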
