Skip to content

Commit

Permalink
Merge pull request #104 from automl/development
Browse files Browse the repository at this point in the history
Release 1.0.6
  • Loading branch information
shukon authored Oct 1, 2019
2 parents b235a31 + 6313eb4 commit cff1b90
Show file tree
Hide file tree
Showing 9 changed files with 273 additions and 155 deletions.
2 changes: 1 addition & 1 deletion pimp/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Version information."""
__version__ = "1.0.5"
__version__ = "1.0.6"
62 changes: 52 additions & 10 deletions pimp/epm/base_epm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from pyrfr import regression

from smac.epm.base_epm import AbstractEPM
from smac.configspace import (
CategoricalHyperparameter,
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
Constant,
)


class RandomForestWithInstances(AbstractEPM):
Expand All @@ -28,8 +34,11 @@ class RandomForestWithInstances(AbstractEPM):
logger : logging.logger
"""

def __init__(self, types: np.ndarray,
def __init__(self,
configspace,
types: np.ndarray,
bounds: np.ndarray,
seed: int,
num_trees: int = 10,
do_bootstrapping: bool = True,
n_points_per_tree: int = -1,
Expand All @@ -39,13 +48,14 @@ def __init__(self, types: np.ndarray,
max_depth: int = 20,
eps_purity: int = 1e-8,
max_num_nodes: int = 2 ** 20,
seed: int = 42,
logged_y: bool = True,
**kwargs):
"""Constructor
Parameters
----------
configspace: ConfigurationSpace
configspace to be passed to random forest (used to impute inactive parameter-values)
types : np.ndarray (D)
Specifies the number of categorical values of an input dimension where
the i-th entry corresponds to the i-th input dimension. Let's say we
Expand All @@ -54,6 +64,8 @@ def __init__(self, types: np.ndarray,
have to pass np.array([2, 0]). Note that we count starting from 0.
bounds : np.ndarray (D, 2)
Specifies the bounds for continuous features.
seed : int
The seed that is passed to the random_forest_run library.
num_trees : int
The number of trees in the random forest.
do_bootstrapping : bool
Expand All @@ -74,17 +86,20 @@ def __init__(self, types: np.ndarray,
different
max_num_nodes : int
The maximum total number of nodes in a tree
seed : int
The seed that is passed to the random_forest_run library.
logged_y: bool
Indicates if the y data is transformed (i.e. put on logscale) or not
"""
try:
super().__init__(types, bounds, **kwargs)
super().__init__(configspace=configspace, types=types, bounds=bounds, seed=seed, **kwargs)
except TypeError:
# To ensure backwards-compatibility with smac<0.9.0
super().__init__(**kwargs)

try:
# To ensure backwards-compatibility with smac==0.10.0
super().__init__(types, bounds, **kwargs)
except TypeError:
# To ensure backwards-compatibility with smac<0.9.0
super().__init__(**kwargs)

self.configspace = configspace
self.types = types
self.bounds = bounds
self.rng = regression.default_random_engine(seed)
Expand Down Expand Up @@ -112,9 +127,33 @@ def __init__(self, types: np.ndarray,
min_samples_leaf, max_depth, eps_purity, seed]
self.seed = seed

self.impute_values = {}

self.logger = logging.getLogger(self.__module__ + "." +
self.__class__.__name__)

def _impute_inactive(self, X: np.ndarray) -> np.ndarray:
    """Return a copy of ``X`` with non-finite entries replaced column-wise.

    Column ``idx`` of ``X`` is matched with the ``idx``-th hyperparameter
    returned by ``self.configspace.get_hyperparameters()``. The replacement
    value for each column is computed once and cached in
    ``self.impute_values`` (keyed by column index):

    * hyperparameter without parents        -> ``None``
    * ``CategoricalHyperparameter``         -> ``len(hp.choices)``
    * uniform float / integer hyperparameter -> ``-1``
    * ``Constant``                          -> ``1``

    Parameters
    ----------
    X : np.ndarray
        2-d array indexed as ``X[:, idx]``; it is not modified.
        NOTE(review): presumably shaped (n_samples, n_features) with the
        hyperparameter columns first -- confirm against the callers.

    Returns
    -------
    np.ndarray
        A copy of ``X`` in which every non-finite entry (NaN / +-inf) of a
        hyperparameter column has been overwritten with that column's
        impute value.

    Raises
    ------
    ValueError
        If a hyperparameter that has parents is of a type for which no
        impute value is defined.
    """
    X = X.copy()
    for idx, hp in enumerate(self.configspace.get_hyperparameters()):
        if idx not in self.impute_values:
            parents = self.configspace.get_parents_of(hp.name)
            if len(parents) == 0:
                # A hyperparameter without parents gets no dedicated
                # placeholder value.
                # NOTE(review): presumably such columns never contain
                # non-finite entries -- confirm.
                self.impute_values[idx] = None
            elif isinstance(hp, CategoricalHyperparameter):
                # NOTE(review): presumably one past the largest encoded
                # choice index, i.e. an otherwise unused category -- confirm.
                self.impute_values[idx] = len(hp.choices)
            elif isinstance(hp, (UniformFloatHyperparameter,
                                 UniformIntegerHyperparameter)):
                self.impute_values[idx] = -1
            elif isinstance(hp, Constant):
                self.impute_values[idx] = 1
            else:
                # Name the offending hyperparameter so the failure can be
                # diagnosed without a debugger.
                raise ValueError(
                    "Cannot impute inactive values for hyperparameter %r of "
                    "unsupported type %s" % (hp.name, type(hp).__name__))

        nonfinite_mask = ~np.isfinite(X[:, idx])
        X[nonfinite_mask, idx] = self.impute_values[idx]

    return X

def _train(self, X: np.ndarray, y: np.ndarray, **kwargs):
"""Trains the random forest on X and y.
Expand All @@ -130,9 +169,8 @@ def _train(self, X: np.ndarray, y: np.ndarray, **kwargs):
self
"""

self.X = X
self.X = self._impute_inactive(X)
self.y = y.flatten()

if self.n_points_per_tree <= 0:
self.rf_opts.num_data_points_per_tree = self.X.shape[0]
else:
Expand All @@ -141,6 +179,7 @@ def _train(self, X: np.ndarray, y: np.ndarray, **kwargs):
self.rf.options = self.rf_opts
data = self.__init_data_container(self.X, self.y)
self.rf.fit(data, rng=self.rng)

return self

def __init_data_container(self, X: np.ndarray, y: np.ndarray):
Expand Down Expand Up @@ -200,6 +239,9 @@ def _predict(self, X: np.ndarray):
(self.types.shape[0], X.shape[1]))

means, vars_ = [], []

X = self._impute_inactive(X)

for row_X in X:
mean, var = self.rf.predict_mean_var(row_X)
means.append(mean)
Expand Down
11 changes: 6 additions & 5 deletions pimp/epm/epar_x_rfwi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

class EPARrfi(rfi):

def __init__(self, types, bounds,
def __init__(self, configspace, types, bounds, seed,
cutoff=0,
threshold=0, **kwargs):
"""
Expand All @@ -21,6 +21,8 @@ def __init__(self, types, bounds,
Parameters
----------
configspace: ConfigurationSpace
configspace to be passed to random forest (used to impute inactive parameter-values)
types: np.ndarray (D)
Specifies the number of categorical values of an input dimension, where
the i-th entry corresponds to the i-th input dimension. Let's say we have
Expand All @@ -29,6 +31,8 @@ def __init__(self, types, bounds,
have to pass np.array([2, 0]). Note that we count starting from 0.
bounds: np.ndarray (D)
Specifies the bounds
seed: int
The seed that is passed to the random_forest_run library.
instance_features: np.ndarray (I, K)
Contains the K dimensional instance features
of the I different instances
Expand All @@ -48,16 +52,13 @@ def __init__(self, types, bounds,
max_num_nodes: int
seed: int
The seed that is passed to the random_forest_run library.
cutoff: int
The cutoff used in the specified scenario
threshold:
Maximal possible value
"""
super().__init__(types=types, bounds=bounds, **kwargs)
super().__init__(configspace=configspace, types=types, bounds=bounds, seed=seed, **kwargs)
np.seterr(divide='ignore', invalid='ignore')
self.cutoff = cutoff
self.threshold = threshold
Expand Down
6 changes: 3 additions & 3 deletions pimp/epm/unlogged_epar_x_rfwi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

class UnloggedEPARXrfi(EPARrfi, Urfi):

def __init__(self, types, bounds,
def __init__(self, configspace, types, bounds, seed,
cutoff=0,
threshold=0, **kwargs):
"""
TODO
"""
Urfi.__init__(self, types=types, bounds=bounds, **kwargs)
EPARrfi.__init__(self, types=types, bounds=bounds, cutoff=cutoff, threshold=threshold, **kwargs)
Urfi.__init__(self, configspace=configspace, types=types, bounds=bounds, seed=seed, **kwargs)
EPARrfi.__init__(self, configspace=configspace, types=types, bounds=bounds, seed=seed, cutoff=cutoff, threshold=threshold, **kwargs)

def predict(self, X):
"""
Expand Down
12 changes: 7 additions & 5 deletions pimp/epm/unlogged_rfwi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@

class Unloggedrfwi(rfi):

def __init__(self, types, bounds, **kwargs):
def __init__(self, configspace, types, bounds, seed, **kwargs):
"""
Interface to the random forest that takes instance features
into account.
Parameters
----------
configspace: ConfigurationSpace
configspace to be passed to random forest (used to impute inactive parameter-values)
types: np.ndarray (D)
Specifies the number of categorical values of an input dimension, where
the i-th entry corresponds to the i-th input dimension. Let's say we have
Expand All @@ -27,6 +29,9 @@ def __init__(self, types, bounds, **kwargs):
have to pass np.array([2, 0]). Note that we count starting from 0.
bounds: np.ndarray (D)
Specifies the bounds
seed: int
The seed that is passed to the random_forest_run library.
instance_features: np.ndarray (I, K)
Contains the K dimensional instance features
of the I different instances
Expand All @@ -46,16 +51,13 @@ def __init__(self, types, bounds, **kwargs):
max_num_nodes: int
seed: int
The seed that is passed to the random_forest_run library.
cutoff: int
The cutoff used in the specified scenario
threshold:
Maximal possible value
"""
super().__init__(types=types, bounds=bounds, **kwargs)
super().__init__(configspace=configspace, types=types, bounds=bounds, seed=seed, **kwargs)

# With the usage of pyrfr 0.8.0 this method is obsolete.
# def _predict(self, X):
Expand Down
6 changes: 5 additions & 1 deletion pimp/evaluator/base_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections import OrderedDict

from smac.epm.rf_with_instances import RandomForestWithInstances
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from pimp.configspace import ConfigurationSpace
from pimp.utils import Scenario
Expand Down Expand Up @@ -102,6 +103,9 @@ def _refit_model(self, types, bounds, X, y):
y:ndarray
corresponding y vector
"""
self.model = RandomForestWithInstances(types, bounds, do_bootstrapping=True)
# We need a fake config-space to bypass the imputation of inactive values in the random forest implementation
fake_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")

self.model = RandomForestWithInstances(fake_cs, types, bounds, seed=12345, do_bootstrapping=True)
self.model.rf_opts.compute_oob_error = True
self.model.train(X, y)
Loading

0 comments on commit cff1b90

Please sign in to comment.