From 09da67c2437ceee17de2b36980eb35d33948d9e3 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 25 Feb 2020 14:52:48 +0000 Subject: [PATCH] Fix TPE Suggestion (#1063) * Init change * Change hyperopt suggestion * Generate new id in Trial loop * Run tpe suggest for each Trial * Move is first run --- .../hyperopt/v1alpha3/requirements.txt | 2 +- .../hyperopt/base_hyperopt_service.py | 261 ++++++++++++------ pkg/suggestion/v1alpha3/hyperopt_service.py | 30 +- 3 files changed, 191 insertions(+), 102 deletions(-) diff --git a/cmd/suggestion/hyperopt/v1alpha3/requirements.txt b/cmd/suggestion/hyperopt/v1alpha3/requirements.txt index 3f1d3961587..b4bf1ae04e8 100644 --- a/cmd/suggestion/hyperopt/v1alpha3/requirements.txt +++ b/cmd/suggestion/hyperopt/v1alpha3/requirements.txt @@ -7,4 +7,4 @@ scipy>=0.19.1 forestci==0.3 protobuf==3.9.1 googleapis-common-protos==1.6.0 -hyperopt==0.1.2 +hyperopt==0.2.3 diff --git a/pkg/suggestion/v1alpha3/hyperopt/base_hyperopt_service.py b/pkg/suggestion/v1alpha3/hyperopt/base_hyperopt_service.py index 455228fea9f..23d3f4ccc17 100644 --- a/pkg/suggestion/v1alpha3/hyperopt/base_hyperopt_service.py +++ b/pkg/suggestion/v1alpha3/hyperopt/base_hyperopt_service.py @@ -7,12 +7,15 @@ logger = logging.getLogger("BaseHyperoptService") +TPE_ALGORITHM_NAME = "tpe" +RANDOM_ALGORITHM_NAME = "random" + class BaseHyperoptService(object): - def __init__(self, algorithm_name="tpe", random_state=None): - self.random_state = random_state - if algorithm_name == 'tpe': + def __init__(self, algorithm_name=TPE_ALGORITHM_NAME, random_state=None, search_space=None): + self.algorithm_name = algorithm_name + if self.algorithm_name == TPE_ALGORITHM_NAME: self.hyperopt_algorithm = hyperopt.tpe.suggest - elif algorithm_name == 'random': + elif self.algorithm_name == RANDOM_ALGORITHM_NAME: self.hyperopt_algorithm = hyperopt.rand.suggest # elif algorithm_name == 'hyperopt-anneal': # self.hyperopt_algorithm = hyperopt.anneal.suggest_batch @@ -21,13 +24,17 @@ def __init__(self, algorithm_name="tpe", random_state=None): else: raise Exception('"Failed to create the algortihm: {}'.format(algorithm_name)) - def getSuggestions(self, search_space, trials, request_number): - """ - Get the new suggested trials with the given algorithm. - """ + self.search_space = search_space + # New hyperopt variables + self.hyperopt_rstate = np.random.RandomState(random_state) + self.create_hyperopt_domain() + self.create_fmin() + self.is_first_run = True + + def create_hyperopt_domain(self): # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)} hyperopt_search_space = {} - for param in search_space.params: + for param in self.search_space.params: if param.type == INTEGER: hyperopt_search_space[param.name] = hyperopt.hp.quniform( param.name, @@ -38,102 +45,174 @@ def getSuggestions(self, search_space, trials, request_number): param.name, float(param.min), float(param.max)) - elif param.type == CATEGORICAL \ - or param.type == DISCRETE: + elif param.type == CATEGORICAL or param.type == DISCRETE: hyperopt_search_space[param.name] = hyperopt.hp.choice( param.name, param.list) - # New hyperopt variables - hyperopt_rstate = np.random.RandomState(self.random_state) - hyperopt_domain = hyperopt.Domain( + + self.hyperopt_domain = hyperopt.Domain( None, hyperopt_search_space, pass_expr_memo_ctrl=None) - hyperopt_trial_specs = [] - hyperopt_trial_results = [] - # Example: # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}] - hyperopt_trial_miscs = [] - hyperopt_trial_new_ids = [] + def create_fmin(self): + self.fmin = hyperopt.FMinIter( + self.hyperopt_algorithm, + self.hyperopt_domain, + trials=hyperopt.Trials(), + max_evals=-1, + rstate=self.hyperopt_rstate, + verbose=False) - # Update hyperopt for trained trials with completed advisor trials - completed_hyperopt_trials = hyperopt.Trials() - for trial in trials: - # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]} - hyperopt_trial_miscs_idxs = {} - # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]} - hyperopt_trial_miscs_vals = {} - new_id = trial.name - hyperopt_trial_new_ids.append(new_id) - hyperopt_trial_misc = dict( - tid=new_id, cmd=hyperopt_domain.cmd, workdir=hyperopt_domain.workdir) - for param in search_space.params: - parameter_value = None - for assignment in trial.assignments: - if assignment.name == param.name: - parameter_value = assignment.value - break - if param.type == INTEGER: - hyperopt_trial_miscs_idxs[param.name] = [new_id] - hyperopt_trial_miscs_vals[param.name] = [ - parameter_value] - elif param.type == DOUBLE: - hyperopt_trial_miscs_idxs[param.name] = [new_id] - hyperopt_trial_miscs_vals[param.name] = [ - parameter_value] - elif param.type == DISCRETE or param.type == CATEGORICAL: - index_of_value_in_list = param.list.index(parameter_value) - hyperopt_trial_miscs_idxs[param.name] = [trial.name] - hyperopt_trial_miscs_vals[param.name] = [ - index_of_value_in_list - ] - - hyperopt_trial_specs.append(None) - - hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs - hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals - hyperopt_trial_miscs.append(hyperopt_trial_misc) - - # TODO: Use negative objective value for loss or not - objective_for_hyperopt = float(trial.target_metric.value) - if search_space.goal == MAX_GOAL: - # Now hyperopt only supports fmin and we need to reverse objective value for maximization - objective_for_hyperopt = -1 * objective_for_hyperopt - hyperopt_trial_result = { - "loss": objective_for_hyperopt, - "status": hyperopt.STATUS_OK - } - hyperopt_trial_results.append(hyperopt_trial_result) - if len(trials) > 0: - # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None} - hyperopt_trials = completed_hyperopt_trials.new_trial_docs( - hyperopt_trial_new_ids, hyperopt_trial_specs, hyperopt_trial_results, hyperopt_trial_miscs) - for current_hyperopt_trials in hyperopt_trials: - current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE + self.fmin.catch_eval_exceptions = False - completed_hyperopt_trials.insert_trial_docs(hyperopt_trials) - completed_hyperopt_trials.refresh() - rval = hyperopt.FMinIter( - self.hyperopt_algorithm, - hyperopt_domain, - completed_hyperopt_trials, - max_evals=-1, - rstate=hyperopt_rstate, - verbose=0) - rval.catch_eval_exceptions = False + def getSuggestions(self, trials, request_number): + """ + Get the new suggested trials with the given algorithm. + """ - new_ids = rval.trials.new_trial_ids(request_number) + recorded_trials_names = self.fmin.trials.specs - rval.trials.refresh() - random_state = rval.rstate.randint(2**31 - 1) - new_trials = self.hyperopt_algorithm( - new_ids, rval.domain, completed_hyperopt_trials, random_state) - rval.trials.refresh() + hyperopt_trial_new_ids = [] + hyperopt_trial_specs = [] + hyperopt_trial_results = [] + hyperopt_trial_miscs = [] + # Update hyperopt FMin with new completed Trials + for trial in trials: + if {"trial-name":trial.name} not in recorded_trials_names: + # Produce new id for the new Trial + new_id = self.fmin.trials.new_trial_ids(1) + hyperopt_trial_new_ids.append(new_id[0]) + hyperopt_trial_miscs_idxs = {} + # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]} + hyperopt_trial_miscs_vals = {} + + # Insert Trial assignment to the misc + hyperopt_trial_misc = dict( + tid=new_id[0], cmd=self.hyperopt_domain.cmd, workdir=self.hyperopt_domain.workdir) + for param in self.search_space.params: + parameter_value = None + for assignment in trial.assignments: + if assignment.name == param.name: + parameter_value = assignment.value + break + if param.type == INTEGER: + hyperopt_trial_miscs_idxs[param.name] = new_id + hyperopt_trial_miscs_vals[param.name] = [int(parameter_value)] + elif param.type == DOUBLE: + hyperopt_trial_miscs_idxs[param.name] = new_id + hyperopt_trial_miscs_vals[param.name] = [float(parameter_value)] + elif param.type == DISCRETE or param.type == CATEGORICAL: + index_of_value_in_list = param.list.index(parameter_value) + hyperopt_trial_miscs_idxs[param.name] = new_id + hyperopt_trial_miscs_vals[param.name] = [index_of_value_in_list] + + + hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs + hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals + hyperopt_trial_miscs.append(hyperopt_trial_misc) + + # Insert Trial name to the spec + hyperopt_trial_spec = { + "trial-name": trial.name + } + hyperopt_trial_specs.append(hyperopt_trial_spec) + + # Insert Trial result to the result + # TODO: Use negative objective value for loss or not + # TODO: Do we need to analyse additional_metrics? + objective_for_hyperopt = float(trial.target_metric.value) + if self.search_space.goal == MAX_GOAL: + # Now hyperopt only supports fmin and we need to reverse objective value for maximization + objective_for_hyperopt = -1 * objective_for_hyperopt + hyperopt_trial_result = { + "loss": objective_for_hyperopt, + "status": hyperopt.STATUS_OK + } + hyperopt_trial_results.append(hyperopt_trial_result) + + if len(trials) > 0: - # Construct return advisor trials from new hyperopt trials + # Create new Trial doc + hyperopt_trials = hyperopt.Trials().new_trial_docs( + tids=hyperopt_trial_new_ids, + specs=hyperopt_trial_specs, + results=hyperopt_trial_results, + miscs=hyperopt_trial_miscs) + + for i, _ in enumerate(hyperopt_trials): + hyperopt_trials[i]["state"] = hyperopt.JOB_STATE_DONE + + # Insert new set of Trial to FMin object + # Example: of inserting doc with tunning lr + # [{ + # 'state':2, + # 'tid':5, + # 'spec':{ + # 'trial-name':'tpe-example-48xl8whg' + # }, + # 'result':{ + # 'loss':-0.1135, + # 'status':'ok' + # }, + # 'misc':{ + # 'tid':5, + # 'cmd':('domain_attachment','FMinIter_Domain'), + # 'workdir':None, + # 'idxs':{ + # '--lr':[5] + # }, + # 'vals':{ + # '--lr':[0.025351232898626827] + # } + # }, + # 'exp_key':None, + # 'owner':None, + # 'version':0, + # 'book_time':None, + # 'refresh_time':None + # }] + self.fmin.trials.insert_trial_docs(hyperopt_trials) + self.fmin.trials.refresh() + + # Produce new request_number ids to make new Suggestion + hyperopt_trial_new_ids = self.fmin.trials.new_trial_ids(request_number) + random_state = self.fmin.rstate.randint(2**31 - 1) + if self.algorithm_name == RANDOM_ALGORITHM_NAME: + new_trials = self.hyperopt_algorithm( + new_ids=hyperopt_trial_new_ids, + domain=self.fmin.domain, + trials=self.fmin.trials, + seed=random_state) + elif self.algorithm_name == TPE_ALGORITHM_NAME: + # n_startup_jobs indicates for how many Trials we run random suggestion + # This must be request_number value + # After this tpe suggestion starts analyse Trial info. + # On the first run we can run suggest just once with n_startup_jobs + # Next suggest runs must be for each new Trial generation + if self.is_first_run: + new_trials = self.hyperopt_algorithm( + new_ids=hyperopt_trial_new_ids, + domain=self.fmin.domain, + trials=self.fmin.trials, + seed=random_state, + n_startup_jobs=request_number) + self.is_first_run = False + else: + new_trials = [] + for i in range(request_number): + # hyperopt_algorithm always returns one new Trial + new_trials.append(self.hyperopt_algorithm( + new_ids=[hyperopt_trial_new_ids[i]], + domain=self.fmin.domain, + trials=self.fmin.trials, + seed=random_state, + n_startup_jobs=request_number)[0]) + + # Construct return advisor Trials from new hyperopt Trials list_of_assignments = [] for i in range(request_number): vals = new_trials[i]['misc']['vals'] - list_of_assignments.append(BaseHyperoptService.convert(search_space, vals)) + list_of_assignments.append(BaseHyperoptService.convert(self.search_space, vals)) return list_of_assignments @staticmethod diff --git a/pkg/suggestion/v1alpha3/hyperopt_service.py b/pkg/suggestion/v1alpha3/hyperopt_service.py index c29bc4624f0..9a46390af5e 100644 --- a/pkg/suggestion/v1alpha3/hyperopt_service.py +++ b/pkg/suggestion/v1alpha3/hyperopt_service.py @@ -11,20 +11,30 @@ logger = logging.getLogger("HyperoptRandomService") -class HyperoptService( - api_pb2_grpc.SuggestionServicer, HealthServicer): +class HyperoptService(api_pb2_grpc.SuggestionServicer, HealthServicer): + + def __init__(self): + super(HyperoptService, self).__init__() + self.base_service = None + self.is_first_run = True + def GetSuggestions(self, request, context): """ Main function to provide suggestion. """ name, config = OptimizerConfiguration.convertAlgorithmSpec( request.experiment.spec.algorithm) - base_serice = BaseHyperoptService( - algorithm_name=name, random_state=config.random_state) - search_space = HyperParameterSearchSpace.convert(request.experiment) + + if self.is_first_run: + search_space = HyperParameterSearchSpace.convert(request.experiment) + self.base_service = BaseHyperoptService( + algorithm_name=name, + random_state=config.random_state, + search_space=search_space) + self.is_first_run = False + trials = Trial.convert(request.trials) - new_assignments = base_serice.getSuggestions( - search_space, trials, request.request_number) + new_assignments = self.base_service.getSuggestions(trials, request.request_number) return api_pb2.GetSuggestionsReply( parameter_assignments=Assignment.generate(new_assignments) ) @@ -36,8 +46,8 @@ def __init__(self, random_state=None): @staticmethod def convertAlgorithmSpec(algorithm_spec): - optmizer = OptimizerConfiguration() + optimizer = OptimizerConfiguration() for s in algorithm_spec.algorithm_setting: if s.name == "random_state": - optmizer.random_state = int(s.value) - return algorithm_spec.algorithm_name, optmizer + optimizer.random_state = int(s.value) + return algorithm_spec.algorithm_name, optimizer