Skip to content

Commit

Permalink
Remove player param
Browse files Browse the repository at this point in the history
  • Loading branch information
roquelopez committed Apr 25, 2024
1 parent 0c7b0e5 commit 1d9810e
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 91 deletions.
10 changes: 4 additions & 6 deletions alpha_automl/pipeline_search/agent_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ def __init__(self, config: EnvContext):
self.action_offsets = self.generate_action_offsets()
self.action_space = Discrete(self.max_actions)


self.cur_player = 1 # NEVER USED - ONLY ONE PLAYER

def reset(self, *, seed=None, options=None):
# init number of steps
Expand All @@ -59,7 +57,7 @@ def step(self, action):
offseted_action = self.action_offsets[curr_step]+action
valid_action_size = self.action_spaces[curr_step]
# Check the action is illegal
valid_moves = self.game.getValidMoves(self.board, self.cur_player)
valid_moves = self.game.getValidMoves(self.board)
if action >= valid_action_size or valid_moves[offseted_action-1] != 1:
return (
{"board": np.array(self.board).astype(np.uint8)},
Expand Down Expand Up @@ -89,13 +87,13 @@ def step(self, action):

# update board with new action
# print(f"action: {action}\n board: {self.board}")
self.board, _ = self.game.getNextState(self.board, self.cur_player, offseted_action-1)
self.board = self.game.getNextState(self.board, offseted_action-1)

if self.num_steps > 9:
logger.info(f"[YFW]================={self.board[self.game.m:]}")
logger.debug(f"[YFW]================={self.board[self.game.m:]}")
# reward: win(1) - pipeline score, not end(0) - 0, bad(2) - 0
reward = 0
game_end = self.game.getGameEnded(self.board, self.cur_player)
game_end = self.game.getGameEnded(self.board)
if game_end == 1: # pipeline score over threshold
try:
if self.game.problem == "REGRESSION":
Expand Down
12 changes: 6 additions & 6 deletions alpha_automl/pipeline_search/agent_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def load_rllib_checkpoint(game, num_rollout_workers):

# Checking if the list is empty or not
if [f for f in os.listdir(PATH_TO_CHECKPOINT) if not f.startswith(".")] == []:
logger.info("[RlLib] Cannot read RlLib checkpoint, create a new one.")
logger.debug("[RlLib] Cannot read RlLib checkpoint, create a new one.")
return config.build()
else:
algo = config.build()
Expand All @@ -88,21 +88,21 @@ def train_rllib_model(algo, time_bound, save_checkpoint=False):
result = algo.train()
last_best = result["episode_reward_mean"]
best_unchanged_iter = 1
logger.info(pretty_print(result))
logger.debug(pretty_print(result))
while True:
if (
time.time() > timeout
or (best_unchanged_iter >= 600 and result["episode_reward_mean"] >= 0)
# or result["episode_reward_mean"] >= 70
):
logger.info(f"[RlLib] Train Timeout")
logger.debug(f"[RlLib] Train Timeout")
break

if save_checkpoint and [f for f in os.listdir(PATH_TO_CHECKPOINT) if not f.startswith(".")] != []:
weights = load_rllib_policy_weights()
algo.set_weights(weights)
result = algo.train()
logger.info(pretty_print(result))
logger.debug(pretty_print(result))
# stop training of the target train steps or reward are reached
if result["episode_reward_mean"] > last_best:
last_best = result["episode_reward_mean"]
Expand All @@ -115,7 +115,7 @@ def train_rllib_model(algo, time_bound, save_checkpoint=False):


def load_rllib_policy_weights():
logger.info(f"[RlLib] Synchronizing model weights...")
logger.debug(f"[RlLib] Synchronizing model weights...")
policy = Policy.from_checkpoint(PATH_TO_CHECKPOINT)
policy = policy['default_policy']
weights = policy.get_weights()
Expand All @@ -127,7 +127,7 @@ def save_rllib_checkpoint(algo):
save_result = algo.save(checkpoint_dir=PATH_TO_CHECKPOINT)
path_to_checkpoint = save_result.checkpoint.path

logger.info(
logger.debug(
f"[RlLib] An Algorithm checkpoint has been created inside directory: '{path_to_checkpoint}'."
)

Expand Down
31 changes: 10 additions & 21 deletions alpha_automl/pipeline_search/game.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from __future__ import print_function
import os
import pickle
import math
import logging
from copy import deepcopy
from alpha_automl.pipeline_search.game_logic import Board
import numpy as np
import traceback
Expand All @@ -14,7 +11,7 @@

class PipelineGame():
# FIXME: Maybe the input parameters can be in JSON
def __init__(self, input={}, eval_pipeline=None):
def __init__(self, input=None, eval_pipeline=None):
self.steps = 0
self.evaluations = {}
self.eval_times = {}
Expand Down Expand Up @@ -53,18 +50,18 @@ def getActionSize(self):
board = Board(self.m, self.grammar, self.pipeline_size, self.metric)
return len(board.valid_moves)

def getNextState(self, board, player, action):
# if player takes action on board, return next (board,player)
def getNextState(self, board, action):
# action must be a valid move
b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
b.set_metafeatures(board)
b.set_pipeline(board)
# logger.debug('PREV STATE %s', b.pieces_p)
b.execute_move(action, player)
b.execute_move(action)
# logger.debug('NEXT STATE %s', b.pieces_p)
return (b.pieces_m+b.pieces_p, -player)

def getValidMoves(self, board, player):
return b.pieces_m+b.pieces_p

def getValidMoves(self, board):
# return a fixed size binary vector
b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
b.set_metafeatures(board)
Expand Down Expand Up @@ -97,9 +94,8 @@ def getEvaluation(self, board):

return eval_val

def getGameEnded(self, board, player, eval_val=None):
# return 0 if not ended, 1 if x won, -1 if x lost
# player = 1
def getGameEnded(self, board, eval_val=None):
# return 0 if not ended, 1 if x won, 2 if x lost

b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
b.set_metafeatures(board)
Expand All @@ -117,21 +113,14 @@ def getGameEnded(self, board, player, eval_val=None):

eval_val = self.getEvaluation(board)

if b.findWin(player, eval_val):
logger.debug('findwin %s', player)
if b.findWin(eval_val):
logger.debug('Win')
return 1
if b.findWin(-player, eval_val):
logger.debug('findwin %', -player)
return -1
if b.has_legal_moves():
return 0

return 2

def getCanonicalForm(self, board, player):
# return state if player==1, else return -state if player==-1
return deepcopy(board)

def stringRepresentation(self, board):
    """Serialize ``board`` to the raw bytes of its NumPy array form.

    Args:
        board: Array-like board state; it is converted with ``np.asarray``.

    Returns:
        bytes: The array's raw data buffer.
    """
    # ``ndarray.tostring()`` was only a deprecated alias of ``tobytes()`` and
    # was removed in NumPy 2.0; ``tobytes()`` yields the identical bytes.
    return np.asarray(board).tobytes()
Expand Down
6 changes: 3 additions & 3 deletions alpha_automl/pipeline_search/game_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

class Board():

def __init__(self, m=30, grammar={}, pipeline_size=6, metric='f1macro', win_threshold=0.01):
def __init__(self, m=30, grammar=None, pipeline_size=6, metric='accuracy', win_threshold=0.01):
"Set up initial board configuration."

self.terminals = grammar['TERMINALS']
Expand Down Expand Up @@ -61,7 +61,7 @@ def is_terminal_pipeline(self):
return False
return True

def findWin(self, player, eval_val=None):
def findWin(self, eval_val=None):
"""Find win of the given color in row, column, or diagonal
(1 for x, -1 for o)"""
if not any(self[0:]):
Expand Down Expand Up @@ -135,7 +135,7 @@ def get_train_board(self):
def get_board_size(self):
    """Return ``self.m`` plus the number of terminals and non-terminals."""
    symbol_count = len(self.terminals) + len(self.non_terminals)
    return self.m + symbol_count

def execute_move(self, action, player):
def execute_move(self, action):
"""Perform the given move on the board;
color gives the color of the piece to play (1=x,-1=o)
"""
Expand Down
96 changes: 41 additions & 55 deletions alpha_automl/pipeline_synthesis/setup_search.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import logging
import sys
import logging
from datetime import datetime
from os.path import join

from alpha_automl.grammar_loader import (load_automatic_grammar,
load_manual_grammar)
from alpha_automl.grammar_loader import load_manual_grammar
from alpha_automl.pipeline_search.game import PipelineGame
from alpha_automl.pipeline_search.agent_lab import pipeline_search_rllib, dump_result_to_json, read_result_to_pipeline
from alpha_automl.pipeline_synthesis.pipeline_builder import BaseBuilder
Expand All @@ -27,7 +24,8 @@
'TABULAR': 1,
'TEXT': 2,
'IMAGE': 3,
'VIDEO': 4
'VIDEO': 4,
'MULTIMODAL': 5
},
'PIPELINE_SIZE': 10
}
Expand All @@ -40,56 +38,9 @@ def signal_handler(queue, signum):
sys.exit(0)


def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash):
# Verify if the classifiers are repeated in the ensembles (regardless of the order)
# Returns False when the pipeline has no MULTI_ENSEMBLER primitive, True when
# repeated classifiers were flagged or when the pipeline's hash is already in
# ensemble_pipelines_hash; otherwise the hash is added to
# ensemble_pipelines_hash (mutating the caller's collection) and False is returned.
# NOTE(review): indentation was lost in this listing; the duplicate check is
# presumably nested under the MULTI_ENSEMBLER branch - confirm against the repository.
classifiers = []
pipeline_hash = ''
has_ensemble_primitive = False
has_repeated_classifiers = False

for primitive_name in pipeline_primitives:
# The primitive's type is looked up by name in all_primitives.
primitive_type = all_primitives[primitive_name]['type']

if primitive_type == 'CLASSIFIER':
# Classifier names are collected separately and appended to the hash sorted (below).
classifiers.append(primitive_name)
elif primitive_type == 'MULTI_ENSEMBLER':
has_ensemble_primitive = True
pipeline_hash += primitive_name
if len(classifiers) != len(set(classifiers)): # All classifiers should be different
has_repeated_classifiers = True
else:
# Every other primitive contributes to the hash in encounter order.
pipeline_hash += primitive_name

if not has_ensemble_primitive:
return False

if has_repeated_classifiers:
return True

# Sorting makes the hash independent of the order in which classifiers appear.
pipeline_hash += ''.join(sorted(classifiers))

if pipeline_hash in ensemble_pipelines_hash:
return True
else:
ensemble_pipelines_hash.add(pipeline_hash)
return False

def search_pipelines(
X,
y,
scoring,
splitting_strategy,
task_name,
time_bound,
automl_hyperparams,
metadata,
output_folder,
verbose,
):
def search_pipelines(X, y, scoring, splitting_strategy, task_name, time_bound, automl_hyperparams, metadata, output_folder, verbose):
# signal.signal(signal.SIGTERM, lambda signum, frame: signal_handler(queue, signum))
hide_logs(
verbose
) # Hide logs here too, since multiprocessing has some issues with loggers
hide_logs(verbose) # Hide logs here too, since multiprocessing has some issues with loggers

builder = BaseBuilder(metadata, automl_hyperparams)
all_primitives = builder.all_primitives
Expand Down Expand Up @@ -159,6 +110,41 @@ def update_config(task_name, metric, grammar, metadata):
return config


def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash):
    """Report whether an ensemble pipeline repeats classifiers or an earlier ensemble.

    Args:
        pipeline_primitives: Iterable of primitive names forming the pipeline.
        all_primitives: Mapping from primitive name to a dict with a 'type' key.
        ensemble_pipelines_hash: Set of signatures of ensembles seen so far;
            a new signature is added to it in place.

    Returns:
        False if the pipeline has no MULTI_ENSEMBLER primitive. True if the
        classifiers collected before an ensembler contain duplicates, or if
        the pipeline's signature was already recorded. Otherwise the
        signature is recorded and False is returned.
    """
    seen_classifiers = []
    signature = ''
    found_ensembler = False
    found_duplicates = False

    for name in pipeline_primitives:
        kind = all_primitives[name]['type']

        if kind == 'CLASSIFIER':
            # Classifiers join the signature later, sorted, so order is ignored.
            seen_classifiers.append(name)
            continue

        # Every non-classifier primitive contributes in encounter order.
        signature += name
        if kind == 'MULTI_ENSEMBLER':
            found_ensembler = True
            if len(set(seen_classifiers)) != len(seen_classifiers):
                found_duplicates = True

    if not found_ensembler:
        return False
    if found_duplicates:
        return True

    signature += ''.join(sorted(seen_classifiers))

    already_recorded = signature in ensemble_pipelines_hash
    if not already_recorded:
        ensemble_pipelines_hash.add(signature)
    return already_recorded


def compute_metafeatures(metadata):
metafeatures = []
# IMPUTE
Expand Down

0 comments on commit 1d9810e

Please sign in to comment.