diff --git a/.gitignore b/.gitignore index 190eb0dc..2d498ce2 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,6 @@ lightning_logs .cache **/smac3_output /venv + +.vscode +**/.vscode \ No newline at end of file diff --git a/alpha_automl/automl_api.py b/alpha_automl/automl_api.py index d0ff15e5..a261f89d 100644 --- a/alpha_automl/automl_api.py +++ b/alpha_automl/automl_api.py @@ -24,7 +24,7 @@ class BaseAutoML(): def __init__(self, time_bound=15, metric=None, split_strategy='holdout', time_bound_run=5, task=None, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO): """ Create/instantiate an BaseAutoML object. @@ -39,6 +39,8 @@ def __init__(self, time_bound=15, metric=None, split_strategy='holdout', time_bo :param metric_kwargs: Additional arguments for metric. :param split_strategy_kwargs: Additional arguments for splitting_strategy. :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. :param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. @@ -60,11 +62,11 @@ def __init__(self, time_bound=15, metric=None, split_strategy='holdout', time_bo self.X = None self.y = None self.leaderboard = None - self.automl_manager = AutoMLManager(self.output_folder, time_bound, time_bound_run, task, num_cpus, verbose) - self._start_method = get_start_method(start_mode) - set_start_method(self._start_method, force=True) - check_input_for_multiprocessing(self._start_method, self.scorer._score_func, 'metric') - check_input_for_multiprocessing(self._start_method, self.splitter, 'split strategy') + self.automl_manager = AutoMLManager(self.output_folder, checkpoints_folder, time_bound, time_bound_run, task, num_cpus, verbose) + #self._start_method = get_start_method(start_mode) + #set_start_method(self._start_method, force=True) + #check_input_for_multiprocessing(self._start_method, self.scorer._score_func, 'metric') + #check_input_for_multiprocessing(self._start_method, self.splitter, 'split strategy') self.label_encoder = None self.task_type = task @@ -194,7 +196,7 @@ def add_primitives(self, new_primitives): :param new_primitives: Set of new primitives, tuples of name and object primitive """ for primitive_object, primitive_type in new_primitives: - check_input_for_multiprocessing(self._start_method, primitive_object, 'primitive') + #check_input_for_multiprocessing(self._start_method, primitive_object, 'primitive') primitive_name = f'{primitive_object.__module__}.{primitive_object.__class__.__name__}' primitive_name = primitive_name.replace('__', '') # Sklearn restriction on estimator names self.new_primitives[primitive_name] = {'primitive_object': primitive_object, @@ -297,7 +299,7 @@ class ClassifierBaseAutoML(BaseAutoML): def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdout', time_bound_run=5, task=None, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO): """ 
Create/instantiate an AutoMLClassifier object. @@ -312,13 +314,15 @@ def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdo :param metric_kwargs: Additional arguments for metric. :param split_strategy_kwargs: Additional arguments for splitting_strategy. :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. :param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. """ super().__init__(time_bound, metric, split_strategy, time_bound_run, task, score_sorting, metric_kwargs, - split_strategy_kwargs, output_folder, num_cpus, start_mode, verbose) + split_strategy_kwargs, output_folder, checkpoints_folder, num_cpus, start_mode, verbose) self.label_encoder = LabelEncoder() @@ -354,7 +358,7 @@ class AutoMLClassifier(ClassifierBaseAutoML): def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdout', time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO): """ Create/instantiate an AutoMLClassifier object. @@ -368,6 +372,8 @@ def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdo :param metric_kwargs: Additional arguments for metric. :param split_strategy_kwargs: Additional arguments for splitting_strategy. :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. :param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. @@ -375,14 +381,14 @@ def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdo task = 'CLASSIFICATION' super().__init__(time_bound, metric, split_strategy, time_bound_run, task, score_sorting, metric_kwargs, - split_strategy_kwargs, output_folder, num_cpus, start_mode, verbose) + split_strategy_kwargs, output_folder, checkpoints_folder, num_cpus, start_mode, verbose) class AutoMLRegressor(BaseAutoML): def __init__(self, time_bound=15, metric='mean_absolute_error', split_strategy='holdout', time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO): """ Create/instantiate an AutoMLRegressor object. @@ -396,6 +402,8 @@ def __init__(self, time_bound=15, metric='mean_absolute_error', split_strategy=' :param metric_kwargs: Additional arguments for metric. :param split_strategy_kwargs: Additional arguments for splitting_strategy. :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. 
:param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. @@ -403,13 +411,14 @@ def __init__(self, time_bound=15, metric='mean_absolute_error', split_strategy=' task = 'REGRESSION' super().__init__(time_bound, metric, split_strategy, time_bound_run, task, score_sorting, metric_kwargs, - split_strategy_kwargs, output_folder, num_cpus, start_mode, verbose) + split_strategy_kwargs, output_folder, checkpoints_folder, num_cpus, start_mode, verbose) class AutoMLTimeSeries(BaseAutoML): def __init__(self, time_bound=15, metric='mean_squared_error', split_strategy='timeseries', time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO, date_column=None, target_column=None): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO, date_column=None, + target_column=None): """ Create/instantiate an AutoMLTimeSeries object. @@ -423,6 +432,8 @@ def __init__(self, time_bound=15, metric='mean_squared_error', split_strategy='t :param metric_kwargs: Additional arguments for metric. :param split_strategy_kwargs: Additional arguments for TimeSeriesSplit, E.g. n_splits and test_size(int). :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. :param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. @@ -433,7 +444,7 @@ def __init__(self, time_bound=15, metric='mean_squared_error', split_strategy='t self.target_column = target_column super().__init__(time_bound, metric, split_strategy, time_bound_run, task, score_sorting, metric_kwargs, - split_strategy_kwargs, output_folder, num_cpus, start_mode, verbose) + split_strategy_kwargs, output_folder, checkpoints_folder, num_cpus, start_mode, verbose) def _column_parser(self, X): cols = list(X.columns.values) @@ -452,7 +463,7 @@ class AutoMLSemiSupervisedClassifier(ClassifierBaseAutoML): def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdout', time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, output_folder=None, - num_cpus=None, start_mode='auto', verbose=logging.INFO): + checkpoints_folder=None, num_cpus=None, start_mode='auto', verbose=logging.INFO): """ Create/instantiate an AutoMLSemiSupervisedClassifier object. @@ -467,6 +478,8 @@ def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdo :param split_strategy_kwargs: Additional arguments for splitting_strategy. In SemiSupervised case, `n_splits` and `test_size`(test proportion from 0 to 1) can be pass to the splitter. :param output_folder: Path to the output directory. If it is None, create a temp folder automatically. + :param checkpoints_folder: Path to the directory to load and save the checkpoints. If it is None, + it will use the default checkpoints and save the new checkpoints in output_folder. :param num_cpus: Number of CPUs to be used. :param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`. :param verbose: The logs level. 
@@ -474,7 +487,7 @@ def __init__(self, time_bound=15, metric='accuracy_score', split_strategy='holdo task = 'SEMISUPERVISED' super().__init__(time_bound, metric, split_strategy, time_bound_run, task, score_sorting, metric_kwargs, - split_strategy_kwargs, output_folder, num_cpus, start_mode, verbose) + split_strategy_kwargs, output_folder, checkpoints_folder, num_cpus, start_mode, verbose) if split_strategy_kwargs is None: split_strategy_kwargs = {'test_size': 0.25} diff --git a/alpha_automl/automl_manager.py b/alpha_automl/automl_manager.py index c0d2a448..c950c29c 100644 --- a/alpha_automl/automl_manager.py +++ b/alpha_automl/automl_manager.py @@ -1,7 +1,7 @@ import logging import time import multiprocessing -import queue as Q +from alpha_automl.pipeline import Pipeline from alpha_automl.data_profiler import profile_data from alpha_automl.scorer import make_splitter, score_pipeline from alpha_automl.utils import sample_dataset, is_equal_splitting @@ -22,8 +22,9 @@ class AutoMLManager(): - def __init__(self, output_folder, time_bound, time_bound_run, task, num_cpus, verbose): + def __init__(self, output_folder, checkpoints_folder, time_bound, time_bound_run, task, num_cpus, verbose): self.output_folder = output_folder + self.checkpoints_folder = checkpoints_folder self.time_bound = time_bound * 60 self.time_bound_run = time_bound_run * 60 self.task = task @@ -60,86 +61,33 @@ def _search_pipelines(self, automl_hyperparams): if not is_sample and is_equal_splitting(internal_splitting_strategy, self.splitting_strategy): need_rescoring = False - queue = multiprocessing.Queue() - search_process = multiprocessing.Process(target=search_pipelines_proc, - args=(X, y, self.scoring, internal_splitting_strategy, self.task, - automl_hyperparams, metadata, self.output_folder, self.verbose, - queue - ) - ) - - search_process.start() - self.running_processes += 1 - num_processes = self.num_cpus - 2 # Exclude the main process and search process - scoring_pool = multiprocessing.Pool(max(1, num_processes)) - pipelines_to_score = [] - scoring_results = [] - - while True: + pipelines = search_pipelines_proc(X, y, self.scoring, internal_splitting_strategy, self.task, + self.time_bound, automl_hyperparams, metadata, self.output_folder, + self.checkpoints_folder) + + found_pipelines = 0 + + pipeline_threshold = 20 + X, y, _ = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task) + while pipelines and found_pipelines < pipeline_threshold: + pipeline = pipelines.pop() try: - result = queue.get(timeout=10) - except Q.Empty: - logger.debug('Reached timeout getting new pipelines') - result = None - - if result == 'DONE': - search_process.terminate() - search_process.join(10) - scoring_pool.terminate() - scoring_pool.join() - logger.debug(f'Found {self.found_pipelines} pipelines') - logger.debug('Search done') - break - - elif result is not None: - pipeline = result - logger.debug('Found new pipeline') - yield {'pipeline': pipeline, 'message': 'FOUND'} - - if need_rescoring: - pipelines_to_score.append(pipeline) - else: - logger.debug(f'Pipeline scored successfully, score={pipeline.get_score()}') - self.found_pipelines += 1 - yield {'pipeline': pipeline, 'message': 'SCORED'} - - if len(pipelines_to_score) > 0: - if self.running_processes < MAX_RUNNING_PROCESSES: - pipeline = pipelines_to_score.pop(0).get_pipeline() - scoring_result = scoring_pool.apply_async( - score_pipeline, - args=(pipeline, self.X, self.y, self.scoring, self.splitting_strategy, self.task, self.verbose) - ) - scoring_results.append(scoring_result) - 
self.running_processes += 1 - - tmp_scoring_results = [] - for scoring_result in scoring_results: - if scoring_result.ready(): - self.running_processes -= 1 - pipeline = scoring_result.get() - if pipeline is not None: - logger.debug(f'Pipeline scored successfully, score={pipeline.get_score()}') - self.found_pipelines += 1 - yield {'pipeline': pipeline, 'message': 'SCORED'} - else: - tmp_scoring_results.append(scoring_result) - - scoring_results = tmp_scoring_results - - if time.time() > search_start_time + self.time_bound: - logger.debug('Reached search timeout') - search_process.terminate() - search_process.join(10) - scoring_pool.terminate() - scoring_pool.join() - logger.debug(f'Found {self.found_pipelines} pipelines') - break + alphaautoml_pipeline = score_pipeline(pipeline, X, y, self.scoring, self.splitting_strategy, self.task) + + if alphaautoml_pipeline is not None: + score = alphaautoml_pipeline.get_score() + logger.debug(f'Pipeline scored successfully, score={score}') + found_pipelines += 1 + yield {'pipeline': alphaautoml_pipeline, 'message': 'SCORED'} + except: + logger.debug(f'Pipeline scoring error!') + continue + + logger.debug(f'Found {found_pipelines} pipelines') + logger.debug('Search done') - def check_automl_hyperparams(self, automl_hyperparams): - if 'use_automatic_grammar' not in automl_hyperparams: - automl_hyperparams['use_automatic_grammar'] = USE_AUTOMATIC_GRAMMAR + def check_automl_hyperparams(self, automl_hyperparams): if 'prioritize_primitives' not in automl_hyperparams: automl_hyperparams['prioritize_primitives'] = PRIORITIZE_PRIMITIVES diff --git a/alpha_automl/grammar_loader.py b/alpha_automl/grammar_loader.py index dfe645f0..eb6b2a17 100644 --- a/alpha_automl/grammar_loader.py +++ b/alpha_automl/grammar_loader.py @@ -48,8 +48,9 @@ def create_task_grammar(global_grammar, task): for production in global_grammar.productions(): for new_production in new_productions: - if production.lhs() in new_production.rhs() and production not in new_productions: - new_productions.append(production) + if production not in new_productions: + if str(production.lhs()) != "S" and "_TASK" not in str(production.lhs()): + new_productions.append(production) task_grammar = CFG(start_token, new_productions) logger.debug(f'Task grammar: {task_grammar}') diff --git a/alpha_automl/hyperparameter_tuning/bayesian.py b/alpha_automl/hyperparameter_tuning/bayesian.py deleted file mode 100644 index 4919f4af..00000000 --- a/alpha_automl/hyperparameter_tuning/bayesian.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -import numpy as np -from smac.configspace import ConfigurationSpace -from ConfigSpace.hyperparameters import IntegerHyperparameter, FloatHyperparameter, CategoricalHyperparameter, \ - OrdinalHyperparameter -from smac.facade.smac_ac_facade import SMAC4AC -from smac.scenario.scenario import Scenario - -MAX_RUNS = 100 -logger = logging.getLogger(__name__) - - -def build_configspace(primitives): - # Build Configuration Space which defines all parameters and their ranges - configspace = ConfigurationSpace() - for primitive in primitives: - pass # load_primitive_configspace(configspace, primitive) - - return configspace - - -def get_new_hyperparameters(primitive_name, configspace): - hyperparameters = {} # load_hyperparameters(primitive_name) - new_hyperparameters = {} - - for hyperparameter_name in hyperparameters: - hyperparameter_config_name = primitive_name + '|' + hyperparameter_name - hyperparameter_config_name_case = hyperparameter_config_name + '|case' - if 
hyperparameter_config_name in configspace: - value = None if configspace[hyperparameter_config_name] == 'None' \ - else configspace[hyperparameter_config_name] - new_hyperparameters[hyperparameter_name] = value - logger.debug('New value for %s=%s', hyperparameter_config_name, new_hyperparameters[hyperparameter_name]) - elif hyperparameter_config_name_case in configspace: - case = configspace[hyperparameter_config_name_case] - value = None if configspace[hyperparameter_config_name + '|' + case] == 'None' \ - else configspace[hyperparameter_config_name + '|' + case] - new_hyperparameters[hyperparameter_name] = {'case': case, - 'value': value} - logger.debug('New value for %s=%s', hyperparameter_config_name, new_hyperparameters[hyperparameter_name]) - - return new_hyperparameters - - -class HyperparameterTuning(object): - def __init__(self, primitives): - self.configspace = build_configspace(primitives) - # Avoiding too many iterations - self.runcount = 1 - - for param in self.configspace.get_hyperparameters(): - if isinstance(param, IntegerHyperparameter): - self.runcount *= (param.upper - param.lower) - elif isinstance(param, CategoricalHyperparameter): - self.runcount *= len(param.choices) - elif isinstance(param, OrdinalHyperparameter): - self.runcount *= len(param.sequence) - elif isinstance(param, FloatHyperparameter): - self.runcount = MAX_RUNS - break - - self.runcount = min(self.runcount, MAX_RUNS) - - def tune(self, runner, wallclock, output_dir): - # Scenario object - # Allow long pipelines to try to execute one fourth of the iterations limit - cutoff = wallclock / (self.runcount / 10) - scenario = Scenario({'run_obj': 'quality', # We optimize quality (alternatively runtime) - 'runcount-limit': self.runcount, # Maximum function evaluations - 'wallclock-limit': wallclock, - 'cutoff_time': cutoff, - 'cs': self.configspace, # Configuration space - 'deterministic': 'true', - 'output_dir': output_dir, - 'abort_on_first_run_crash': False - }) - smac = SMAC4AC(scenario=scenario, rng=np.random.RandomState(0), tae_runner=runner) - best_configuration = smac.optimize() - min_cost = float('inf') - best_scores = {} - - for _, run_data in smac.get_runhistory().data.items(): - if run_data.cost < min_cost: - min_cost = run_data.cost - best_scores = run_data.additional_info - - return best_configuration, best_scores diff --git a/alpha_automl/metalearning/__init__.py b/alpha_automl/metalearning/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/alpha_automl/metalearning/dataset_profiler.py b/alpha_automl/metalearning/dataset_profiler.py deleted file mode 100644 index a33c31d8..00000000 --- a/alpha_automl/metalearning/dataset_profiler.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -import pandas as pd -import datamart_profiler -from metalearn import Metafeatures - -logger = logging.getLogger(__name__) - -DEFAULT_METAFEATURES = [] - - -def extract_metafeatures(dataset_path, target_column): - data = pd.read_csv(dataset_path) - Y = data[target_column] - Y = pd.Series([str(i) for i in Y], name=target_column) # Cast to string to get metalearn lib working correctly - X = data.drop(columns=[target_column]) - metafeatures = Metafeatures() - mfs = metafeatures.compute(X, Y, metafeature_ids=DEFAULT_METAFEATURES, seed=0, timeout=300) - - return mfs - - -def extract_dataprofiles(dataset_path, target_column, ignore_target_column=False): - metadata = datamart_profiler.process_dataset(dataset_path, coverage=False) - feature_types = set() - missing_values = False - for item in 
metadata['columns']: - if ignore_target_column and item['name'] == target_column: - continue - identified_types = item['semantic_types'] if len(item['semantic_types']) > 0 else [item['structural_type']] - for feature_type in identified_types: - feature_types.add(feature_type) - - if 'missing_values_ratio' in item and item['name'] != target_column: - missing_values = True - - dps = {'feature_types': sorted(feature_types), 'missing_values': missing_values} - - return dps diff --git a/alpha_automl/metalearning/dataset_similarity.py b/alpha_automl/metalearning/dataset_similarity.py deleted file mode 100644 index 5348ae33..00000000 --- a/alpha_automl/metalearning/dataset_similarity.py +++ /dev/null @@ -1,161 +0,0 @@ -import logging -import math -import hashlib -import datamart_profiler -from metalearn import Metafeatures -from alphad3m.metalearning.resource_builder import load_precalculated_data -from alphad3m.metalearning.dataset_profiler import extract_dataprofiles, extract_metafeatures -from sklearn.metrics.pairwise import cosine_similarity -from d3m.metadata.problem import TaskKeywordBase - -logger = logging.getLogger(__name__) - - -def create_metafeatures_vector(metafeatures, metafeature_indices): - vector = [] - - for metafeatures_id in metafeature_indices: - value = metafeatures[metafeatures_id]['value'] - if isinstance(value, str): - value = int(hashlib.sha256(value.encode('utf-8')).hexdigest(), 16) % 256 - elif math.isnan(value) or math.isinf(value): - value = 0 - vector.append(value) - - return vector - - -def create_dataprofiles_vector(dataprofiles, dataprofile_indices): - vector = [] - - for dataprofile_id in dataprofile_indices: - if dataprofile_id in dataprofiles['feature_types']: - value = 1 - else: - value = 0 - vector.append(value) - - value = 1 if dataprofiles['missing_values'] else 0 - vector.append(value) # Add an extra value corresponding to the missing values data - - return vector - - -def create_taskkeywords_vector(task_keywords, taskkeyword_indices): - vector = [] - - for taskkeyword_id in taskkeyword_indices: - if taskkeyword_id in task_keywords['task_keywords']: - value = 1 - else: - value = 0 - vector.append(value) - - return vector - - -def create_metafeatures_vectors_mldb(metafeature_indices): - vectors = {} - metafeature_datasets = load_precalculated_data('metafeatures') - - for id_dataset, metafeatures in metafeature_datasets.items(): - vector = create_metafeatures_vector(metafeatures, metafeature_indices) - vectors[id_dataset] = vector - - return vectors - - -def create_dataprofiles_vectors_mldb(dataprofile_indices): - vectors = {} - dataprofile_datasets = load_precalculated_data('dataprofiles') - - for id_dataset, dataprofiles in dataprofile_datasets.items(): - vector = create_dataprofiles_vector(dataprofiles, dataprofile_indices) - vectors[id_dataset] = vector - - return vectors - - -def create_taskkeywords_vectors_mldb(taskkeyword_indices): - vectors = {} - taskkeyword_datasets = load_precalculated_data('task_keywords') - - for id_dataset, task_keywords in taskkeyword_datasets.items(): - vector = create_taskkeywords_vector(task_keywords, taskkeyword_indices) - vectors[id_dataset] = vector - - return vectors - - -def load_metafeatures_vectors(dataset_path, target_column): - mfs = extract_metafeatures(dataset_path, target_column) - metafeature_indices = Metafeatures.list_metafeatures(group='all') - target_metafeatures_vector = create_metafeatures_vector(mfs, metafeature_indices) - metalearningdb_vectors = create_metafeatures_vectors_mldb(metafeature_indices) - 
- return metalearningdb_vectors, target_metafeatures_vector - - -def load_profiles_vectors(dataset_path, target_column): - dps = extract_dataprofiles(dataset_path, target_column) - dataprofile_indices = [v for k, v in datamart_profiler.types.__dict__.items() if not k.startswith('_')] - target_dataprofile_vector = create_dataprofiles_vector(dps, dataprofile_indices) - metalearningdb_vectors = create_dataprofiles_vectors_mldb(dataprofile_indices) - - return metalearningdb_vectors, target_dataprofile_vector - - -def load_taskkeyword_vectors(task_keywords): - taskkeyword_indices = sorted([keyword for keyword in TaskKeywordBase.get_map().keys() if keyword is not None]) - target_dataprofile_vector = create_taskkeywords_vector({'task_keywords': task_keywords}, taskkeyword_indices) - metalearningdb_vectors = create_taskkeywords_vectors_mldb(taskkeyword_indices) - - return metalearningdb_vectors, target_dataprofile_vector - - -def calculate_similarity(metalearningdb_vectors, target_vector, threshold): - similar_datasets = {} - for id_dataset, vector in metalearningdb_vectors.items(): - similarity = round(cosine_similarity([target_vector], [vector]).flat[0], 5) - if similarity >= threshold: - similar_datasets[id_dataset] = similarity - - return similar_datasets - - -def similarity_repr(dataset_similarities): - similarity_string = [] - - for dataset_similarity in sorted(dataset_similarities.items(), key=lambda x: x[1], reverse=True): - pretty_string = '%s=%.2f' % dataset_similarity - similarity_string.append(pretty_string) - - return ', '.join(similarity_string) - - -def get_similar_datasets(mode, dataset_path, target_column, task_keywords, threshold=0.8, combined=False): - vectors_taskkeywords, target_vector_taskkeywords = load_taskkeyword_vectors(task_keywords) - - if mode == 'metafeatures': - vectors_dataset, target_vector_dataset = load_metafeatures_vectors(dataset_path, target_column) - elif mode == 'dataprofiles': - vectors_dataset, target_vector_dataset = load_profiles_vectors(dataset_path, target_column) - else: - raise ValueError('Unknown mode "%s" to load data' % mode) - - if combined: - # Concatenate the vectors of the dataset and task keywords - for id_dataset in vectors_dataset: - vectors_dataset[id_dataset] += vectors_taskkeywords[id_dataset] - target_vector_dataset += target_vector_taskkeywords - similar_datasets = calculate_similarity(vectors_dataset, target_vector_dataset, threshold) - logger.debug('Similar datasets found using both information:\n%s', similarity_repr(similar_datasets)) - else: - # Use threshold=1.0 to get datasets with the same task keywords - similar_datasets = calculate_similarity(vectors_taskkeywords, target_vector_taskkeywords, 1.0) - logger.debug('Similar datasets found using task_keywords features:\n%s', similarity_repr(similar_datasets)) - vectors_dataset = {k: vectors_dataset[k] for k in similar_datasets} # Use only the similar datasets - similar_datasets = calculate_similarity(vectors_dataset, target_vector_dataset, threshold) - logger.debug('Similar datasets found using %s features:\n%s', mode, similarity_repr(similar_datasets)) - - return similar_datasets diff --git a/alpha_automl/metalearning/grammar_builder.py b/alpha_automl/metalearning/grammar_builder.py deleted file mode 100644 index 33fc70c9..00000000 --- a/alpha_automl/metalearning/grammar_builder.py +++ /dev/null @@ -1,329 +0,0 @@ -import logging -import numpy as np -from scipy import stats -from collections import OrderedDict -from alphad3m.metalearning.resource_builder import load_metalearningdb 
-from alphad3m.metalearning.dataset_similarity import get_similar_datasets -from alphad3m.primitive_loader import load_primitives_by_name, load_primitives_by_id - -logger = logging.getLogger(__name__) - - -def load_related_pipelines(dataset_path, target_column, task_keywords): - available_primitives = load_primitives_by_id() - all_pipelines = load_metalearningdb() - similar_datasets = get_similar_datasets('dataprofiles', dataset_path, target_column, task_keywords) - task_pipelines = [] - - for similar_dataset in similar_datasets.keys(): - if similar_dataset not in all_pipelines['pipeline_performances']: - continue - - for pipeline_id, pipeline_performances in all_pipelines['pipeline_performances'][similar_dataset].items(): - primitive_ids = all_pipelines['pipeline_structure'][pipeline_id] - if is_available_primitive(primitive_ids, available_primitives): - for index in range(len(pipeline_performances['score'])): - primitives = [available_primitives[p] for p in primitive_ids] # Use the current names of primitives - score = pipeline_performances['score'][index] - metric = pipeline_performances['metric'][index] - task_pipelines.append({'pipeline': primitives, 'score': score, 'metric': metric, - 'dataset': similar_dataset, 'pipeline_repr': '_'.join(primitives)}) - - logger.debug('Found %d related pipelines', len(task_pipelines)) - - return task_pipelines - - -def create_metalearningdb_grammar(task_name, dataset_path, target_column, task_keywords, merge=False): - pipelines = load_related_pipelines(dataset_path, target_column, task_keywords) - patterns, primitives = extract_patterns(pipelines) - empty_elements = [] - - if merge: - patterns, empty_elements = merge_patterns(patterns) - - grammar = format_grammar(task_name, patterns, empty_elements) - - return grammar, primitives - - -def format_grammar(task_name, patterns, empty_elements): - if len(patterns) == 0: - logger.warning('Empty patterns, no grammar have been generated') - return None - - grammar = 'S -> %s\n' % task_name - grammar += task_name + ' -> ' + ' | '.join([' '.join(p) for p in patterns]) - - for element in sorted(set([e for sublist in patterns for e in sublist])): # Sort to have a deterministic grammar - production_rule = element + " -> 'primitive_terminal'" - if element in empty_elements: - production_rule += " | 'E'" - - grammar += '\n' + production_rule - logger.debug('Grammar obtained:\n%s', grammar) - - return grammar - - -def extract_patterns(pipelines, max_nro_patterns=15, min_frequency=3, adtm_threshold=0.5, mean_score_threshold=0.5, - ratio_datasets=0.2): - available_primitives = load_primitives_by_name() - pipelines = calculate_adtm(pipelines) - patterns = {} - - for pipeline_data in pipelines: - if pipeline_data['adtm'] > adtm_threshold: - # Skip pipelines with average distance to the minimum higher than the threshold - continue - - primitive_types = [available_primitives[p]['type'] for p in pipeline_data['pipeline']] - pattern_id = ' '.join(primitive_types) - if pattern_id not in patterns: - patterns[pattern_id] = {'structure': primitive_types, 'primitives': set(), 'datasets': set(), - 'pipelines': [], 'scores': [], 'adtms': [], 'frequency': 0} - patterns[pattern_id]['primitives'].update(pipeline_data['pipeline']) - patterns[pattern_id]['datasets'].add(pipeline_data['dataset']) - patterns[pattern_id]['pipelines'].append(pipeline_data['pipeline']) - patterns[pattern_id]['scores'].append(pipeline_data['score']) - patterns[pattern_id]['adtms'].append(pipeline_data['adtm']) - patterns[pattern_id]['frequency'] += 1 - - 
logger.debug('Found %d different patterns, after creating the portfolio', len(patterns)) - # TODO: Group these removing conditions into a single loop - # Remove patterns with fewer elements than the minimum frequency - patterns = {k: v for k, v in patterns.items() if v['frequency'] >= min_frequency} - logger.debug('Found %d different patterns, after removing uncommon patterns', len(patterns)) - - # Remove patterns with undesirable primitives (AlphaD3M doesn't have support to handle some of these primitives) - blacklist_primitives = {'d3m.primitives.data_transformation.dataframe_to_ndarray.Common', - 'd3m.primitives.data_transformation.list_to_dataframe.DistilListEncoder', - 'd3m.primitives.data_transformation.ndarray_to_dataframe.Common', - 'd3m.primitives.data_transformation.horizontal_concat.DSBOX', - 'd3m.primitives.data_transformation.horizontal_concat.DataFrameCommon', - 'd3m.primitives.data_transformation.multi_horizontal_concat.Common', - 'd3m.primitives.data_transformation.conditioner.Conditioner', - 'd3m.primitives.data_transformation.remove_semantic_types.Common', - 'd3m.primitives.data_transformation.replace_semantic_types.Common', - 'd3m.primitives.data_transformation.remove_columns.Common', - 'd3m.primitives.operator.dataset_map.DataFrameCommon', - 'd3m.primitives.data_transformation.i_vector_extractor.IVectorExtractor'} - patterns = {k: v for k, v in patterns.items() if not blacklist_primitives & v['primitives']} - logger.debug('Found %d different patterns, after blacklisting primitives', len(patterns)) - - unique_datasets = set() - for pattern_id in patterns: - scores = patterns[pattern_id]['scores'] - adtms = patterns[pattern_id]['adtms'] - patterns[pattern_id]['mean_score'] = np.mean(scores) - patterns[pattern_id]['mean_adtm'] = np.mean(adtms) - unique_datasets.update(patterns[pattern_id]['datasets']) - # Remove patterns with low performances - patterns = {k: v for k, v in patterns.items() if v['mean_score'] >= mean_score_threshold} - logger.debug('Found %d different patterns, after removing low-performance patterns', len(patterns)) - - # Remove patterns with low variability - patterns = {k: v for k, v in patterns.items() if len(v['datasets']) >= len(unique_datasets) * ratio_datasets} - logger.debug('Found %d different patterns, after removing low-variability patterns', len(patterns)) - - if len(patterns) > max_nro_patterns: - logger.debug('Found many patterns, selecting top %d (max_nro_patterns)' % max_nro_patterns) - sorted_patterns = sorted(patterns.items(), key=lambda x: x[1]['mean_score'], reverse=True) - patterns = {k: v for k, v in sorted_patterns[:max_nro_patterns]} - - primitive_info = add_correlations(patterns, available_primitives) - # Make deterministic the order of the patterns - patterns = sorted(patterns.values(), key=lambda x: x['mean_score'], reverse=True) - logger.debug('Patterns:\n%s', patterns_repr(patterns)) - logger.debug('Hierarchy:\n%s', '\n'.join(['%s:\n%s' % (k, ', '.join(v)) for k, v in - primitive_info['hierarchy'].items()])) - patterns = [p['structure'] for p in patterns] - - return patterns, primitive_info - - -def add_correlations(patterns, available_primitives): - primitive_hierarchy = {} - all_pipelines = [] - all_performances = [] - all_primitives = [] - - # Add local correlations - local_probabilities = {} - for pattern_id, pattern in patterns.items(): - for primitive in pattern['primitives']: - primitive_type = available_primitives[primitive]['type'] - if primitive_type not in primitive_hierarchy: - primitive_hierarchy[primitive_type]
= set() - primitive_hierarchy[primitive_type].add(primitive) - performances = [1 - x for x in pattern['adtms']] # Use adtms as performances because their are scaled - all_pipelines += pattern['pipelines'] - all_primitives += pattern['primitives'] - all_performances += performances - correlations = calculate_correlations(pattern['primitives'], pattern['pipelines'], performances) - local_probabilities[pattern_id] = {} - for primitive, correlation in correlations.items(): - primitive_type = available_primitives[primitive]['type'] - if primitive_type not in local_probabilities[pattern_id]: - local_probabilities[pattern_id][primitive_type] = {} - local_probabilities[pattern_id][primitive_type][primitive] = correlation - - # Add global correlations - global_probabilities = {} - correlations = calculate_correlations(set(all_primitives), all_pipelines, all_performances) - - for primitive, correlation in correlations.items(): - primitive_type = available_primitives[primitive]['type'] - if primitive_type not in global_probabilities: - global_probabilities[primitive_type] = {} - global_probabilities[primitive_type][primitive] = correlation - - global_probabilities['S'] = {} - for pattern, pattern_data in patterns.items(): # Use the mean adtm values as probabilities for the patterns - global_probabilities['S'][pattern] = 1 - pattern_data['mean_adtm'] - - primitive_probabilities = {'global': global_probabilities, 'local': local_probabilities, - 'types': available_primitives} - # Make deterministic the order of the hierarchy - primitive_hierarchy = OrderedDict({k: sorted(v) for k, v in - sorted(primitive_hierarchy.items(), key=lambda x: x[0])}) - primitive_info = {'hierarchy': primitive_hierarchy, 'probabilities': primitive_probabilities} - - return primitive_info - - -def calculate_correlations(primitives, pipelines, scores, normalize=True): - correlations = {} - - for primitive in primitives: - occurrences = [1 if primitive in pipeline else 0 for pipeline in pipelines] - correlation_coefficient, p_value = stats.pointbiserialr(occurrences, scores) - if np.isnan(correlation_coefficient): # Assign a positive correlation (1) to NaN values - correlation_coefficient = 1 - if normalize: # Normalize the Pearson values, from [-1, 1] to [0, 1] range - correlation_coefficient = (correlation_coefficient - (-1)) / 2 # xi − min(x) / max(x) − min(x) - correlations[primitive] = round(correlation_coefficient, 4) - - return correlations - - -def calculate_adtm(pipelines): - dataset_performaces = {} - pipeline_performances = {} - - for pipeline_data in pipelines: - # Even the same dataset can be run under different metrics. 
So, use the metric to create the id of the dataset - id_dataset = pipeline_data['dataset'] + '_' + pipeline_data['metric'] - - if id_dataset not in dataset_performaces: - dataset_performaces[id_dataset] = {'min': float('inf'), 'max': float('-inf')} - performance = pipeline_data['score'] - - if performance > dataset_performaces[id_dataset]['max']: - dataset_performaces[id_dataset]['max'] = performance - - if performance < dataset_performaces[id_dataset]['min']: - dataset_performaces[id_dataset]['min'] = performance - - id_pipeline = pipeline_data['pipeline_repr'] - - if id_pipeline not in pipeline_performances: - pipeline_performances[id_pipeline] = {} - - if id_dataset not in pipeline_performances[id_pipeline]: - pipeline_performances[id_pipeline][id_dataset] = pipeline_data['score'] - else: - # A pipeline can have different performances for a given dataset, choose the best one - if pipeline_data['score'] > pipeline_performances[id_pipeline][id_dataset]: - pipeline_performances[id_pipeline][id_dataset] = pipeline_data['score'] - - add_adtm(pipelines, pipeline_performances, dataset_performaces) - - return pipelines - - -def add_adtm(pipelines, pipeline_performances, dataset_performaces): - for pipeline_data in pipelines: - id_pipeline = pipeline_data['pipeline_repr'] - id_dataset_pipeline = pipeline_data['dataset'] + '_' + pipeline_data['metric'] - dtm = 0 - - for id_dataset in pipeline_performances[id_pipeline]: # Iterate over the datasets where the pipeline was used - minimum = dataset_performaces[id_dataset]['min'] - maximum = dataset_performaces[id_dataset]['max'] - - if id_dataset_pipeline == id_dataset: - score = pipeline_data['score'] - else: - score = pipeline_performances[id_pipeline][id_dataset] - - if minimum != maximum: - dtm += (maximum - score) / (maximum - minimum) - - adtm = dtm / len(pipeline_performances[id_pipeline]) - pipeline_data['adtm'] = adtm - - -def merge_patterns(grammar_patterns): - patterns = sorted(grammar_patterns, key=lambda x: len(x), reverse=True) - empty_elements = set() - skip_patterns = [] - - for pattern in patterns: - for element in pattern: - modified_pattern = [e for e in pattern if e != element] - for current_pattern in patterns: - if modified_pattern == current_pattern: - empty_elements.add(element) - skip_patterns.append(modified_pattern) - - for skip_pattern in skip_patterns: - if skip_pattern in patterns: - patterns.remove(skip_pattern) - - return patterns, empty_elements - - -def is_available_primitive(pipeline_primitives, available_primitives, verbose=False): - for primitive in pipeline_primitives: - if primitive not in available_primitives: - if verbose: - logger.warning('Primitive %s is not longer available' % primitive) - return False - return True - - -def patterns_repr(patterns): - patterns_string = [] - - for pattern in patterns: - pretty_string = '' - pretty_string += 'structure: [%s]' % ', '.join([i for i in pattern['structure']]) - pretty_string += ', frequency: %d' % pattern['frequency'] - if 'mean_score' in pattern: - pretty_string += ', mean_score: %.3f' % pattern['mean_score'] - if 'mean_adtm' in pattern: - pretty_string += ', mean_adtm: %.3f' % pattern['mean_adtm'] - patterns_string.append(pretty_string) - - return '\n'.join(patterns_string) - - -def test_dataset(dataset_folder_path, task_name='TASK'): - from os.path import join - import json - dataset_path = join(dataset_folder_path, 'TRAIN/dataset_TRAIN/tables/learningData.csv') - problem_path = join(dataset_folder_path, 'TRAIN/problem_TRAIN/problemDoc.json') - - with 
open(problem_path) as fin: - problem_doc = json.load(fin) - task_keywords = problem_doc['about']['taskKeywords'] - target_column = problem_doc['inputs']['data'][0]['targets'][0]['colName'] - logger.debug('Evaluating dataset "%s" with task keywords=%s' % (dataset_folder_path, str(task_keywords))) - create_metalearningdb_grammar(task_name, dataset_path, target_column, task_keywords) - - -if __name__ == '__main__': - dataset_id = '185_baseball_MIN_METADATA' - dataset_folder_path = '/Users/rlopez/D3M/datasets/seed_datasets_current/%s' % dataset_id - test_dataset(dataset_folder_path) diff --git a/alpha_automl/metalearning/nnet_trainer.py b/alpha_automl/metalearning/nnet_trainer.py deleted file mode 100644 index 7ca8675f..00000000 --- a/alpha_automl/metalearning/nnet_trainer.py +++ /dev/null @@ -1,102 +0,0 @@ -import pickle -import logging -import copy -import pandas as pd -from alphad3m.primitive_loader import load_primitives_by_name, load_primitives_by_id, load_primitives_types -from alphad3m.metalearning.resource_builder import get_dataset_id, filter_primitives, load_precalculated_data -from alphad3m.metalearning.dataset_profiler import DEFAULT_METAFEATURES - -logger = logging.getLogger(__name__) - - -IGNORE_PRIMITIVES = { - # These primitives are static elements in the pipelines, not considered as part of the pattern - 'd3m.primitives.data_transformation.construct_predictions.Common', - 'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common', - 'd3m.primitives.data_transformation.dataset_to_dataframe.Common', - 'd3m.primitives.data_transformation.denormalize.Common', - 'd3m.primitives.data_transformation.flatten.DataFrameCommon', - 'd3m.primitives.data_transformation.column_parser.Common', - 'd3m.primitives.data_transformation.simple_column_parser.DataFrameCommon', - 'd3m.primitives.data_transformation.do_nothing.DSBOX', - 'd3m.primitives.data_transformation.do_nothing_for_dataset.DSBOX', - 'd3m.primitives.data_transformation.add_semantic_types.Common', - 'd3m.primitives.schema_discovery.profiler.Common', - 'd3m.primitives.schema_discovery.profiler.DSBOX', - 'd3m.primitives.data_cleaning.column_type_profiler.Simon', - 'd3m.primitives.operator.compute_unique_values.Common', - 'd3m.primitives.data_transformation.construct_confidence.Common', - # We add these primitives internally because they require special connections - 'd3m.primitives.data_transformation.text_reader.Common', - 'd3m.primitives.data_transformation.image_reader.Common' -} - - -def create_csv_data(metalearningdb_pickle_path, pipelines_csv_path, use_primitive_names=True): - logger.debug('Creating CSV file...') - primitives_by_name = load_primitives_by_name(only_installed_primitives=False) - primitives_by_id = load_primitives_by_id(only_installed_primitives=False) - primitives_types = load_primitives_types(only_installed_primitives=False) - dataset_task_keywords = load_precalculated_data('task_keywords') - dataset_semantic_types = load_precalculated_data('dataprofiles') - dataset_metafeatures = load_precalculated_data('metafeatures') - ignore_primitives_ids = {primitives_by_name[ignore_primitive]['id'] for ignore_primitive in IGNORE_PRIMITIVES} - train_pipelines = [] - total_pipelines = 0 - - with open(metalearningdb_pickle_path, 'rb') as fin: - all_pipelines = pickle.load(fin) - - for pipeline_run in all_pipelines: - dataset_id = get_dataset_id(pipeline_run['problem']['id']) - - if dataset_id not in dataset_task_keywords: - continue - - pipeline_primitives = pipeline_run['steps'] - pipeline_primitives = 
filter_primitives(pipeline_primitives, ignore_primitives_ids) # Get the IDs of primitives - task_keywords = ' '.join(dataset_task_keywords[dataset_id]['task_keywords']) - semantic_types = ' '.join(dataset_semantic_types[dataset_id]['feature_types']) - metafeatures = [dataset_metafeatures[dataset_id][mf]['value'] for mf in DEFAULT_METAFEATURES] - - if len(pipeline_primitives) > 0: - score = pipeline_run['scores'][0]['normalized'] - metric = pipeline_run['scores'][0]['metric']['metric'] - try: - pipeline_primitive_types = [primitives_types[primitives_by_id[p]] for p in pipeline_primitives] - if use_primitive_names: - pipeline_primitives = [primitives_by_id[p] for p in pipeline_primitives] - pipeline_stages = generate_pipeline_stages(pipeline_primitives, pipeline_primitive_types) - pipeline_stages = [[' '.join(ps), task_keywords, semantic_types, metric, score] + metafeatures - for ps in pipeline_stages] - train_pipelines += pipeline_stages - total_pipelines += 1 - except Exception: - logger.warning(f'Primitives "{str(pipeline_primitives)}" are not longer available') - - logger.debug(f'Loaded {len(all_pipelines)} pipelines') - logger.debug(f'Found {total_pipelines} pipelines') - pipelines_df = pd.DataFrame.from_records(train_pipelines, columns=['primitives', 'task_keywords', 'semantic_types', - 'metric', 'score'] + DEFAULT_METAFEATURES) - pipelines_df.to_csv(pipelines_csv_path, index=False) - - -def generate_pipeline_stages(primitive_items, primitive_types): - combinations = [] - pre_pipeline = primitive_types - combinations.append(copy.deepcopy(pre_pipeline)) - - for index, primitive_item in enumerate(primitive_items): - pre_pipeline[index] = primitive_item - combinations.append(copy.deepcopy(pre_pipeline)) - - return combinations - - -if __name__ == '__main__': - # Download the metalearningdb.pkl file from https://drive.google.com/file/d/1WjY7iKkkKMZFeoiCqzamA_iqVOwQidXS/view - # and D3M datasets from https://datasets.datadrivendiscovery.org/d3m/datasets/-/tree/master/seed_datasets_current - metalearningdb_pickle_path = '/Users/rlopez/D3M/metalearning_db/metalearningdb.pkl' - pipelines_csv_path = '/Users/rlopez/D3M/metalearning_db/marvin_pipelines.csv' - - create_csv_data(metalearningdb_pickle_path, pipelines_csv_path) diff --git a/alpha_automl/metalearning/resource_builder.py b/alpha_automl/metalearning/resource_builder.py deleted file mode 100644 index 3011f5fc..00000000 --- a/alpha_automl/metalearning/resource_builder.py +++ /dev/null @@ -1,253 +0,0 @@ -import re -import ast -import json -import gzip -import pickle -import logging -from os.path import join, dirname, exists -from alphad3m.primitive_loader import load_primitives_by_name -from alphad3m.metalearning.dataset_profiler import extract_dataprofiles, extract_metafeatures - -logger = logging.getLogger(__name__) - -METALEARNING_DB_PATH = join(dirname(__file__), '../resource/metalearning_db.json.gz') -PRECALCULATED_TASKKEYWORDS_PATH = join(dirname(__file__), '../resource/precalculated_taskkeywords.json') -PRECALCULATED_METAFEATURES_PATH = join(dirname(__file__), '../resource/precalculated_metafeatures.json') -PRECALCULATED_DATAPROFILES_PATH = join(dirname(__file__), '../resource/precalculated_dataprofiles.json') - - -IGNORE_PRIMITIVES = { - # These primitives are static elements in the pipelines, not considered as part of the pattern - 'd3m.primitives.data_transformation.construct_predictions.Common', - 'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common', - 
'd3m.primitives.data_transformation.dataset_to_dataframe.Common', - 'd3m.primitives.data_transformation.denormalize.Common', - 'd3m.primitives.data_transformation.flatten.DataFrameCommon', - 'd3m.primitives.data_transformation.column_parser.Common', - 'd3m.primitives.data_transformation.simple_column_parser.DataFrameCommon', - 'd3m.primitives.data_transformation.do_nothing.DSBOX', - 'd3m.primitives.data_transformation.do_nothing_for_dataset.DSBOX', - 'd3m.primitives.data_transformation.add_semantic_types.Common', - 'd3m.primitives.schema_discovery.profiler.Common', - 'd3m.primitives.schema_discovery.profiler.DSBOX', - 'd3m.primitives.data_cleaning.column_type_profiler.Simon', - 'd3m.primitives.operator.compute_unique_values.Common', - 'd3m.primitives.data_transformation.construct_confidence.Common', - # We add these primitives internally because they require special connections - 'd3m.primitives.data_transformation.text_reader.Common', - 'd3m.primitives.data_transformation.image_reader.Common' -} - - -def create_compressed_metalearningdb(metalearningdb_pickle_path): - logger.debug('Compressing Meta-Learning DB...') - primitives_by_name = load_primitives_by_name(only_installed_primitives=False) - available_datasets = load_precalculated_data('task_keywords') - ignore_primitives_ids = {primitives_by_name[ignore_primitive]['id'] for ignore_primitive in IGNORE_PRIMITIVES} - pipelines_by_dataset = {} - pipelines_hashing = {} - - with open(metalearningdb_pickle_path, 'rb') as fin: - all_pipelines = pickle.load(fin) - - for pipeline_run in all_pipelines: - dataset_id = get_dataset_id(pipeline_run['problem']['id']) - - if dataset_id not in available_datasets: - continue - - pipeline_primitives = pipeline_run['steps'] - pipeline_primitives = filter_primitives(pipeline_primitives, ignore_primitives_ids) # Get the IDs of primitives - - if dataset_id not in pipelines_by_dataset: - pipelines_by_dataset[dataset_id] = {} - - if len(pipeline_primitives) > 0: - score = pipeline_run['scores'][0]['normalized'] - metric = pipeline_run['scores'][0]['metric']['metric'] - pipeline_str = str(pipeline_primitives) - - if pipeline_str not in pipelines_hashing: - hashing_value = 'P' + str(len(pipelines_hashing)) - pipelines_hashing[pipeline_str] = hashing_value - - pipeline_id = pipelines_hashing[pipeline_str] - - if pipeline_id not in pipelines_by_dataset[dataset_id]: - pipelines_by_dataset[dataset_id][pipeline_id] = {'score': [], 'metric': []} - - pipelines_by_dataset[dataset_id][pipeline_id]['score'].append(score) - pipelines_by_dataset[dataset_id][pipeline_id]['metric'].append(metric) - - pipeline_structure = {} - for pipeline_str, pipeline_id in pipelines_hashing.items(): - primitives = [primitive for primitive in ast.literal_eval(pipeline_str)] # Convert str to list - pipeline_structure[pipeline_id] = primitives - - metalearning_db = {} - metalearning_db['pipeline_performances'] = pipelines_by_dataset - metalearning_db['pipeline_structure'] = pipeline_structure - - with gzip.open(METALEARNING_DB_PATH, 'wt', encoding='UTF-8') as zipfile: - json.dump(json.dumps(metalearning_db), zipfile) # Convert to str and then compress it - - logger.debug('Compressing process ended') - - -def extract_taskkeywords_metalearningdb(datasets_path): - datasets = get_unique_datasets() - task_keywords = load_precalculated_data('task_keywords') - - for dataset_id in datasets: - logger.debug('Calculating task keywords for dataset %s...', dataset_id) - if dataset_id not in task_keywords: - try: - _, _, keywords = 
load_task_info(dataset_id, datasets_path) - task_keywords[dataset_id] = {'task_keywords': keywords} - logger.debug('Task keywords successfully calculated for dataset %s', dataset_id) - with open(PRECALCULATED_TASKKEYWORDS_PATH, 'w') as fout: - json.dump(task_keywords, fout, indent=4, sort_keys=True) - except Exception as e: - logger.error(str(e)) - else: - logger.debug('Using pre-calculated task keywords for dataset %s', dataset_id) - - return task_keywords - - -def extract_metafeatures_metalearningdb(datasets_path): - datasets = get_unique_datasets() - metafeatures = load_precalculated_data('metafeatures') - - for dataset_id in datasets: - logger.debug('Calculating metafeatures for dataset %s...', dataset_id) - if dataset_id not in metafeatures: - try: - dataset_path, target_column, _ = load_task_info(dataset_id, datasets_path, 'SCORE') - mfs = extract_metafeatures(dataset_path, target_column) - metafeatures[dataset_id] = mfs - logger.debug('Metafeatures successfully calculated for dataset %s', dataset_id) - with open(PRECALCULATED_METAFEATURES_PATH, 'w') as fout: - json.dump(metafeatures, fout, indent=4, sort_keys=True) - except Exception as e: - logger.error(str(e)) - else: - logger.debug('Using pre-calculated metafeatures for dataset %s', dataset_id) - - return metafeatures - - -def extract_dataprofiles_metalearningdb(datasets_path): - datasets = get_unique_datasets() - dataprofiles = load_precalculated_data('dataprofiles') - - for dataset_id in datasets: - logger.debug('Calculating data profiles for dataset %s...', dataset_id) - if dataset_id not in dataprofiles: - try: - dataset_path, target_column, _ = load_task_info(dataset_id, datasets_path) - dps = extract_dataprofiles(dataset_path, target_column) - dataprofiles[dataset_id] = dps - logger.debug('Data profiles successfully calculated for dataset %s', dataset_id) - with open(PRECALCULATED_DATAPROFILES_PATH, 'w') as fout: - json.dump(dataprofiles, fout, indent=4, sort_keys=True) - except Exception as e: - logger.error(str(e)) - else: - logger.debug('Using pre-calculated data profiles for dataset %s', dataset_id) - - return dataprofiles - - -def load_task_info(dataset_id, datasets_path, suffix='TRAIN'): - possible_names = [join(datasets_path, dataset_id), join(datasets_path, dataset_id + '_MIN_METADATA'), - join(datasets_path, dataset_id.replace('_MIN_METADATA', ''))] - # All possible names of the datasets on disk, with/without the suffix 'MIN_METADATA' - - for dataset_folder_path in possible_names: - if exists(dataset_folder_path): - break - else: - raise FileNotFoundError('Dataset %s not found' % dataset_id) - - dataset_path = join(dataset_folder_path, suffix, 'dataset_%s/tables/learningData.csv' % suffix) - problem_path = join(dataset_folder_path, suffix, 'problem_%s/problemDoc.json' % suffix) - - with open(problem_path) as fin: - problem_doc = json.load(fin) - task_keywords = problem_doc['about']['taskKeywords'] - target_column = problem_doc['inputs']['data'][0]['targets'][0]['colName'] - - return dataset_path, target_column, task_keywords - - -def get_dataset_id(problem_id): - # Remove suffixes 'TRAIN' and 'problem' from the dataset name - dataset_id = re.sub('_TRAIN$', '', problem_id) - dataset_id = re.sub('_problem$', '', dataset_id) - - return dataset_id - - -def get_unique_datasets(): - metalearning_db = load_metalearningdb() - datasets = metalearning_db['pipeline_performances'].keys() - - return sorted(datasets) - - -def filter_primitives(pipeline_steps, ignore_primitives): - primitives = [] - - for pipeline_step in 
pipeline_steps: - if pipeline_step['primitive']['id'] not in ignore_primitives: - primitives.append(pipeline_step['primitive']['id']) - - if len(primitives) > 0 and primitives[0] == '7ddf2fd8-2f7f-4e53-96a7-0d9f5aeecf93': - # Special case: Primitive to_numeric.DSBOX - # This primitive should not be first because it only takes the numeric features, ignoring the remaining ones - primitives = primitives[1:] - - return primitives - - -def load_metalearningdb(): - all_pipelines = [] - logger.debug('Loading pipelines from metalearning database...') - - with gzip.open(METALEARNING_DB_PATH, 'rt', encoding='UTF-8') as zipfile: - all_pipelines = json.loads(json.load(zipfile)) # Uncompress as str and then convert to dict - - logger.debug('Found %d unique pipelines in metalearning database' % len(all_pipelines['pipeline_structure'])) - - return all_pipelines - - -def load_precalculated_data(mode): - if mode == 'metafeatures': - file_path = PRECALCULATED_METAFEATURES_PATH - elif mode == 'dataprofiles': - file_path = PRECALCULATED_DATAPROFILES_PATH - elif mode == 'task_keywords': - file_path = PRECALCULATED_TASKKEYWORDS_PATH - else: - raise ValueError('Unknown mode "%s" to load data' % mode) - - if exists(file_path): - with open(file_path) as fin: - return json.load(fin) - - return {} - - -if __name__ == '__main__': - # Run this to create the meta-learning DB, task keywords, data profiles, and meta-features files - # Download the metalearningdb.pkl file from https://drive.google.com/file/d/1WjY7iKkkKMZFeoiCqzamA_iqVOwQidXS/view - # and D3M datasets from https://datasets.datadrivendiscovery.org/d3m/datasets/-/tree/master/seed_datasets_current - metalearningdb_pickle_path = '/Users/rlopez/D3M/metalearning_db/metalearningdb.pkl' - datasets_root_path = '/Users/rlopez/D3M/datasets/seed_datasets_current/' - - create_compressed_metalearningdb(metalearningdb_pickle_path) - extract_taskkeywords_metalearningdb(datasets_root_path) - extract_dataprofiles_metalearningdb(datasets_root_path) - extract_metafeatures_metalearningdb(datasets_root_path) diff --git a/alpha_automl/pipeline_search/Arena.py b/alpha_automl/pipeline_search/Arena.py deleted file mode 100644 index a7709f1b..00000000 --- a/alpha_automl/pipeline_search/Arena.py +++ /dev/null @@ -1,104 +0,0 @@ -import logging - -logger = logging.getLogger(__name__) -NUM_IT = 5 - - -class Arena(): - """ - An Arena class where any 2 agents can be pit against each other. - """ - def __init__(self, player1, player2, game, display=None, logfile=None): - """ - Input: - player 1,2: two functions that takes board as input, return action - game: Game object - display: a function that takes board as input and prints it (e.g. - display in othello/OthelloGame). Is necessary for verbose - mode. - - see othello/OthelloPlayers.py for an example. See pit.py for pitting - human players/other baselines with each other. - """ - self.player1 = player1 - self.player2 = player2 - self.game = game - self.display = display - if logfile is not None: - self.f = open(logfile, 'a') - - def playGame(self, verbose=False): - """ - Executes one episode of a game. 
- - Returns: - winner: player who won the game (1 if player1, -1 if player2) - """ - players = [self.player2, None, self.player1] - curPlayer = 1 - board = self.game.getInitBoard() - it = 0 - while self.game.getGameEnded(board, curPlayer) == 0 and it <= NUM_IT: - it += 1 - if verbose: - self.f.write(','.join(self.game.get_pipeline_primitives(board)) + '\n') - assert self.display - logger.debug("Turn %s", it) - logger.debug("Player %s", curPlayer) - self.display(board) - action = players[curPlayer+1](self.game.getCanonicalForm(board, curPlayer)) - # print('ACTION ', action) - valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1) - - # print('VALIDS ', valids) - if valids[action] != 0: - # print(action) - assert valids[action] > 0 - board, curPlayer = self.game.getNextState(board, curPlayer, action) - if verbose: - self.f.write(','.join(self.game.get_pipeline_primitives(board)) + '\n') - assert self.display - logger.debug("Turn %s", it) - logger.debug("Player %s", curPlayer) - self.display(board) - game_ended = self.game.getGameEnded(board, 1) - if verbose: - if game_ended == 1: - self.f.write('Working Pipeline\n') - elif game_ended == 2: - self.f.write('Non-Working Pipeline\n') - # return game_ended==1 and curPlayer == 1 - return self.game.getGameEnded(board, 1) - - def playGames(self, num, verbose=False): - """ - Plays num games in which player1 starts num/2 games and player2 starts - num/2 games. - - Returns: - oneWon: games won by player1 - twoWon: games won by player2 - """ - num = int(num/2) - oneWon = 0 - twoWon = 0 - if verbose: - self.f.write('Round 1\n') - for i in range(num): - if verbose: - self.f.write('Game '+str(i)+'\n') - if self.playGame(verbose=verbose) == 1: - oneWon += 1 - else: - twoWon += 1 - self.player1, self.player2 = self.player2, self.player1 - if verbose: - self.f.write('Round 2 - Players Swapped\n') - for i in range(num): - if verbose: - self.f.write('Game '+str(i)+'\n') - if self.playGame(verbose=verbose) == -1: - oneWon += 1 - else: - twoWon += 1 - return oneWon, twoWon diff --git a/alpha_automl/pipeline_search/Coach.py b/alpha_automl/pipeline_search/Coach.py deleted file mode 100644 index c0fea9ad..00000000 --- a/alpha_automl/pipeline_search/Coach.py +++ /dev/null @@ -1,122 +0,0 @@ -import logging -import numpy as np -from collections import deque -from alpha_automl.pipeline_search.Arena import Arena -from alpha_automl.pipeline_search.MCTS import MCTS -# from alphad3m.pipeline_search.utils import Bar, AverageMeter - - -logger = logging.getLogger(__name__) -np.random.seed(0) - - -class Coach(): - """ - This class executes the self-play + learning. It uses the functions defined - in Game and NeuralNet. args are specified in main.py. - """ - def __init__(self, game, nnet, args): - self.game = game - self.board = game.getInitBoard() - self.nnet = nnet - self.args = args - self.mcts = MCTS(self.game, self.nnet, self.args) - - def executeEpisode(self): - """ - This function executes one episode of self-play, starting with player 1. - As the game is played, each turn is added as a training example to - trainExamples. The game is played till the game ends. After the game - ends, the outcome of the game is used to assign values to each example - in trainExamples. - - It uses a temp=1 if episodeStep < tempThreshold, and thereafter - uses temp=0. - - Returns: - trainExamples: a list of examples of the form (canonicalBoard,pi,v) - pi is the MCTS informed policy vector, v is +1 if - the player eventually won the game, else -1. 
- """ - trainExamples = [] - self.board = self.game.getInitBoard() - self.curPlayer = 1 - episodeStep = 0 - - while True: - episodeStep += 1 - canonicalBoard = self.game.getCanonicalForm(self.board, self.curPlayer) - temp = int(episodeStep < self.args.get('tempThreshold')) - - pi = self.mcts.getActionProb(canonicalBoard, temp=temp) - - sym = self.game.getTrainExamples(canonicalBoard, pi) - - trainExamples.append(sym) - - action = np.random.choice(len(pi), p=pi) - - logger.debug('COACH ACTION %s', action) - self.board, self.curPlayer = self.game.getNextState(self.board, self.curPlayer, action) - - r = self.game.getGameEnded(self.board, self.curPlayer) - if r != 0: - break - - return trainExamples - - def learn(self): - """ - Performs numIters iterations with numEps episodes of self-play in each - iteration. After every iteration, it retrains neural network with - examples in trainExamples (which has a maximium length of maxlenofQueue). - It then pits the new neural network against the old one and accepts it - only if it wins >= updateThreshold fraction of games. - """ - - trainExamples = deque([], maxlen=self.args.get('maxlenOfQueue')) - for i in range(self.args.get('numIters')): - # bookkeeping - logger.debug('------ITER ' + str(i+1) + '------') - # eps_time = AverageMeter() - # bar = Bar('Self Play', max=self.args.get('numEps')) - # end = time.time() - - for eps in range(self.args.get('numEps')): - self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree - trainExamples += self.executeEpisode() - - # bookkeeping + plot progress - # eps_time.update(time.time() - end) - # end = time.time() - # bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format( - # eps=eps+1, maxeps=self.args.get('numEps'), et=eps_time.avg, total=bar.elapsed_td, eta=bar.eta_td) - # bar.next() - # bar.finish() - - # training new network, keeping a copy of the old one - self.nnet.save_checkpoint(folder=self.args.get('checkpoint'), filename='temp.pth.tar') - pnet = self.nnet.__class__(self.game) - pnet.load_checkpoint(folder=self.args.get('checkpoint'), filename='temp.pth.tar') - pmcts = MCTS(self.game, pnet, self.args) - boards, pis, vs = list(zip(*trainExamples)) - # logger.debug([board[self.game.m:self.game.m+self.game.p] for board in boards]) - self.nnet.train(trainExamples) - nmcts = MCTS(self.game, self.nnet, self.args) - - logger.debug('PITTING AGAINST PREVIOUS VERSION') - arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)), - lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game, self.game.display, - self.args['stepsfile']) - pwins, nwins = arena.playGames(self.args.get('arenaCompare'), verbose=self.args['verbose']) - - logger.debug('EVALUATIONS ', self.game.evaluations) - logger.debug('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins)) - if float(nwins)/(pwins+nwins) < self.args['updateThreshold']: - logger.debug('REJECTING NEW MODEL') - self.nnet = pnet - - else: - logger.debug('ACCEPTING NEW MODEL') - self.nnet.save_checkpoint(folder=self.args['checkpoint'], filename='checkpoint_' + str(i) + '.pth.tar') - self.nnet.save_checkpoint(folder=self.args['checkpoint'], filename='best.pth.tar') diff --git a/alpha_automl/pipeline_search/Game.py b/alpha_automl/pipeline_search/Game.py deleted file mode 100644 index 62963f9a..00000000 --- a/alpha_automl/pipeline_search/Game.py +++ /dev/null @@ -1,112 +0,0 @@ -class Game(): - """ - This class specifies the base Game class. To define your own game, subclass - this class and implement the functions below. 
This works when the game is - two-player, adversarial and turn-based. - - Use 1 for player1 and -1 for player2. - - See othello/OthelloGame.py for an example implementation. - """ - def __init__(self): - pass - - def getInitBoard(self): - """ - Returns: - startBoard: a representation of the board (ideally this is the form - that will be the input to your neural network) - """ - pass - - def getBoardSize(self): - """ - Returns: - (x,y): a tuple of board dimensions - """ - pass - - def getActionSize(self): - """ - Returns: - actionSize: number of all possible actions - """ - pass - - def getNextState(self, board, player, action): - """ - Input: - board: current board - player: current player (1 or -1) - action: action taken by current player - - Returns: - nextBoard: board after applying action - nextPlayer: player who plays in the next turn (should be -player) - """ - pass - - def getValidMoves(self, board, player): - """ - Input: - board: current board - player: current player - - Returns: - validMoves: a binary vector of length self.getActionSize(), 1 for - moves that are valid from the current board and player, - 0 for invalid moves - """ - pass - - def getGameEnded(self, board, player): - """ - Input: - board: current board - player: current player (1 or -1) - - Returns: - r: 0 if game has not ended. 1 if player won, -1 if player lost. - """ - pass - - def getCanonicalForm(self, board, player): - """ - Input: - board: current board - player: current player (1 or -1) - - Returns: - canonicalBoard: returns canonical form of board. The canonical form - should be independent of player. For e.g. in chess, - the canonical form can be chosen to be from the pov - of white. When the player is white, we can return - board as is. When the player is black, we can invert - the colors and return the board. - """ - pass - - def getSymmetries(self, board, pi): - """ - Input: - board: current board - pi: policy vector of size self.getActionSize() - - Returns: - symmForms: a list of [(board,pi)] where each tuple is a symmetrical - form of the board and the corresponding pi vector. This - is used when training the neural network from examples. - """ - pass - - def stringRepresentation(self, board): - """ - Input: - - board: current board - - Returns: - boardString: a quick conversion of board to a string format. - Required by MCTS for hashing. - """ - pass diff --git a/alpha_automl/pipeline_search/MCTS.py b/alpha_automl/pipeline_search/MCTS.py deleted file mode 100644 index c660b016..00000000 --- a/alpha_automl/pipeline_search/MCTS.py +++ /dev/null @@ -1,191 +0,0 @@ -import math -import numpy as np -import logging - -logger = logging.getLogger(__name__) - -np.random.seed(0) - - -class MCTS(): - """ - This class handles the MCTS tree. - """ - - def __init__(self, game, nnet, args): - self.game = game - self.nnet = nnet - self.args = args - self.Qsa = {} # stores Q values for s,a (as defined in the paper) - self.Nsa = {} # stores #times edge s,a was visited - self.Ns = {} # stores #times board s was visited - self.Ps = {} # stores initial policy (returned by neural net) - - self.Es = {} # stores game.getGameEnded ended for board s - self.Vals = {} - self.Vs = {} # stores game.getValidMoves for board s - self.count = 0 - - def getActionProb(self, canonicalBoard, temp=1): - """ - This function performs numMCTSSims simulations of MCTS starting from - canonicalBoard. 
- - Returns: - probs: a policy vector where the probability of the ith action is - proportional to Nsa[(s,a)]**(1./temp) - """ - for i in range(self.args.get('numMCTSSims')): - logger.debug('MCTS SIMULATION %s', i + 1) - self.search(canonicalBoard) - - s = self.game.stringRepresentation(canonicalBoard) - counts = [0] * self.game.getActionSize() - counts = [self.Nsa[(s, a)] if (s, a) in self.Nsa else 0 for a in range(self.game.getActionSize())] - - if temp == 0: - bestA = np.argmax(counts) - probs = [0] * len(counts) - probs[bestA] = 1 - if np.sum(probs) == 0: - logger.debug('PROB ZERO') - return probs - - counts = [x ** (1. / temp) for x in counts] - if np.sum(counts) == 0: - probs = [1 / (len(counts))] * len(counts) - else: - non_zero_args = list(np.where(np.asarray(counts) > 0)[0]) - probs = [0] * len(counts) - for index in non_zero_args: - probs[index] = counts[index] / float(sum(counts)) - if np.sum(probs) == 0: - logger.debug('PROB ZERO') - return probs - - def search(self, canonicalBoard, player=1): - """ - This function performs one iteration of MCTS. It is recursively called - till a leaf node is found. The action chosen at each node is one that - has the maximum upper confidence bound as in the paper. - - Once a leaf node is found, the neural network is called to return an - initial policy P and a value v for the state. This value is propogated - up the search path. In case the leaf node is a terminal state, the - outcome is propogated up the search path. The values of Ns, Nsa, Qsa are - updated. - - NOTE: the return values are the negative of the value of the current - state. This is done since v is in [-1,1] and if v is the value of a - state for the current player, then its value is -v for the other player. - - Returns: - v: the negative of the value of the current canonicalBoard - """ - self.game.display(canonicalBoard) - - s = self.game.stringRepresentation(canonicalBoard) - game_ended = self.game.getGameEnded(canonicalBoard, player) - # logger.debug('GAME ENDED %s', game_ended) - - if s not in self.Es: - self.Es[s] = game_ended - if self.Es[s] != 0: - # terminal node - # Clear all previous moves - return self.Vals[s] if not self.Vals.get(s) is None else 0 - - if s not in self.Ps: - # leaf node - self.Ps[s], v = self.nnet.predict(self.game.getTrainBoard(canonicalBoard)) - logger.debug('Prediction %s', v) - # logger.debug('CALLING VALID MOVES') - valids = self.game.getValidMoves(canonicalBoard, 1) - self.Ps[s] = self.Ps[s] * valids # masking invalid moves - self.Ps[s] /= np.sum(self.Ps[s]) # renormalize - self.Vals[s] = v - self.Vs[s] = valids - self.Ns[s] = 0 - return v - - valids = self.Vs[s] - - # Check if valid moves are available. 
Quit if no more legal moves are possible - if not any(valids): - return 0 - - cur_best = -float('inf') - best_act = -1 - current_primitives = self.game.get_pipeline_primitives(canonicalBoard) - - current_pattern = ' '.join( - [self.game.grammar['RULES_PROBA']['TYPES'].get(p, {'type': p})['type'] for p in current_primitives]) - # pick the action with the highest upper confidence bound - actions = [] - alpha = 0.1 - metalearning_weights = {} - - if len(self.game.grammar['RULES_PROBA']['GLOBAL']) == 0: # It's a manual grammar, so only use the NN info - alpha = 1.0 - - for a in range(self.game.getActionSize()): - if valids[a]: - # logger.debug('MCTS ACTION %s', a) - global_proba = self.game.grammar['RULES_PROBA']['GLOBAL'].get(a + 1, (0, 0))[1] - local_proba = self.game.grammar['RULES_PROBA']['LOCAL'].get(current_pattern, {}).get(a + 1, (0, 0))[1] - correlation = global_proba * local_proba - metalearning_weights[a] = correlation - - if (s, a) in self.Qsa: - u = self.Qsa[(s, a)] + \ - self.args.get('cpuct') * (alpha * self.Ps[s][a] + (1 - alpha) * correlation) * \ - math.sqrt(self.Ns[s]) / (1 + self.Nsa[(s, a)]) - # u = (global_proba + local_proba) + self.Qsa[(s, a)] + self.args.get('cpuct') * - # math.sqrt(self.Ns[s]) / (1 + self.Nsa[(s, a)]) - # u = self.Qsa[(s, a)] + self.args.get('cpuct') * self.Ps[s][a] * - # math.sqrt(self.Ns[s]) / (1 + self.Nsa[(s, a)]) - else: - u = self.args.get('cpuct') * \ - (alpha * self.Ps[s][a] + (1 - alpha) * correlation) * math.sqrt(self.Ns[s]) # Q = 0 ? - # u = (global_proba + local_proba) + self.args.get('cpuct') * math.sqrt(self.Ns[s]) # Q = 0 ? - # u = self.args.get('cpuct') * self.Ps[s][a] * math.sqrt(self.Ns[s]) # Q = 0 ? - # print('Production:', self.game.grammar['RULES_PROBA']['GLOBAL'] - # .get(a + 1, ('None', 0))[0], correlation) - # print('Value of u:', u) - if u > cur_best: - cur_best = u - best_act = a - actions = [a] - elif u == cur_best: - actions.append(a) - - if len(actions) == sum(valids): # All the available actions have the same value - a = sorted(metalearning_weights.items(), key=lambda x: x[1], reverse=True)[0][0] - elif len(actions) > 1: - a = np.random.choice(np.asarray(actions)) - else: - a = best_act - # print('>>>>>>>>>>best a=%d' % a, 'local', self.game.grammar['RULES_PROBA']['LOCAL'] - # .get(current_pattern, {}).get(a + 1, ('None', 0))) - # print('>>>>>>>>>>best a=%d' % a, 'total', self.game.grammar['RULES_PROBA']['GLOBAL'].get(a+1, (0, 0))[1] - # * self.game.grammar['RULES_PROBA']['LOCAL'].get(current_pattern, {}).get(a + 1, (0, 0))[1]) - # logger.debug('BEST ACTIONS %s', actions) - # logger.debug('MCTS BEST ACTION %s', best_act) - next_s, next_player = self.game.getNextState(canonicalBoard, player, a) - next_s = self.game.getCanonicalForm(next_s, next_player) - # self.game.display(next_s) - - # logger.debug('NEXT STATE SEARCH RECURSION') - v = self.search(next_s, next_player) - - if (s, a) in self.Qsa: - self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1) - self.Nsa[(s, a)] += 1 - - else: - self.Qsa[(s, a)] = v - self.Nsa[(s, a)] = 1 - - self.Ns[s] += 1 - - return v diff --git a/alpha_automl/pipeline_search/NeuralNet.py b/alpha_automl/pipeline_search/NeuralNet.py deleted file mode 100644 index 7fcfd644..00000000 --- a/alpha_automl/pipeline_search/NeuralNet.py +++ /dev/null @@ -1,50 +0,0 @@ -class NeuralNet(): - """ - This class specifies the base NeuralNet class. To define your own neural - network, subclass this class and implement the functions below. 
The neural - network does not consider the current player, and instead only deals with - the canonical form of the board. - - See othello/NNet.py for an example implementation. - """ - - def __init__(self, game): - pass - - def train(self, examples): - """ - This function trains the neural network with examples obtained from - self-play. - - Input: - examples: a list of training examples, where each example is of form - (board, pi, v). pi is the MCTS informed policy vector for - the given board, and v is its value. The examples has - board in its canonical form. - """ - pass - - def predict(self, board): - """ - Input: - board: current board in its canonical form. - - Returns: - pi: a policy vector for the current board- a numpy array of length - game.getActionSize - v: a float in [-1,1] that gives the value of the current board - """ - pass - - def save_checkpoint(self, folder, filename): - """ - Saves the current neural network (with its parameters) in - folder/filename - """ - pass - - def load_checkpoint(self, folder, filename): - """ - Loads parameters of the neural network from folder/filename - """ - pass diff --git a/alpha_automl/pipeline_search/agent_environment.py b/alpha_automl/pipeline_search/agent_environment.py new file mode 100644 index 00000000..b99686c7 --- /dev/null +++ b/alpha_automl/pipeline_search/agent_environment.py @@ -0,0 +1,147 @@ +import logging + +import gymnasium as gym +import numpy as np +from gymnasium.spaces import Box, Dict, Discrete +from ray.rllib.env.env_context import EnvContext + +logger = logging.getLogger(__name__) + + +class AutoMLEnv(gym.Env): + """ + Customized environment for RLlib Reinforcement Learning. + reset: reset the environment to the initial state + step: take an action and return the next state, reward, done, and info + rewards in detail: + - win: + - CLASSIFICATION: 10 + (pipeline score) ^ 2 * 100 + - REGRESSION: 10 + (100 / pipeline score) + - not end: 1 + - invalid: 10 + - bad: -1 + """ + + def __init__(self, config: EnvContext): + self.game = config["game"] # PipelineGame + self.board = self.game.getInitBoard() # initial board + self.step_stack = ["S"] # stack for steps + self.metadata = self.board[: self.game.m] + self.observation_space = Dict( + { + "board": Box( + 0, 85, shape=(self.game.p + self.game.m,), dtype=np.uint8 + ), # Ray env board contains pipeline and metadata + } + ) + self.action_spaces = ( + self.generate_action_spaces() + ) # number of actions for each step + self.max_actions = max(list(self.action_spaces.values())) # max number of actions (depends on the largest step in the grammar, i.e. 
CLASSIFIER) + self.action_offsets = ( + self.generate_action_offsets() + ) # offset for each step, for translating action to PipelineGame action + self.action_space = Discrete(self.max_actions) # Ray env action space + + def reset(self, *, seed=None, options=None): + self.num_steps = 0 + self.step_stack = ["S"] + self.board = self.game.getInitBoard() + self.metadata = self.board[: self.game.m] + + return {"board": np.array(self.board).astype(np.uint8)}, {} + + def step(self, action): + curr_step = self.step_stack.pop() + offseted_action = self.action_offsets[curr_step] + action + valid_action_size = self.action_spaces[curr_step] + # Check the action is illegal + valid_moves = self.game.getValidMoves(self.board) + if action >= valid_action_size or valid_moves[offseted_action - 1] != 1: + return ( + {"board": np.array(self.board).astype(np.uint8)}, + -1, + True, + False, + {}, + ) + + # Check the action is out of order + move_type, non_terminals_moves = self.extract_action_details(offseted_action) + if move_type != curr_step: + return ( + {"board": np.array(self.board).astype(np.uint8)}, + -100, + True, + False, + {}, + ) + if ( + non_terminals_moves[0] != "E" + and non_terminals_moves[0].upper() == non_terminals_moves[0] + ): + self.step_stack.extend(non_terminals_moves[::-1]) + + # update number of steps + self.num_steps += 1 + + # update board with new action + self.board = self.game.getNextState(self.board, offseted_action - 1) + + # reward: win(1) - pipeline score, not end(0) - 1, bad(2) - -1 + reward = 0 + game_end = self.game.getGameEnded(self.board) + if game_end == 1: # pipeline score over threshold + try: + if self.game.problem == "REGRESSION": + reward = 10 + (100 / self.game.getEvaluation(self.board)) + else: + reward = 10 + (self.game.getEvaluation(self.board)) ** 2 * 100 + except Exception as e: + logger.critical(f"[PIPELINE FOUND] Error happened: {str(e)}") + elif game_end == 2: # finished but invalid + reward = 10 + else: + reward = 1 + + # done & truncated + truncated = self.num_steps >= 20 + done = game_end or truncated + + return ( + {"board": np.array(self.board).astype(np.uint8)}, + reward, + done, + truncated, + {}, + ) + + def extract_action_details(self, action): + rules = self.game.grammar["RULES"] + move_string = list(rules.keys())[list(rules.values()).index(action)] + split_move = move_string.split("->") + move_type = split_move[0].strip() + non_terminals_moves = split_move[1].strip().split(" ") + return move_type, non_terminals_moves + + def generate_action_spaces(self): + action_spaces = {} + for action in self.game.grammar["RULES"].values(): + move_type, non_terminals_moves = self.extract_action_details(action) + + if move_type not in action_spaces: + action_spaces[move_type] = 1 + else: + action_spaces[move_type] += 1 + + return action_spaces + + def generate_action_offsets(self): + action_offsets = {} + for action in self.game.grammar["RULES"].values(): + move_type, non_terminals_moves = self.extract_action_details(action) + + if move_type not in action_offsets: + action_offsets[move_type] = action + + return action_offsets diff --git a/alpha_automl/pipeline_search/agent_lab.py b/alpha_automl/pipeline_search/agent_lab.py new file mode 100644 index 00000000..a1614f5b --- /dev/null +++ b/alpha_automl/pipeline_search/agent_lab.py @@ -0,0 +1,196 @@ +import json +import logging +import os +import time +from datetime import datetime + +import ray +from alpha_automl.pipeline_search.agent_environment import AutoMLEnv +from ray.rllib.policy import Policy +from 
ray.rllib.utils.checkpoints import get_checkpoint_info +from ray.tune.logger import pretty_print +from ray.tune.registry import get_trainable_cls + +logger = logging.getLogger(__name__) + + +def pipeline_search_rllib(game, time_bound, checkpoint_load_folder, checkpoint_save_folder): + """ + Search for pipelines using Rllib + """ + ray.init(local_mode=True, num_cpus=8, logging_level=logging.CRITICAL, log_to_driver=False) + num_cpus = int(ray.available_resources()["CPU"]) + + # load checkpoint or create a new one + algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=7) + logger.debug("Create Algo object done") + + # train model + train_rllib_model(algo, time_bound, checkpoint_load_folder, checkpoint_save_folder) + logger.debug("Training done") + ray.shutdown() + + +def load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers): + config = ( + get_trainable_cls("PPO") + .get_default_config() + # or "corridor" if registered above + .environment(AutoMLEnv, env_config={"game": game}) + .framework("torch") + # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. + .resources( + # num_gpus=1, + # num_gpus_per_worker=1 / (num_rollout_workers + 1), + num_cpus_per_worker=1, + ) + .rollouts(num_rollout_workers=num_rollout_workers) + .training( + gamma=0.99, + clip_param=0.3, + kl_coeff=0.3, + entropy_coeff=0.05, + train_batch_size=10000, + ) + ) + config.lr = 1e-5 + config.simple_optimizer = True + logger.debug("Create Config done") + + # Checking if the list is empty or not + if not contain_checkpoints(checkpoint_load_folder): + logger.debug("Cannot read checkpoint, create a new one.") + return config.build() + else: + algo = config.build() + weights = load_rllib_policy_weights(checkpoint_load_folder) + + algo.set_weights(weights) + # Restore the old state. 
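As an illustration, the weight-only restore above can be summarized in a minimal sketch (restore_weights_only is an illustrative helper name; Ray RLlib 2.x is assumed). Copying just the policy tensors keeps the freshly built config, for example a different number of rollout workers, whereas algo.restore() would also bring back the old config, optimizer state and iteration counters.

from ray.rllib.policy import Policy

def restore_weights_only(algo, checkpoint_dir):
    # Pull only the trained policy out of an Algorithm checkpoint directory...
    policies = Policy.from_checkpoint(checkpoint_dir)   # e.g. {"default_policy": Policy}
    weights = {"default_policy": policies["default_policy"].get_weights()}
    # ...and copy its tensors into the freshly built Algorithm object.
    algo.set_weights(weights)
    return algo
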
+ # algo.restore(load_folder) + # checkpoint_info = get_checkpoint_info(load_folder) + return algo + + +def train_rllib_model(algo, time_bound, checkpoint_load_folder, checkpoint_save_folder): + timeout = time.time() + time_bound + result = algo.train() + last_best = result["episode_reward_mean"] + best_unchanged_iter = 1 + logger.debug(pretty_print(result)) + + while True: + if ( + time.time() > timeout + or (best_unchanged_iter >= 600 and result["episode_reward_mean"] >= 0) + # or result["episode_reward_mean"] >= 70 + ): + logger.debug(f"Training timeout reached") + break + + if contain_checkpoints(checkpoint_save_folder): + weights = load_rllib_policy_weights(checkpoint_save_folder) + algo.set_weights(weights) + elif contain_checkpoints(checkpoint_load_folder): + weights = load_rllib_policy_weights(checkpoint_load_folder) + algo.set_weights(weights) + result = algo.train() + logger.debug(pretty_print(result)) + # stop training of the target train steps or reward are reached + if result["episode_reward_mean"] > last_best: + last_best = result["episode_reward_mean"] + best_unchanged_iter = 1 + save_rllib_checkpoint(algo, checkpoint_save_folder) + else: + best_unchanged_iter += 1 + algo.stop() + + +def load_rllib_policy_weights(checkpoint_folder): + logger.debug(f"Synchronizing model weights...") + policy = Policy.from_checkpoint(checkpoint_folder) + policy = policy["default_policy"] + weights = policy.get_weights() + + weights = {"default_policy": weights} + + return weights + + +def save_rllib_checkpoint(algo, checkpoint_save_folder): + save_result = algo.save(checkpoint_dir=checkpoint_save_folder) + path_to_checkpoint = save_result.checkpoint.path + + logger.debug( + f"An Algorithm checkpoint has been created inside directory: '{path_to_checkpoint}'." + ) + + +def dump_result_to_json(primitives, task_start, score, output_folder=None): + output_path = generate_json_path(output_folder) + # Read JSON data from input file + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + with open(output_path, "w") as f: + json.dump({}, f) + with open(output_path, "r") as f: + data = json.load(f) + + timestamp = str(datetime.now() - task_start) + # strftime("%Y-%m-%d %H:%M:%S") + + # Check for duplicate elements + if primitives in data.values(): + return + data[score] = primitives + + # Write unique elements to output file + with open(output_path, "w") as f: + json.dump(data, f) + + +def read_result_to_pipeline(builder, output_folder=None): + output_path = generate_json_path(output_folder) + + pipelines = [] + # Read JSON data from input file + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + return [] + with open(output_path, "r") as f: + data = json.load(f) + + # Check for duplicate elements + for score, primitives in sorted(data.items()): + pipeline = builder.make_pipeline(primitives) + if pipeline: + pipelines.append(pipeline) + + return pipelines + + +def generate_json_path(output_folder=None): + output_path = os.path.join(output_folder, "result.json") + + return output_path + + +def contain_checkpoints(folder_path): + if folder_path is None: + return False + + file_list = os.listdir(folder_path) + + if [f for f in file_list if not f.startswith(".")] == []: + return False + + if ( + "algorithm_state.pkl" in file_list + and "policies" in file_list + and "rllib_checkpoint.json" in file_list + ): + return True + else: + logger.debug( + f"Checkpoint folder {folder_path} does not contain all necessary files, files: {file_list}." 
+ ) + + return False diff --git a/alpha_automl/pipeline_search/pipeline/PipelineGame.py b/alpha_automl/pipeline_search/game.py similarity index 67% rename from alpha_automl/pipeline_search/pipeline/PipelineGame.py rename to alpha_automl/pipeline_search/game.py index 467e8b17..dc798d42 100644 --- a/alpha_automl/pipeline_search/pipeline/PipelineGame.py +++ b/alpha_automl/pipeline_search/game.py @@ -1,11 +1,7 @@ from __future__ import print_function -import os -import pickle import math import logging -from copy import deepcopy -from alpha_automl.pipeline_search.Game import Game -from alpha_automl.pipeline_search.pipeline.PipelineLogic import Board +from alpha_automl.pipeline_search.game_logic import Board import numpy as np import traceback import time @@ -13,11 +9,10 @@ logger = logging.getLogger(__name__) -class PipelineGame(Game): +class PipelineGame(): # FIXEME: Maybe the input parameters can be in json - def __init__(self, input={}, eval_pipeline=None, args=None): + def __init__(self, input=None, eval_pipeline=None): self.steps = 0 - self.args = input['ARGS'] self.evaluations = {} self.eval_times = {} @@ -31,21 +26,7 @@ def __init__(self, input={}, eval_pipeline=None, args=None): self.data_type = input['DATA_TYPE'].upper() self.metric = input['METRIC'] self.dataset = input['DATASET'] - - self.dataset_metafeatures = input['DATASET_METAFEATURES'] - if self.dataset_metafeatures is None: - metafeatures_path = args.get('metafeatures_path') - if metafeatures_path is not None: - metafeatures_file = os.path.join(metafeatures_path, args['dataset'] + '_metafeatures.pkl') - if os.path.isfile(metafeatures_file): - m_f = open(metafeatures_file, 'rb') - self.dataset_metafeatures = pickle.load(m_f)[args['dataset']] - - if self.dataset_metafeatures is None: - logger.warning('No Dataset Metafeatures specified - Initializing to empty') - self.dataset_metafeatures = [] - else: - self.dataset_metafeatures = list(np.nan_to_num(np.asarray(self.dataset_metafeatures))) + self.dataset_metafeatures = list(np.nan_to_num(np.asarray(input['DATASET_METAFEATURES']))) self.m = len(self.dataset_metafeatures)+2 self.p = input['PIPELINE_SIZE'] @@ -69,18 +50,18 @@ def getActionSize(self): board = Board(self.m, self.grammar, self.pipeline_size, self.metric) return len(board.valid_moves) - def getNextState(self, board, player, action): - # if player takes action on board, return next (board,player) + def getNextState(self, board, action): # action must be a valid move b = Board(self.m, self.grammar, self.pipeline_size, self.metric) b.set_metafeatures(board) b.set_pipeline(board) # logger.debug('PREV STATE %s', b.pieces_p) - b.execute_move(action, player) + b.execute_move(action) # logger.debug('NEXT STATE %s', b.pieces_p) - return (b.pieces_m+b.pieces_p, -player) - def getValidMoves(self, board, player): + return b.pieces_m+b.pieces_p + + def getValidMoves(self, board): # return a fixed size binary vector b = Board(self.m, self.grammar, self.pipeline_size, self.metric) b.set_metafeatures(board) @@ -102,7 +83,7 @@ def getEvaluation(self, board): if eval_val is None: self.steps = self.steps + 1 try: - eval_val = self.eval_pipeline(pipeline, 'AlphaAutoML') + eval_val = self.eval_pipeline(pipeline) except Exception: logger.warning('Error in Pipeline Execution %s', eval_val) traceback.print_exc() @@ -113,41 +94,33 @@ def getEvaluation(self, board): return eval_val - def getGameEnded(self, board, player, eval_val=None): - # return 0 if not ended, 1 if x won, -1 if x lost - # player = 1 + def getGameEnded(self, board, 
eval_val=None): + # return 0 if not ended, 1 if x won, 2 if x lost b = Board(self.m, self.grammar, self.pipeline_size, self.metric) b.set_metafeatures(board) b.set_pipeline(board) if not b.is_terminal_pipeline(): return 0 - if len(self.evaluations) > 0: - sorted_evals = sorted([eval for eval in list(self.evaluations.values()) if eval != float('inf')]) - if len(sorted_evals) > 0: - if 'error' in self.metric.lower(): - win_threshold = sorted_evals[0] - else: - win_threshold = sorted_evals[-1] - b.win_threshold = win_threshold +# if len(self.evaluations) > 0: +# sorted_evals = sorted([eval for eval in list(self.evaluations.values()) if eval != float('inf')]) +# if len(sorted_evals) > 0: +# if 'error' in self.metric.lower(): +# win_threshold = sorted_evals[0] +# else: +# win_threshold = sorted_evals[-1] +# b.win_threshold = win_threshold eval_val = self.getEvaluation(board) - if b.findWin(player, eval_val): - logger.debug('findwin %s', player) + if b.findWin(eval_val): + logger.debug('Win') return 1 - if b.findWin(-player, eval_val): - logger.debug('findwin %', -player) - return -1 if b.has_legal_moves(): return 0 return 2 - def getCanonicalForm(self, board, player): - # return state if player==1, else return -state if player==-1 - return deepcopy(board) - def stringRepresentation(self, board): # 3x3 numpy array (canonical board) return np.asarray(board).tostring() diff --git a/alpha_automl/pipeline_search/pipeline/PipelineLogic.py b/alpha_automl/pipeline_search/game_logic.py similarity index 96% rename from alpha_automl/pipeline_search/pipeline/PipelineLogic.py rename to alpha_automl/pipeline_search/game_logic.py index 5ffa0b6d..21810cb1 100644 --- a/alpha_automl/pipeline_search/pipeline/PipelineLogic.py +++ b/alpha_automl/pipeline_search/game_logic.py @@ -16,7 +16,7 @@ class Board(): - def __init__(self, m=30, grammar={}, pipeline_size=6, metric='f1macro', win_threshold=0.6): + def __init__(self, m=30, grammar=None, pipeline_size=6, metric='accuracy', win_threshold=0.01): "Set up initial board configuration." 
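For reference, a small worked sketch of how the getGameEnded codes map to the rewards in AutoMLEnv.step (sketch_reward is an illustrative name; the constants come from agent_environment.py above).

def sketch_reward(game_end, problem, pipeline_score):
    # game_end comes from PipelineGame.getGameEnded:
    #   0 = pipeline not finished, 1 = finished and scored over the threshold,
    #   2 = finished but could not be evaluated
    if game_end == 1:
        if problem == "REGRESSION":
            return 10 + 100 / pipeline_score   # lower error gives a larger reward
        return 10 + pipeline_score ** 2 * 100  # e.g. accuracy 0.9 gives 10 + 81 = 91
    if game_end == 2:
        return 10                              # terminal but invalid pipeline
    return 1                                   # intermediate step: keep building

Illegal or out-of-order actions never reach this mapping: step() returns -1 for an invalid move and -100 for an action that belongs to the wrong grammar step, ending the episode.
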
self.terminals = grammar['TERMINALS'] @@ -61,7 +61,7 @@ def is_terminal_pipeline(self): return False return True - def findWin(self, player, eval_val=None): + def findWin(self, eval_val=None): """Find win of the given color in row, column, or diagonal (1 for x, -1 for o)""" if not any(self[0:]): @@ -135,7 +135,7 @@ def get_train_board(self): def get_board_size(self): return self.m+(len(self.terminals)+len(self.non_terminals)) - def execute_move(self, action, player): + def execute_move(self, action): """Perform the given move on the board; color gives the color of the piece to play (1=x,-1=o) """ diff --git a/alpha_automl/pipeline_search/pipeline/NNet.py b/alpha_automl/pipeline_search/pipeline/NNet.py deleted file mode 100644 index f364f4ab..00000000 --- a/alpha_automl/pipeline_search/pipeline/NNet.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import numpy as np -import logging -from alpha_automl.pipeline_search.NeuralNet import NeuralNet -import torch -import torch.optim as optim -from torch.autograd import Variable -from alpha_automl.pipeline_search.pipeline.PipelineNNet import PipelineNNet as onnet - - -logger = logging.getLogger(__name__) - -args = dict({ - 'lr': 0.001, - 'dropout': 0.3, - 'epochs': 2, - 'batch_size': 64, - 'num_channels': 512, -}) - -device = 'cuda' if torch.cuda.is_available() else 'cpu' - - -class NNetWrapper(NeuralNet): - def __init__(self, game): - self.nnet = onnet(game, args) - self.action_size = game.getActionSize() - self.board_size = game.getBoardSize() - if device == 'cuda': - self.nnet.cuda() - - def train(self, examples): - """ - examples: list of examples, each example is of form (board, pi, v) - """ - optimizer = optim.Adam(self.nnet.parameters()) - - for epoch in range(args.get('epochs')): - logger.debug('EPOCH ::: %s', str(epoch+1)) - self.nnet.train() - # data_time = AverageMeter() - # batch_time = AverageMeter() - # pi_losses = AverageMeter() - # v_losses = AverageMeter() - # end = time.time() - - batch_size = args.get('batch_size') - # bar = Bar('Training Net', max=int(len(examples)/batch_size)) - batch_idx = 0 - - while batch_idx < int(len(examples)/batch_size): - sample_ids = np.random.randint(len(examples), size=batch_size) - boards, pis, vs = list(zip(*[examples[i] for i in sample_ids])) - boards = torch.FloatTensor(np.array(boards).astype(np.float64)) - target_pis = torch.FloatTensor(np.array(pis)) - target_vs = torch.FloatTensor(np.array(vs).astype(np.float64)) - - # predict - if device == 'cuda': - boards, target_pis, target_vs = Variable(boards.contiguous().cuda(), requires_grad=True), \ - Variable(target_pis.contiguous().cuda(), requires_grad=True), \ - Variable(target_vs.contiguous().cuda(), requires_grad=True) - else: - boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs) - - # measure data loading time - # data_time.update(time.time() - end) - - # compute output - # print(boards) - out_pi, out_v = self.nnet(boards) - l_pi = self.loss_pi(target_pis, out_pi) - l_v = self.loss_v(target_vs, out_v) - total_loss = l_pi + l_v - - # record loss - # pi_losses.update(l_pi.data, boards.size(0)) - # v_losses.update(l_v.data, boards.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - total_loss.backward() - optimizer.step() - - # measure elapsed time - # batch_time.update(time.time() - end) - # end = time.time() - batch_idx += 1 - - def predict(self, board): - """ - board: np array with board - """ - # timing - # start = time.time() - # print('BOARD\n', board) - - # preparing input - board 
= torch.from_numpy(np.array(board[0:self.board_size], dtype='f')).cuda().float() if device == 'cuda' \ - else torch.from_numpy(np.array(board[0:self.board_size], dtype='f')) - # board = torch.FloatTensor(board[0:self.board_x]) - if device == 'cuda': - board = board.contiguous().cuda() - board = Variable(board, volatile=True) - board = board.view(1, self.board_size) - - self.nnet.eval() - pi, v = self.nnet(board) - - # print('PROBABILITY ', torch.exp(pi).data.cpu().numpy()[0]) - # print('VALUE ', v.data.cpu().numpy()[0]) - - # logger.debug('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start)) - return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0][0] - - def loss_pi(self, targets, outputs): - return -torch.sum(targets*outputs)/targets.size()[0] - - def loss_v(self, targets, outputs): - return torch.sum((targets-outputs.view(-1))**2)/targets.size()[0] - - def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'): - filepath = os.path.join(folder, filename) - if not os.path.exists(folder): - logger.debug("Checkpoint Directory does not exist! Making directory {}".format(folder)) - os.mkdir(folder) - else: - logger.debug("Checkpoint Directory exists! ") - torch.save({ - 'state_dict': self.nnet.state_dict(), - }, filepath) - - def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'): - # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98 - filepath = os.path.join(folder, filename) - if not os.path.exists(filepath): - raise ("No model in path {}".format(folder)) - - checkpoint = torch.load(filepath) - self.nnet.load_state_dict(checkpoint['state_dict']) diff --git a/alpha_automl/pipeline_search/pipeline/PipelineNNet.py b/alpha_automl/pipeline_search/pipeline/PipelineNNet.py deleted file mode 100644 index 8c1975fc..00000000 --- a/alpha_automl/pipeline_search/pipeline/PipelineNNet.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging - -import torch -import torch.nn as nn -import torch.nn.functional as F - -logger = logging.getLogger(__name__) - - -class PipelineNNet(nn.Module): - def __init__(self, game, args): - # game params - self.action_size = game.getActionSize() - self.board_size = game.getBoardSize() - self.problem = game.problem - self.args = args - - super(PipelineNNet, self).__init__() - hlayer = 512 - torch.manual_seed(1) - self.lstm = nn.LSTM(self.board_size, hlayer, 2) - self.probFC = nn.Linear(hlayer, self.action_size) - self.valueFC = nn.Linear(hlayer, 1) - - def forward(self, s): - s = s.view(-1, 1, self.board_size) - lstm_out, hidden = self.lstm(s) - s = lstm_out[:, -1] - pi = self.probFC(s) # batch_size x 512 - v = self.valueFC(s) # batch_size x 512 - - if self.problem == 'CLASSIFICATION': - return F.log_softmax(pi, 1), F.sigmoid(v) - else: - return F.log_softmax(pi, 1), v diff --git a/alpha_automl/pipeline_search/pipeline/PipelinePlayers.py b/alpha_automl/pipeline_search/pipeline/PipelinePlayers.py deleted file mode 100644 index e69de29b..00000000 diff --git a/alpha_automl/pipeline_search/pipeline/__init__.py b/alpha_automl/pipeline_search/pipeline/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/alpha_automl/pipeline_search/utils.py b/alpha_automl/pipeline_search/utils.py deleted file mode 100644 index ffda3a9f..00000000 --- a/alpha_automl/pipeline_search/utils.py +++ /dev/null @@ -1,3 +0,0 @@ -class dotdict(dict): - def __getattr__(self, name): - return self[name] diff --git a/alpha_automl/pipeline_synthesis/setup_search.py b/alpha_automl/pipeline_synthesis/setup_search.py 
index 636e7e29..1bcc4de8 100644 --- a/alpha_automl/pipeline_synthesis/setup_search.py +++ b/alpha_automl/pipeline_synthesis/setup_search.py @@ -1,69 +1,138 @@ -import os -import sys -import signal import logging -from os.path import join -from alpha_automl.grammar_loader import load_automatic_grammar, load_manual_grammar -from alpha_automl.pipeline_search.Coach import Coach -from alpha_automl.pipeline_search.pipeline.NNet import NNetWrapper -from alpha_automl.pipeline_search.pipeline.PipelineGame import PipelineGame +import sys +from datetime import datetime +from os.path import dirname, join + +from alpha_automl.grammar_loader import load_manual_grammar +from alpha_automl.pipeline_search.agent_lab import (contain_checkpoints, + dump_result_to_json, + pipeline_search_rllib, + read_result_to_pipeline) +from alpha_automl.pipeline_search.game import PipelineGame from alpha_automl.pipeline_synthesis.pipeline_builder import BaseBuilder from alpha_automl.scorer import score_pipeline from alpha_automl.utils import hide_logs logger = logging.getLogger(__name__) +DEFAULT_CHECKPOINT_PATH = join(dirname(__file__), "../resource/checkpoints/") config = { - 'PROBLEM_TYPES': { - 'CLASSIFICATION': 1, - 'REGRESSION': 2, - 'CLUSTERING': 3, - 'NA': 4, - 'TIME_SERIES_FORECAST': 5, - 'SEMISUPERVISED': 6, - }, - 'DATA_TYPES': {'TABULAR': 1, 'GRAPH': 2, 'IMAGE': 3}, - 'PIPELINE_SIZE': 8, - 'ARGS': { - 'numIters': 25, - 'numEps': 5, - 'tempThreshold': 15, - 'updateThreshold': 0.6, - 'maxlenOfQueue': 200000, - 'numMCTSSims': 5, - 'arenaCompare': 40, - 'cpuct': 1, - 'load_model': False, - 'metafeatures_path': '/d3m/data/metafeatures', - 'verbose': True, + "PROBLEM_TYPES": { + "CLASSIFICATION": 1, + "REGRESSION": 2, + "CLUSTERING": 3, + "TIME_SERIES_FORECAST": 4, + "SEMISUPERVISED": 5, + "NA": 6, }, + "DATA_TYPES": {"TABULAR": 1, "TEXT": 2, "IMAGE": 3, "VIDEO": 4, "MULTIMODAL": 5}, + "PIPELINE_SIZE": 10, } -def signal_handler(queue, signum): - logger.debug(f'Receiving signal {signum}, terminating process') - queue.put('DONE') - # TODO: Should it save the last status of the NN model? 
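For completeness, a quick illustration of the DATASET_METAFEATURES vector that update_config() and compute_metafeatures(), both defined later in this file, feed into the game board. The dataset below is hypothetical; the padding to eight entries mirrors update_config().

metadata = {
    "missing_values": True,
    "nonnumeric_columns": {
        "CATEGORICAL_ENCODER": ["color", "city"],
        "DATETIME_ENCODER": ["purchase_date"],
    },
}
# compute_metafeatures("accuracy_score", metadata) returns:
#   [1,            # scoring type (1 classification, 2 regression, 3 clustering)
#    1,            # imputation needed
#    1,            # non-numeric columns present
#    0, 2, 1, 0]   # counts of TEXT / CATEGORICAL / DATETIME / IMAGE columns
# update_config() then pads it to eight entries: [1, 1, 1, 0, 2, 1, 0, 0]
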
- sys.exit(0) +def search_pipelines(X, y, scoring, splitting_strategy, task_name, time_bound, automl_hyperparams, metadata, output_folder, checkpoints_folder): + builder = BaseBuilder(metadata, automl_hyperparams) + all_primitives = builder.all_primitives + ensemble_pipelines_hash = set() + + task_start = datetime.now() + + def evaluate_pipeline(primitives): + has_repeated_classifiers = check_repeated_classifiers( + primitives, all_primitives, ensemble_pipelines_hash + ) + + if has_repeated_classifiers: + logger.info("Repeated classifiers detected in ensembles, ignoring pipeline") + return None + + pipeline = builder.make_pipeline(primitives) + score = None + + if pipeline is not None: + alphaautoml_pipeline = score_pipeline(pipeline, X, y, scoring, splitting_strategy, task_name) + if alphaautoml_pipeline is not None: + score = alphaautoml_pipeline.get_score() + if score is not None: + dump_result_to_json(primitives, task_start, score, output_folder) + return score + + if task_name is None: + task_name = "NA" + + task_name_id = task_name + "_TASK" + include_primitives = automl_hyperparams["include_primitives"] + exclude_primitives = automl_hyperparams["exclude_primitives"] + new_primitives = automl_hyperparams["new_primitives"] + use_imputer = metadata["missing_values"] + nonnumeric_columns = metadata["nonnumeric_columns"] + + logger.debug("Creating a manual grammar") + grammar = load_manual_grammar( + task_name_id, + nonnumeric_columns, + use_imputer, + new_primitives, + include_primitives, + exclude_primitives, + ) + + metric = scoring._score_func.__name__ + config_updated = update_config(task_name, metric, grammar, metadata) + checkpoint_load_folder = ( + checkpoints_folder + if contain_checkpoints(checkpoints_folder) + else DEFAULT_CHECKPOINT_PATH + ) + checkpoint_save_folder = ( + checkpoints_folder + if checkpoints_folder is not None + else DEFAULT_CHECKPOINT_PATH + ) + game = PipelineGame(config_updated, evaluate_pipeline) + pipeline_search_rllib( + game, time_bound, checkpoint_load_folder, checkpoint_save_folder + ) + logger.debug("Search completed") + results = read_result_to_pipeline(builder, output_folder) + + # queue.put('DONE') + return results + +def update_config(task_name, metric, grammar, metadata): + config["PROBLEM"] = task_name + config["DATA_TYPE"] = "TABULAR" + config["METRIC"] = metric + config["DATASET"] = f"DATASET_{task_name}" + config["GRAMMAR"] = grammar + metafeatures = compute_metafeatures(metric, metadata) + config["DATASET_METAFEATURES"] = metafeatures + [0] * (8 - len(metafeatures)) -def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash): + return config + + +def check_repeated_classifiers( + pipeline_primitives, all_primitives, ensemble_pipelines_hash +): # Verify if the classifiers are repeated in the ensembles (regardless of the order) classifiers = [] - pipeline_hash = '' + pipeline_hash = "" has_ensemble_primitive = False has_repeated_classifiers = False for primitive_name in pipeline_primitives: - primitive_type = all_primitives[primitive_name]['type'] + primitive_type = all_primitives[primitive_name]["type"] - if primitive_type == 'CLASSIFIER': + if primitive_type == "CLASSIFIER": classifiers.append(primitive_name) - elif primitive_type == 'MULTI_ENSEMBLER': + elif primitive_type == "MULTI_ENSEMBLER": has_ensemble_primitive = True pipeline_hash += primitive_name - if len(classifiers) != len(set(classifiers)): # All classifiers should be different + if len(classifiers) != len( + set(classifiers) + ): # All 
classifiers should be different has_repeated_classifiers = True else: pipeline_hash += primitive_name @@ -74,7 +143,7 @@ def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pip if has_repeated_classifiers: return True - pipeline_hash += ''.join(sorted(classifiers)) + pipeline_hash += "".join(sorted(classifiers)) if pipeline_hash in ensemble_pipelines_hash: return True @@ -83,108 +152,67 @@ def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pip return False -def search_pipelines(X, y, scoring, splitting_strategy, task_name, automl_hyperparams, metadata, output_folder, verbose, - queue): - signal.signal(signal.SIGTERM, lambda signum, frame: signal_handler(queue, signum)) - hide_logs(verbose) # Hide logs here too, since multiprocessing has some issues with loggers - - builder = BaseBuilder(metadata, automl_hyperparams) - all_primitives = builder.all_primitives - ensemble_pipelines_hash = set() - - def evaluate_pipeline(primitives, origin): - has_repeated_classifiers = check_repeated_classifiers(primitives, all_primitives, ensemble_pipelines_hash) - - if has_repeated_classifiers: - logger.debug('Repeated classifiers detected in ensembles, ignoring pipeline') - return None - - pipeline = builder.make_pipeline(primitives) - score = None - - if pipeline is not None: - alphaautoml_pipeline = score_pipeline(pipeline, X, y, scoring, splitting_strategy, task_name, verbose) - if alphaautoml_pipeline is not None: - score = alphaautoml_pipeline.get_score() - queue.put(alphaautoml_pipeline) # Only send valid pipelines - - return score - - if task_name is None: - task_name = 'NA' - - task_name_id = task_name + '_TASK' - use_automatic_grammar = automl_hyperparams['use_automatic_grammar'] - include_primitives = automl_hyperparams['include_primitives'] - exclude_primitives = automl_hyperparams['exclude_primitives'] - new_primitives = automl_hyperparams['new_primitives'] - grammar = None - - if use_automatic_grammar: - logger.debug('Creating an automatic grammar') - prioritize_primitives = automl_hyperparams['prioritize_primitives'] - target_column = '' - dataset_path = '' - grammar = load_automatic_grammar( - task_name_id, - dataset_path, - target_column, - include_primitives, - exclude_primitives, - prioritize_primitives, +def compute_metafeatures(metric, metadata): + metafeatures = [] + # SCORING METRIC + scoring_type = 0 + if metric in [ + "accuracy_score", + "f1_score", + "precision_score", + "recall_score", + "jaccard_score", + ]: + scoring_type = 1 + elif metric in [ + "max_error", + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + ]: + scoring_type = 2 + elif metric in [ + "adjusted_mutual_info_score", + "rand_score", + "mutual_info_score", + "normalized_mutual_info_score", + ]: + scoring_type = 3 + metafeatures.append(scoring_type) + + # IMPUTE + metafeatures.append(1 if metadata["missing_values"] else 0) + # ENCODE + nonnumeric_columns = metadata["nonnumeric_columns"] + if nonnumeric_columns != {}: + metafeatures.append(1) + # TEXT + metafeatures.append( + len(nonnumeric_columns["TEXT_ENCODER"]) + if "TEXT_ENCODER" in nonnumeric_columns + else 0 ) - - if grammar is None: - logger.debug('Creating a manual grammar') - use_imputer = metadata['missing_values'] - nonnumeric_columns = metadata['nonnumeric_columns'] - grammar = load_manual_grammar( - task_name_id, - nonnumeric_columns, - use_imputer, - new_primitives, - include_primitives, - exclude_primitives, + # CATEGORICAL + 
metafeatures.append( + len(nonnumeric_columns["CATEGORICAL_ENCODER"]) + if "CATEGORICAL_ENCODER" in nonnumeric_columns + else 0 ) - - metric = scoring._score_func.__name__ - config_updated = update_config(task_name, metric, output_folder, grammar) - game = PipelineGame(config_updated, evaluate_pipeline) - nnet = NNetWrapper(game) - - if config['ARGS'].get('load_model'): - model_file = join( - config['ARGS'].get('load_folder_file')[0], - config['ARGS'].get('load_folder_file')[1], + # DATETIME + metafeatures.append( + len(nonnumeric_columns["DATETIME_ENCODER"]) + if "DATETIME_ENCODER" in nonnumeric_columns + else 0 ) - if os.path.isfile(model_file): - nnet.load_checkpoint( - config['ARGS'].get('load_folder_file')[0], - config['ARGS'].get('load_folder_file')[1], - ) - - c = Coach(game, nnet, config['ARGS']) - c.learn() - logger.debug('Search completed') - queue.put('DONE') - - -def update_config(task_name, metric, output_folder, grammar): - config['PROBLEM'] = task_name - config['DATA_TYPE'] = 'TABULAR' - config['METRIC'] = metric - config['DATASET'] = f'DATASET_{task_name}' - config['ARGS']['stepsfile'] = join( - output_folder, f'DATASET_{task_name}_pipeline_steps.txt' - ) - config['ARGS']['checkpoint'] = join(output_folder, 'nn_models') - config['ARGS']['load_folder_file'] = join( - output_folder, 'nn_models', 'best.pth.tar' - ) - config['GRAMMAR'] = grammar - # metafeatures_extractor = ComputeMetafeatures(dataset, targets, features, DBSession) - config['DATASET_METAFEATURES'] = [ - 0 - ] * 50 # metafeatures_extractor.compute_metafeatures('Compute_metafeatures') + # IMAGE + metafeatures.append( + len(nonnumeric_columns["IMAGE_ENCODER"]) + if "IMAGE_ENCODER" in nonnumeric_columns + else 0 + ) + else: + metafeatures.append(0) - return config + return metafeatures \ No newline at end of file diff --git a/alpha_automl/resource/checkpoints/.gitignore b/alpha_automl/resource/checkpoints/.gitignore new file mode 100644 index 00000000..4f074fd0 --- /dev/null +++ b/alpha_automl/resource/checkpoints/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +**/.DS_Store diff --git a/alpha_automl/scorer.py b/alpha_automl/scorer.py index 760adff1..7945527a 100644 --- a/alpha_automl/scorer.py +++ b/alpha_automl/scorer.py @@ -121,8 +121,7 @@ def make_splitter(splitting_strategy, splitting_strategy_kwargs=None): f'instance of BaseCrossValidator, BaseShuffleSplit, RepeatedSplits.') -def score_pipeline(pipeline, X, y, scoring, splitting_strategy, task_name, verbose): - hide_logs(verbose) # Hide logs here too, since multiprocessing has some issues with loggers +def score_pipeline(pipeline, X, y, scoring, splitting_strategy, task_name): score = None start_time = None end_time = None diff --git a/alpha_automl/utils.py b/alpha_automl/utils.py index 73313108..7c147cb1 100644 --- a/alpha_automl/utils.py +++ b/alpha_automl/utils.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import torch +from datetime import datetime from enum import Enum from sklearn.compose import ColumnTransformer from sklearn.preprocessing import LabelEncoder @@ -66,10 +67,10 @@ def sample_dataset(X, y, sample_size, task): if original_size > sample_size: ratio = sample_size / original_size try: - _, X_test, _, y_test = train_test_split(X, y, random_state=RANDOM_SEED, test_size=ratio, stratify=y, shuffle=shuffle) + _, X_test, _, y_test = train_test_split(X, y, random_state=int(datetime.now().microsecond), test_size=ratio, stratify=y, shuffle=shuffle) except Exception: # Not using stratified sampling when the minority class has few instances, not enough 
for all the folds - _, X_test, _, y_test = train_test_split(X, y, random_state=RANDOM_SEED, test_size=ratio, shuffle=shuffle) + _, X_test, _, y_test = train_test_split(X, y, random_state=int(datetime.now().microsecond), test_size=ratio, shuffle=shuffle) logger.debug(f'Sampling down data from {original_size} to {len(X_test)}') if isinstance(X_test, pd.DataFrame): X_test = X_test.reset_index(drop=True) @@ -236,7 +237,7 @@ def hide_logs(level): def setup_output_folder(output_folder): if output_folder is None: output_folder = tempfile.mkdtemp(prefix="alpha_automl", suffix="_log") - logger.debug(f'Created temporary directory: {output_folder}') + logger.info(f'Created temporary directory: {output_folder}') else: os.makedirs(output_folder, exist_ok=True) diff --git a/requirements.txt b/requirements.txt index 96fc1e36..18c41783 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ feature_engine xgboost lightgbm numpy +typing-extensions==4.5.0 +ray[rllib] \ No newline at end of file diff --git a/scripts/amlb/automl_job.SBATCH b/scripts/amlb/automl_job.SBATCH index d90ee08a..e68d0271 100644 --- a/scripts/amlb/automl_job.SBATCH +++ b/scripts/amlb/automl_job.SBATCH @@ -1,8 +1,9 @@ #!/bin/bash #SBATCH -c 4 -#SBATCH --mem 32GB -#SBATCH --time 01:15:00 +#SBATCH --mem 128GB +#SBATCH --time 02:00:00 #SBATCH --output logs/automl_job_%J.out #SBATCH --mail-user=rl3725@nyu.edu +#SBATCH --no-kill -singularity exec --overlay overlay-50G-10M.ext3:ro /scratch/work/public/singularity/ubuntu-22.04.sif /bin/bash -c "source /ext3/env.sh; python automlbenchmark/runbenchmark.py ${1} ${2} 1h4c -f 0 -u user_config/ -i openml_datasets/ -o results/" +singularity exec --overlay overlay-50G-10M.ext3:ro /scratch/work/public/singularity/ubuntu-22.04.sif /bin/bash -c "source /ext3/env.sh; python automlbenchmark/runbenchmark.py ${1} ${2} 1h4c -f 0 -u user_config/ -i openml_datasets/ -o results/ -s skip" diff --git a/scripts/amlb/run_all_automlbenchmark.sh b/scripts/amlb/run_all_automlbenchmark.sh index 8744ac2a..61d2d133 100644 --- a/scripts/amlb/run_all_automlbenchmark.sh +++ b/scripts/amlb/run_all_automlbenchmark.sh @@ -3,6 +3,8 @@ datasets="openml/t/10101 openml/t/12 openml/t/146195 openml/t/146212 openml/t/146606 openml/t/146818 openml/t/146821 openml/t/146822 openml/t/146825 openml/t/14965 openml/t/167119 openml/t/167120 openml/t/168329 openml/t/168330 openml/t/168331 openml/t/168332 openml/t/168335 openml/t/168337 openml/t/168338 openml/t/168868 openml/t/168908 openml/t/168909 openml/t/168910 openml/t/168911 openml/t/168912 openml/t/189354 openml/t/189355 openml/t/189356 openml/t/3 openml/t/31 openml/t/34539 openml/t/3917 openml/t/3945 openml/t/53 openml/t/7592 openml/t/7593 openml/t/9952 openml/t/9977 openml/t/9981" systems="autosklearn AutoGluon TPOT H2OAutoML AutoWEKA Alpha-AutoML" + +rm -rf logs/* for system in $systems do for dataset in $datasets @@ -10,4 +12,4 @@ do echo "Running ${system} system in ${dataset} dataset" sbatch automl_job.SBATCH $system $dataset done -done \ No newline at end of file +done diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py index 153c6f84..5bd9f148 100644 --- a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py @@ -46,11 +46,12 @@ def run(dataset, config): f'target_name: {target_name}\n' f'time_bound: {time_bound}\n' f'metric: {metric}\n' - f'cores: {cores}' + f'cores: {cores}\n' + f'output_folder: {output_path}\n' ) automl = 
-                              output_folder=output_path, num_cpus=cores, verbose=logging.DEBUG)
+                              output_folder=output_path, verbose=logging.INFO)
 
     train_dataset = pd.read_csv(train_dataset_path)
     test_dataset = pd.read_csv(test_dataset_path)
diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt b/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt
index 2fb52117..b710072a 100644
--- a/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt
+++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt
@@ -1 +1 @@
-alpha-automl
\ No newline at end of file
+alpha-automl @ git+https://github.com/VIDA-NYU/alpha-automl@dqn_implementation
diff --git a/scripts/offline_training/requirements.txt b/scripts/offline_training/requirements.txt
new file mode 100644
index 00000000..e4fd828f
--- /dev/null
+++ b/scripts/offline_training/requirements.txt
@@ -0,0 +1 @@
+openml
\ No newline at end of file
diff --git a/scripts/offline_training/results/.gitignore b/scripts/offline_training/results/.gitignore
new file mode 100644
index 00000000..464e52e2
--- /dev/null
+++ b/scripts/offline_training/results/.gitignore
@@ -0,0 +1,5 @@
+automl_job_*
+**/automl_job_*
+
+logs
+results.json
diff --git a/scripts/offline_training/train_all_datasets.sh b/scripts/offline_training/train_all_datasets.sh
new file mode 100644
index 00000000..eeae7f45
--- /dev/null
+++ b/scripts/offline_training/train_all_datasets.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#datasets="10101 12 146195 146212 146606 146818 146821 146822 146825 14965 167119 167120 168329 168330 168331 168332 168335 168337 168338 168868 168908 168909 168910 168911 168912 189354 189355 189356 3 31 34539 3917 3945 53 7592 7593 9952 9977 9981"
+# datasets="146825 168329 168330 168331 168332"
+datasets="168910"
+
+
+rm -rf tmp/logs/*
+for dataset in $datasets
+do
+    echo "Training Alpha-AutoML for ${dataset} dataset"
+    sbatch --output results/logs/automl_job_${dataset}.out trainer_job.SBATCH $dataset
+done
diff --git a/scripts/offline_training/trainer.py b/scripts/offline_training/trainer.py
new file mode 100644
index 00000000..7e38396c
--- /dev/null
+++ b/scripts/offline_training/trainer.py
@@ -0,0 +1,98 @@
+import argparse
+import openml
+import pandas as pd
+from os.path import dirname, join
+from alpha_automl import AutoMLClassifier
+
+
+if __name__ == "__main__":
+    # On Windows or in a CUDA environment, Alpha-AutoML should be used inside an "if __name__ == '__main__':" block
+
+    # argparser
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument(
+        "-s",
+        "--save-dir",
+        default=None,
+        help="The directory for loading and saving the PPO model checkpoints.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        default=None,
+        help="The output directory for saving results.json and other caching files.",
+    )
+    parser.add_argument(
+        "-d",
+        "--dataset-dir",
+        default=None,
+        help="The local dataset directory; it should contain train_data.csv and test_data.csv.",
+    )
+    parser.add_argument(
+        "-c",
+        "--target-column",
+        default="class",
+        help="The target column name for local dataset CSV files ('class' by default).",
+    )
+
+    parser.add_argument(
+        "-t",
+        "--task",
+        metavar="task_id",
+        type=int,
+        default=None,
+        help="The OpenML task id to train on."
+        "\nIgnored when --dataset-dir is provided;"
+        "\neither --task or --dataset-dir must be given.",
+    )
+    parser.add_argument(
+        "-T",
+        "--time-bound",
+        type=int,
+        default=1,
+        help="The time bound for the Alpha-AutoML search, in minutes (1 minute by default).",
+    )
+
+    args = parser.parse_args()
+
+    # Read the datasets
+    if args.dataset_dir:
+        train_dataset = pd.read_csv(
+            join(dirname(__file__), args.dataset_dir, "train_data.csv")
+        )
+        test_dataset = pd.read_csv(
+            join(dirname(__file__), args.dataset_dir, "test_data.csv")
+        )
+        target_column = args.target_column
+        X_train = train_dataset.drop(columns=[target_column])
+        y_train = train_dataset[[target_column]]
+        X_test = test_dataset.drop(columns=[target_column])
+        y_test = test_dataset[[target_column]]
+    elif args.task:
+        task = openml.tasks.get_task(args.task, download_qualities=False)
+        X, y = task.get_X_and_y(dataset_format="dataframe")
+        train_indices, test_indices = task.get_train_test_split_indices(
+            repeat=0,
+            fold=0,
+            sample=0,
+        )
+        X_train = X.iloc[train_indices]
+        y_train = y.iloc[train_indices]
+        X_test = X.iloc[test_indices]
+        y_test = y.iloc[test_indices]
+
+    # Add settings
+    automl = AutoMLClassifier(
+        time_bound=args.time_bound,
+        output_folder=args.output_dir,
+        checkpoints_folder=args.save_dir,
+    )
+
+    # Perform the search
+    automl.fit(X_train, y_train)
+
+    # Plot leaderboard
+    automl.plot_leaderboard(use_print=True)
+
+    # Evaluate best model
+    automl.score(X_test, y_test)
diff --git a/scripts/offline_training/trainer_job.SBATCH b/scripts/offline_training/trainer_job.SBATCH
new file mode 100644
index 00000000..79c4524d
--- /dev/null
+++ b/scripts/offline_training/trainer_job.SBATCH
@@ -0,0 +1,7 @@
+#!/bin/bash
+#SBATCH -c 4
+#SBATCH --mem 128GB
+#SBATCH --time 12:00:00
+#SBATCH --mail-user=yfw215@nyu.edu
+
+singularity exec --overlay overlay-50G-10M.ext3:ro /scratch/work/public/singularity/ubuntu-20.04.4.sif /bin/bash -c "source /ext3/env.sh; python trainer.py -o results -t ${1} -T 5"
\ No newline at end of file