diff --git a/alpha_automl/automl_manager.py b/alpha_automl/automl_manager.py index 0235afe3..3fc052ee 100644 --- a/alpha_automl/automl_manager.py +++ b/alpha_automl/automl_manager.py @@ -46,7 +46,6 @@ def search_pipelines(self, X, y, scoring, splitting_strategy, automl_hyperparams def _search_pipelines(self, automl_hyperparams): search_start_time = time.time() automl_hyperparams = self.check_automl_hyperparams(automl_hyperparams) - metadata = profile_data(self.X) X, y, is_sample = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task) internal_splitting_strategy = make_splitter(SPLITTING_STRATEGY) diff --git a/alpha_automl/builtin_primitives/__init__.py b/alpha_automl/builtin_primitives/__init__.py index 29e9735e..54a08a98 100644 --- a/alpha_automl/builtin_primitives/__init__.py +++ b/alpha_automl/builtin_primitives/__init__.py @@ -9,6 +9,15 @@ SkLabelSpreading, SkSelfTrainingClassifier, ) +from alpha_automl.builtin_primitives.image_encoder import ( + CannyEdgeDetection, + FisherVectorTransformer, + HogTransformer, + ImageReader, + RGB2GrayTransformer, + SkPatchExtractor, + ThresholdOtsu, +) from alpha_automl.builtin_primitives.time_series_forecasting import ( ArimaEstimator, DeeparEstimator, @@ -28,4 +37,11 @@ "SkLabelSpreading", "SkSelfTrainingClassifier", "AutonBox", + "ImageReader", + "RGB2GrayTransformer", + "ThresholdOtsu", + "CannyEdgeDetection", + "HogTransformer", + "FisherVectorTransformer", + "SkPatchExtractor", ] diff --git a/alpha_automl/builtin_primitives/image_encoder.py b/alpha_automl/builtin_primitives/image_encoder.py new file mode 100644 index 00000000..06db70a4 --- /dev/null +++ b/alpha_automl/builtin_primitives/image_encoder.py @@ -0,0 +1,224 @@ +import logging + +import numpy as np +import pandas as pd + +from alpha_automl._optional_dependency import import_optional_dependency +from alpha_automl.base_primitive import BasePrimitive + +skimage = import_optional_dependency("skimage") + +from skimage.color import gray2rgb, rgb2gray, rgba2rgb +from 
skimage.feature import ORB, canny, fisher_vector, hog, learn_gmm +from skimage.filters import threshold_otsu +from skimage.io import imread +from skimage.transform import resize +from sklearn.feature_extraction import image + +logging.getLogger("PIL").setLevel(logging.CRITICAL + 1) +logger = logging.getLogger("automl") + + +class ImageReader(BasePrimitive): + """Read image files from their paths and return one array of RGB images resized to (height, width)""" + + def __init__(self, width=80, height=80): + self.width = width + self.height = height + + def fit(self, X, y=None): + return self + + def transform(self, images): + data = [] + if isinstance(images, pd.DataFrame): + for file in images[images.columns[0]]: + im = imread(file) + im = resize(im, (self.height, self.width)) + if len(im.shape) < 3: + im = gray2rgb(im) + elif im.shape[2] == 4: + im = rgba2rgb(im) + elif im.shape[2] != 3: + im = gray2rgb(im[:, :, 0]) + data.append(im) + else: + for file in images: + im = imread(file[0]) + im = resize(im, (self.height, self.width)) + if len(im.shape) < 3: + im = gray2rgb(im) + elif im.shape[2] == 4: + im = rgba2rgb(im) + elif im.shape[2] != 3: + im = gray2rgb(im[:, :, 0]) + data.append(im) + return np.array(data) + + +class ThresholdOtsu(BasePrimitive): + """ + Binarize each image with its Otsu threshold and return one flattened boolean mask per image + """ + + def __init__(self): + self.reader = ImageReader() + pass + + def fit(self, X, y=None): + """returns itself""" + return self + + def transform(self, X, y=None): + """Read the images referenced by X and return an array with one flattened binary mask per row""" + X = self.reader.transform(X) + + def threshold(img): + img = rgb2gray(img) + threshold_value = threshold_otsu(img) + img = img > threshold_value + return img.flatten() + + return np.array([threshold(img) for img in X]) + + +class CannyEdgeDetection(BasePrimitive): + """ + Detect edges in each image with the Canny filter and return one flattened edge mask per image + """ + + def __init__(self): + self.reader = ImageReader() + pass + + def fit(self, X, y=None): + """returns itself""" + return self + + def transform(self, X, y=None): + """perform the 
transformation and return an array""" + X = self.reader.transform(X) + + def canny_edge(img): + img = rgb2gray(img) + img = canny(img) + return img.flatten() + + return np.array([canny_edge(img) for img in X]) + + +class RGB2GrayTransformer(BasePrimitive): + """ + Read the images referenced by X and convert each one to a flattened grayscale array + """ + + def __init__(self): + self.reader = ImageReader() + pass + + def fit(self, X, y=None): + """returns itself""" + return self + + def transform(self, X, y=None): + """Return an array with one flattened grayscale image per input row""" + X = self.reader.transform(X) + return np.array([rgb2gray(img).flatten() for img in X]) + + +class HogTransformer(BasePrimitive): + """ + Reads the images referenced by X and converts them to grayscale + Calculates hog features for each img + """ + + def __init__( + self, + y=None, + orientations=9, + pixels_per_cell=(14, 14), + cells_per_block=(2, 2), + block_norm="L2-Hys", + ): + self.y = y + self.orientations = orientations + self.pixels_per_cell = pixels_per_cell + self.cells_per_block = cells_per_block + self.block_norm = block_norm + self.reader = ImageReader() + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + def local_hog(X): + return hog( + X, + orientations=self.orientations, + pixels_per_cell=self.pixels_per_cell, + cells_per_block=self.cells_per_block, + block_norm=self.block_norm, + ) + + X = self.reader.transform(X) + X = np.array([rgb2gray(img) for img in X]) + return np.array([local_hog(img) for img in X]) + + +class FisherVectorTransformer(BasePrimitive): + """ + Fisher vector is an image feature encoding and quantization technique + that can be seen as a soft or probabilistic version of the popular + bag-of-visual-words or VLAD algorithms + """ + + def __init__(self, n_keypoints=5, harris_k=0.01, k=16): + self.n_keypoints = n_keypoints + self.harris_k = harris_k + self.k = k + self.reader = ImageReader() + self.gmm = None + + def fit(self, X, y=None): + """returns itself""" + return self + + def 
transform(self, X, y=None): + """Encode each image as the Fisher vector of its ORB descriptors; the GMM is learned on the first call and reused afterwards""" + X = self.reader.transform(X) + X = np.array([rgb2gray(img) for img in X]) + descriptors = [] + for x in X: + detector_extractor = ORB( + n_keypoints=self.n_keypoints, harris_k=self.harris_k + ) + detector_extractor.detect_and_extract(x) + descriptors.append(detector_extractor.descriptors.astype("float32")) + + if self.gmm is None: + self.gmm = learn_gmm(descriptors, n_modes=self.k) + + fvs = np.array( + [fisher_vector(descriptor_mat, self.gmm) for descriptor_mat in descriptors] + ) + return fvs + + +class SkPatchExtractor(BasePrimitive): + """ + Reads the images referenced by X and extracts their patches, flattened into one row per image + """ + + def __init__(self): + self.reader = ImageReader() + self.extractor = image.PatchExtractor() + pass + + def fit(self, X, y=None): + """returns itself""" + return self + + def transform(self, X, y=None): + """Return the extracted patches reshaped to (number of images, -1)""" + X = self.reader.transform(X) + return self.extractor.transform(X).reshape((X.shape[0], -1)) diff --git a/alpha_automl/data_profiler.py b/alpha_automl/data_profiler.py index 99268df9..4995ab6c 100644 --- a/alpha_automl/data_profiler.py +++ b/alpha_automl/data_profiler.py @@ -5,6 +5,7 @@ DATETIME_COLUMN = 'http://schema.org/DateTime' TEXT_COLUMN = 'http://schema.org/Text' EMPTY_COLUMN = 'https://metadata.datadrivendiscovery.org/types/MissingData' +IMAGE_COLUMN = 'https://schema.org/ImageObject' logger = logging.getLogger(__name__) @@ -13,7 +14,7 @@ def profile_data(X): metadata = {'nonnumeric_columns': {}, 'useless_columns': [], 'missing_values': False} mapping_encoders = {CATEGORICAL_COLUMN: 'CATEGORICAL_ENCODER', DATETIME_COLUMN: 'DATETIME_ENCODER', - TEXT_COLUMN: 'TEXT_ENCODER'} + TEXT_COLUMN: 'TEXT_ENCODER', IMAGE_COLUMN: 'IMAGE_ENCODER'} profiled_data = datamart_profiler.process_dataset(X, coverage=False, indexes=False) @@ -32,7 +33,11 @@ def profile_data(X): add_nonnumeric_column(column_type, metadata, index_column, column_name) 
elif TEXT_COLUMN == profiled_column['structural_type']: - column_type = mapping_encoders[TEXT_COLUMN] + samples = X[column_name].dropna()  # sampled below with n capped: sample(5) raises ValueError on fewer than 5 values + if len(samples) > 0 and samples.sample(min(5, len(samples))).apply(lambda x: x.endswith(('jpg', 'png', 'jpeg', 'gif'))).all(): + column_type = mapping_encoders[IMAGE_COLUMN] + else: + column_type = mapping_encoders[TEXT_COLUMN] add_nonnumeric_column(column_type, metadata, index_column, column_name) if 'missing_values_ratio' in profiled_column: diff --git a/alpha_automl/pipeline_synthesis/pipeline_builder.py b/alpha_automl/pipeline_synthesis/pipeline_builder.py index 91544ffb..2c9116ee 100644 --- a/alpha_automl/pipeline_synthesis/pipeline_builder.py +++ b/alpha_automl/pipeline_synthesis/pipeline_builder.py @@ -121,7 +121,7 @@ def create_transformers(self, primitive_object, primitive_name, primitive_type): if primitive_type == 'TEXT_ENCODER': column_transformers = [(f'{primitive_name}-{col_name}', primitive_object, col_index) for col_index, col_name in nonnumeric_columns[primitive_type]] - elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER': + elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER' or primitive_type == 'IMAGE_ENCODER': column_transformers = [(primitive_name, primitive_object, [col_index for col_index, _ in nonnumeric_columns[primitive_type]])] diff --git a/alpha_automl/resource/base_grammar.bnf b/alpha_automl/resource/base_grammar.bnf index fced74e9..2b90c795 100644 --- a/alpha_automl/resource/base_grammar.bnf +++ b/alpha_automl/resource/base_grammar.bnf @@ -5,13 +5,14 @@ CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER TIME_SERIES_FORECAST_TASK -> REGRESSION_TASK | IMPUTER TIME_SERIES_FORECAST SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER CLASSIFIER | IMPUTER ENCODERS FEATURE_SCALER LABELPROPAGATION_CLASSIFIER NA_TASK -> CLASSIFICATION_TASK | REGRESSION_TASK | SEMISUPERVISED_TASK -ENCODERS -> TEXT_ENCODER DATETIME_ENCODER 
CATEGORICAL_ENCODER +ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER IMAGE_ENCODER IMPUTER -> 'primitive_terminal' FEATURE_SCALER -> 'primitive_terminal' | 'E' FEATURE_SELECTOR -> 'primitive_terminal' | 'E' TEXT_ENCODER -> 'primitive_terminal' CATEGORICAL_ENCODER -> 'primitive_terminal' DATETIME_ENCODER -> 'primitive_terminal' +IMAGE_ENCODER -> 'primitive_terminal' CLASSIFIER -> 'primitive_terminal' REGRESSOR -> 'primitive_terminal' CLUSTERER -> 'primitive_terminal' diff --git a/alpha_automl/resource/primitives_hierarchy.json b/alpha_automl/resource/primitives_hierarchy.json index 4a78e87c..ea800ec6 100644 --- a/alpha_automl/resource/primitives_hierarchy.json +++ b/alpha_automl/resource/primitives_hierarchy.json @@ -28,7 +28,7 @@ ], "DATETIME_ENCODER": [ "sklearn.preprocessing.OrdinalEncoder", - "alpha_automl.builtin_primitives.CyclicalFeature", + "alpha_automl.builtin_primitives.CyclicalFeature", "alpha_automl.builtin_primitives.Datetime64ExpandEncoder", "alpha_automl.builtin_primitives.DummyEncoder" ], @@ -75,6 +75,10 @@ "sklearn.feature_extraction.text.CountVectorizer", "sklearn.feature_extraction.text.TfidfVectorizer" ], + "IMAGE_ENCODER": [ + "alpha_automl.builtin_primitives.RGB2GrayTransformer", + "alpha_automl.builtin_primitives.HogTransformer" + ], "COLUMN_TRANSFORMER": [ "sklearn.compose.ColumnTransformer" ], @@ -86,7 +90,7 @@ ], "SEMISUPERVISED_CLASSIFIER": [ "sklearn.semi_supervised.SelfTrainingClassifier", - "alpha_automl.builtin_primitives.AutonBox" + "alpha_automl.builtin_primitives.AutonBox" ], "LABELPROPAGATION_CLASSIFIER": [ "alpha_automl.builtin_primitives.SkLabelSpreading", diff --git a/alpha_automl/wrapper_primitives/__init__.py b/alpha_automl/wrapper_primitives/__init__.py index b6fa28eb..40941da6 100644 --- a/alpha_automl/wrapper_primitives/__init__.py +++ b/alpha_automl/wrapper_primitives/__init__.py @@ -1,2 +1,3 @@ from alpha_automl.wrapper_primitives.fasttext import FastTextEmbedder from 
alpha_automl.wrapper_primitives.huggingface import HuggingfaceEmbedder +from alpha_automl.wrapper_primitives.clip import HuggingfaceCLIPTransformer diff --git a/alpha_automl/wrapper_primitives/clip.py b/alpha_automl/wrapper_primitives/clip.py new file mode 100644 index 00000000..442c292f --- /dev/null +++ b/alpha_automl/wrapper_primitives/clip.py @@ -0,0 +1,39 @@ +import numpy as np +import torch + +from alpha_automl._optional_dependency import import_optional_dependency +from alpha_automl.base_primitive import BasePrimitive +from alpha_automl.builtin_primitives import ImageReader + +transformers = import_optional_dependency("transformers") + +DEFAULT_MODEL_ID = "openai/clip-vit-base-patch32" + + +class HuggingfaceCLIPTransformer(BasePrimitive): + """ + Encode images as CLIP image-feature vectors using a pretrained Hugging Face CLIPModel + """ + + def __init__(self, model_id=DEFAULT_MODEL_ID): + self.model_id = model_id + self.reader = ImageReader(width=224, height=224) + self.model = transformers.CLIPModel.from_pretrained(self.model_id) + + def fit(self, X, y=None): + """returns itself""" + return self + + def transform(self, X, y=None): + """Read the images referenced by X and return an array with one CLIP image-feature vector per row""" + X = self.reader.transform(X) + + def clip(img): + img = np.transpose(img, (2, 0, 1)) + img = torch.from_numpy(img)[None, :, :, :].float() + # inference only: skip building the autograd graph + with torch.no_grad(): + img = self.model.get_image_features(img) + return img.detach().numpy()[0] + + return np.array([clip(img) for img in X]) diff --git a/examples/adding_clip_primitives.ipynb b/examples/adding_clip_primitives.ipynb new file mode 100644 index 00000000..49b8d925 --- /dev/null +++ b/examples/adding_clip_primitives.ipynb @@ -0,0 +1,14845 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding CLIP Image Encoder Primitives" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from 
sklearn.model_selection import train_test_split\n", + "\n", + "output_path = 'tmp/'\n", + "media_path = os.path.join(os.getcwd(), 'datasets/selfie')\n", + "dataset = pd.read_csv('datasets/selfie/learningData.csv').sample(1000)\n", + "dataset[\"image\"] = dataset[\"image\"].apply(lambda x: os.path.join(media_path, x))\n", + "X = dataset[[\"image\"]]\n", + "y = dataset[[\"label\"]]\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, \n", + " y, \n", + " test_size=0.2, \n", + " shuffle=True,\n", + " random_state=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
3310/home/yfw215/alpha-automl/examples/datasets/se...
2949/home/yfw215/alpha-automl/examples/datasets/se...
1610/home/yfw215/alpha-automl/examples/datasets/se...
2639/home/yfw215/alpha-automl/examples/datasets/se...
91/home/yfw215/alpha-automl/examples/datasets/se...
......
3267/home/yfw215/alpha-automl/examples/datasets/se...
7090/home/yfw215/alpha-automl/examples/datasets/se...
571/home/yfw215/alpha-automl/examples/datasets/se...
711/home/yfw215/alpha-automl/examples/datasets/se...
1100/home/yfw215/alpha-automl/examples/datasets/se...
\n", + "

800 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " image\n", + "3310 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "2949 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "1610 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "2639 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "91 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "... ...\n", + "3267 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "7090 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "571 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "711 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "1100 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "\n", + "[800 rows x 1 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from alpha_automl import AutoMLImageClassifier\n", + "\n", + "output_path = 'tmp/'\n", + "\n", + "automl = AutoMLImageClassifier(output_path, time_bound=10, verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from alpha_automl.wrapper_primitives.clip import CLIPTransformer \n", + "\n", + "my_clip_encoder = CLIPTransformer()\n", + "automl.add_primitives([(my_clip_encoder, 'IMAGE_ENCODER')])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.63\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.59\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, 
score=0.72\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.99\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.535\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.595\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.61\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.58\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.96\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.625\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.605\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.73\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:08:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.985\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.58\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.58\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:53, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.545\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.57\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.62\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.56\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:09:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.54\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.56\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.63\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found 36 pipelines\n" + ] + } + ], + "source": [ + "automl.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingpipelineaccuracy_score
1CLIPTransformer, SelectKBest, RandomForestClassifier0.990
2CLIPTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.985
3CLIPTransformer, SelectKBest, DecisionTreeClassifier0.960
4HogTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.730
5HogTransformer, SelectKBest, RandomForestClassifier0.720
6HogTransformer, SelectKBest, DecisionTreeClassifier0.635
7RGB2GrayTransformer, SelectKBest, RandomForestClassifier0.630
8RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LGBMClassifier0.630
9RGB2GrayTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.625
10RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.620
11ThresholdOtsu, StandardScaler, LinearSVC0.610
12ThresholdOtsu, StandardScaler, SelectKBest, RandomForestClassifier0.605
13ThresholdOtsu, StandardScaler, SelectKBest, LinearSVC0.595
14ThresholdOtsu, SelectKBest, RandomForestClassifier0.590
15ThresholdOtsu, SelectKBest, DecisionTreeClassifier0.580
16RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.580
17RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.580
18RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.575
19RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.575
20RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.575
21RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.575
22RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.575
23RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.575
24RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SVC0.570
25FisherVectorTransformer, SelectKBest, RandomForestClassifier0.565
26FisherVectorTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.565
27RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.560
28RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.560
29RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.550
30RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BernoulliNB0.550
31RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.545
32RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.540
33ThresholdOtsu, StandardScaler, GenericUnivariateSelect, LinearSVC0.535
34FisherVectorTransformer, SelectKBest, DecisionTreeClassifier0.530
35ThresholdOtsu, StandardScaler, SelectPercentile, LinearSVC0.505
36RGB2GrayTransformer, SelectKBest, DecisionTreeClassifier0.500
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.plot_leaderboard()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t
\n", + "\t
\n", + "\t \n", + "\t\n", + "\t\n", + "\t" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "automl.plot_comparison_pipelines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pipeline predictions are accessed with:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,\n", + " 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n", + " 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,\n", + " 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,\n", + " 0, 0])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = automl.predict(X_test)\n", + "y_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pipeline can be evaluated against a held out dataset with the function call:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: accuracy_score, Score: 0.98\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'accuracy_score', 'score': 0.98}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.score(X_test, y_test)" + ] + 
} + ], + "metadata": { + "kernelspec": { + "display_name": "my_env", + "language": "python", + "name": "my_env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/adding_huggingface_clip_primitives.ipynb b/examples/adding_huggingface_clip_primitives.ipynb new file mode 100644 index 00000000..e1ba143b --- /dev/null +++ b/examples/adding_huggingface_clip_primitives.ipynb @@ -0,0 +1,16476 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding HuggingFace CLIP Image Encoder Primitives" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, import the class `AutoMLClassifier`\n", + "\n", + "In this example, we are generating pipelines for a CSV dataset. The Wildlife dataset is used for this example.\n", + "Sample and divide the dataset using _train_test_split_.\n", + "\n", + "For this task, we use the Wildlife Dataset, a customized dataset for recognizing wildlife products from various product images. \n", + "You can download the dataset via the following [google drive link](https://drive.google.com/file/d/18KEG_DSHU0LpKq9yPWvEmRtSXm8-bL8j/view?usp=drive_link)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "output_path = 'tmp/'\n", + "media_path = os.path.join(os.getcwd(), 'datasets/')\n", + "dataset = pd.read_csv('datasets/wildlifedata/learningData.csv')\n", + "dataset[\"image\"] = dataset[\"image\"].apply(lambda x: os.path.join(media_path, x))\n", + "X = dataset[[\"image\"]]\n", + "y = dataset[[\"label\"]]\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, \n", + " y, \n", + " test_size=0.2, \n", + " shuffle=True,\n", + " random_state=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
29/home/yfw215/alpha-automl/examples/datasets/wi...
535/home/yfw215/alpha-automl/examples/datasets/wi...
695/home/yfw215/alpha-automl/examples/datasets/wi...
557/home/yfw215/alpha-automl/examples/datasets/wi...
836/home/yfw215/alpha-automl/examples/datasets/wi...
......
106/home/yfw215/alpha-automl/examples/datasets/wi...
270/home/yfw215/alpha-automl/examples/datasets/wi...
860/home/yfw215/alpha-automl/examples/datasets/wi...
435/home/yfw215/alpha-automl/examples/datasets/wi...
102/home/yfw215/alpha-automl/examples/datasets/wi...
\n", + "

800 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " image\n", + "29 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "535 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "695 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "557 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "836 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + ".. ...\n", + "106 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "270 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "860 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "435 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "102 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "\n", + "[800 rows x 1 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from alpha_automl import AutoMLClassifier\n", + "\n", + "output_path = 'tmp/'\n", + "\n", + "automl = AutoMLClassifier(output_path, time_bound=60, verbose=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding New Primitives into AlphaAutoML's Search Space" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "INFO:gluonts.mx.context:Using CPU\n", + "DEBUG:matplotlib:matplotlib data path: /ext3/miniconda3/lib/python3.10/site-packages/matplotlib/mpl-data\n", + "DEBUG:matplotlib:CONFIGDIR=/home/yfw215/.config/matplotlib\n", + "DEBUG:matplotlib:interactive is False\n", + "DEBUG:matplotlib:platform is linux\n", + "DEBUG:matplotlib:CACHEDIR=/home/yfw215/.cache/matplotlib\n", + 
"DEBUG:matplotlib.font_manager:Using fontManager instance from /home/yfw215/.cache/matplotlib/fontlist-v330.json\n" + ] + } + ], + "source": [ + "from alpha_automl.wrapper_primitives.clip import HuggingfaceCLIPTransformer \n", + "\n", + "model_id = 'openai/clip-vit-base-patch32'\n", + "my_clip_encoder = HuggingfaceCLIPTransformer(model_id=model_id)\n", + "automl.add_primitives([(my_clip_encoder, 'IMAGE_ENCODER')])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.43\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.495\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.535\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, 
score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.485\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.545\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.485\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.825\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:07:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.52\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.455\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.57\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:09:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.46\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.42\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.535\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.455\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.475\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.515\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.495\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.545\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:14:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.545\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:15:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:17:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.885\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:18:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:18:53, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:19:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.43\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:21:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:22:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:23:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.525\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:23:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.525\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:25:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.835\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:25:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.515\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:25:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:26:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.56\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:26:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:27:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:27:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:28:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:28:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:28:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:28:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.495\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:28:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.495\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:41, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.545\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.48\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.535\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:32:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.94\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:32:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:32:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.44\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:33:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.415\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:33:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:33:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.48\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:33:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.43\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:34:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:35:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.595\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.465\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.475\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.48\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:35:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:36:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.485\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:38:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:41:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.94\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:43:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.885\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:43:53, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:43:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:44:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:44:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:44:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:44:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:44:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:44:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:46:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.975\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:49:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.875\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:49:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:49:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:49:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:49:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.575\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:50:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.43\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:50:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.485\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:51:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:51:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:51:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.565\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:51:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:53:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.885\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:53:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.515\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:53:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.53\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:53:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.555\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:54:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.555\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:54:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.505\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:54:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.56\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:56:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:56:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.555\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:59:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:59:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=1:01:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.825\n", + 
"INFO:alpha_automl.automl_api:Found 122 pipelines\n" + ] + } + ], + "source": [ + "automl.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exploring Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the pipeline search is complete, we can display the leaderboard:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingpipelineaccuracy_score
1ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.975
2ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, SelectPercentile, BaggingClassifier0.940
3ColumnTransformer, HuggingfaceCLIPTransformer, StandardScaler, SelectPercentile, BaggingClassifier0.940
4ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.885
5ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.885
6ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.885
7ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.880
8ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.880
9ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.880
10ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.875
11ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.835
12ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.825
13ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.825
14ColumnTransformer, HogTransformer, MaxAbsScaler, SelectPercentile, BaggingClassifier0.635
15ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, RandomForestClassifier0.600
16ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, SelectPercentile, LinearSVC0.595
17ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.575
18ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.575
19ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, LinearSVC0.570
20ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.565
21ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.565
22ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.565
23ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.565
24ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, BaggingClassifier0.560
25ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, XGBClassifier0.560
26ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.555
27ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.555
28ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.555
29ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.550
30ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.545
31ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, XGBClassifier0.545
32ColumnTransformer, FisherVectorTransformer, SelectPercentile, BaggingClassifier0.545
33ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, LGBMClassifier0.545
34ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.535
35ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, GradientBoostingClassifier0.535
36ColumnTransformer, CannyEdgeDetection, StandardScaler, SelectPercentile, BaggingClassifier0.535
37ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.530
38ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.530
39ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, GaussianNB0.530
40ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.530
41ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.530
42ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.530
43ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.530
44ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.530
45ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.530
46ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.530
47ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.530
48ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.530
49ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, SVC0.530
50ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LGBMClassifier0.530
51ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.530
52ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.530
53ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, BernoulliNB0.530
54ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.530
55ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, LinearSVC0.525
56ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, SelectPercentile, BaggingClassifier0.525
57ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.520
58ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, BaggingClassifier0.515
59ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, LinearDiscriminantAnalysis0.515
60ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, GaussianNB0.515
61ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.510
62ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.510
63ColumnTransformer, FisherVectorTransformer, StandardScaler, SelectPercentile, BaggingClassifier0.510
64ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.510
65ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.505
66ColumnTransformer, ThresholdOtsu, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.505
67ColumnTransformer, ThresholdOtsu, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.505
68ColumnTransformer, ThresholdOtsu, MaxAbsScaler, SelectPercentile, BaggingClassifier0.505
69ColumnTransformer, RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.505
70ColumnTransformer, ThresholdOtsu, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.505
71ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectKBest, BaggingClassifier0.505
72ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.500
73ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.500
74ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, RandomForestClassifier0.500
75ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, LinearSVC0.500
76ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.500
77ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.495
78ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LinearSVC0.495
79ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, SVC0.495
80ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LGBMClassifier0.495
81ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.490
82ColumnTransformer, FisherVectorTransformer, RobustScaler, SelectPercentile, BaggingClassifier0.490
83ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.490
84ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.490
85ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, BaggingClassifier0.490
86ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.490
87ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, BaggingClassifier0.490
88ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.485
89ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.485
90ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, BernoulliNB0.485
91ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, LinearDiscriminantAnalysis0.485
92ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.480
93ColumnTransformer, RGB2GrayTransformer, StandardScaler, SelectPercentile, BaggingClassifier0.480
94ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.480
95ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.475
96ColumnTransformer, RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, BaggingClassifier0.475
97ColumnTransformer, RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, BaggingClassifier0.470
98ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.465
99ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.465
100ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.465
101ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectKBest, RandomForestClassifier0.465
102ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.465
103ColumnTransformer, RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, RandomForestClassifier0.465
104ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, XGBClassifier0.460
105ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, BaggingClassifier0.455
106ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, SGDClassifier0.455
107ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.450
108ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.440
109ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.430
110ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.430
111ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.430
112ColumnTransformer, FisherVectorTransformer, StandardScaler, SelectPercentile, RandomForestClassifier0.430
113ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.425
114ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.425
115ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SVC0.425
116ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, PassiveAggressiveClassifier0.425
117ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BernoulliNB0.425
118ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.425
119ColumnTransformer, CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, PassiveAggressiveClassifier0.425
120ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.425
121ColumnTransformer, FisherVectorTransformer, MaxAbsScaler, SelectPercentile, QuadraticDiscriminantAnalysis0.420
122ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, PassiveAggressiveClassifier0.415
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.plot_leaderboard()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to explore the produced pipelines, we can use [PipelineProfiler](https://github.com/VIDA-NYU/PipelineVis). PipelineProfiler is a visualization that enables users to compare and explore the pipelines generated by the AlphaAutoML system.\n", + "\n", + "After the pipeline search process is completed, we can use PipelineProfiler with:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t
\n", + "\t
\n", + "\t \n", + "\t\n", + "\t\n", + "\t" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "automl.plot_comparison_pipelines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pipeline predictions are accessed with:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,\n", + " 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,\n", + " 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,\n", + " 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,\n", + " 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1,\n", + " 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0,\n", + " 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,\n", + " 0, 1])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = automl.predict(X_test)\n", + "y_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pipeline can be evaluated against a held out dataset with the function call:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: accuracy_score, Score: 0.965\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'accuracy_score', 'score': 0.965}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.score(X_test, y_test)" + 
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semi-Supervised Image Classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we randomly mask 90% of the data labels and run a semi-supervised classification task with the class `AutoMLSemiSupervisedClassifier`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of training samples: 800\n", + "Unlabeled samples in training set: label\n", + "-1 712\n", + " 1 46\n", + " 0 42\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "y_mask = np.random.rand(len(y_train)) < 0.1\n", + "y_train[~y_mask] = -1\n", + "print(\"Number of training samples:\", len(X_train))\n", + "print(\"Unlabeled samples in training set:\", y_train.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
557/home/yfw215/alpha-automl/examples/datasets/wi...
824/home/yfw215/alpha-automl/examples/datasets/wi...
65/home/yfw215/alpha-automl/examples/datasets/wi...
907/home/yfw215/alpha-automl/examples/datasets/wi...
904/home/yfw215/alpha-automl/examples/datasets/wi...
......
747/home/yfw215/alpha-automl/examples/datasets/wi...
252/home/yfw215/alpha-automl/examples/datasets/wi...
21/home/yfw215/alpha-automl/examples/datasets/wi...
276/home/yfw215/alpha-automl/examples/datasets/wi...
435/home/yfw215/alpha-automl/examples/datasets/wi...
\n", + "

88 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " image\n", + "557 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "824 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "65 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "907 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "904 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + ".. ...\n", + "747 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "252 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "21 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "276 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "435 /home/yfw215/alpha-automl/examples/datasets/wi...\n", + "\n", + "[88 rows x 1 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train[y_mask]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import `AutoMLSemiSupervisedClassifier`.\n", + "Because the validation split used in cross-validation must not contain unlabeled data rows, we pass the argument `test_size: .05` to make sure 40 out of the 80 labeled data rows are used for validation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from alpha_automl import AutoMLSemiSupervisedClassifier\n", + "\n", + "output_path = 'tmp/'\n", + "\n", + "automl = AutoMLSemiSupervisedClassifier(output_path, time_bound=30, verbose=False, \n", + " split_strategy_kwargs={'test_size': .05})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding New Primitives into AlphaAutoML's Search Space" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "INFO:gluonts.mx.context:Using CPU\n", + "DEBUG:matplotlib:matplotlib data path: /ext3/miniconda3/lib/python3.10/site-packages/matplotlib/mpl-data\n", + "DEBUG:matplotlib:CONFIGDIR=/home/yfw215/.config/matplotlib\n", + "DEBUG:matplotlib:interactive is False\n", + "DEBUG:matplotlib:platform is linux\n", + "DEBUG:matplotlib:CACHEDIR=/home/yfw215/.cache/matplotlib\n", + "DEBUG:matplotlib.font_manager:Using fontManager instance from /home/yfw215/.cache/matplotlib/fontlist-v330.json\n" + ] + } + ], + "source": [ + "from alpha_automl.wrapper_primitives.clip import HuggingfaceCLIPTransformer \n", + "\n", + "my_clip_encoder = HuggingfaceCLIPTransformer()\n", + "automl.add_primitives([(my_clip_encoder, 'IMAGE_ENCODER')])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exploring Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the pipeline search is complete, we can display the leaderboard:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.625\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:07:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.625\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.975\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.625\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.975\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:18:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:18:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:18:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:23:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.675\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:24:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4000000000000001\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:24:13, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:24:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:24:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.425\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.47500000000000003\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:29:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4000000000000001\n", + "INFO:alpha_automl.automl_api:Found 20 pipelines\n" + ] + } + ], + "source": [ + "automl.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to explore the produced pipelines, we can use [PipelineProfiler](https://github.com/VIDA-NYU/PipelineVis). 
PipelineProfiler is a visualization that enables users to compare and explore the pipelines generated by the AlphaAutoML system.\n", + "\n", + "After the pipeline search process is completed, we can use PipelineProfiler with:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingpipelinef1_score
1ColumnTransformer, HuggingfaceCLIPTransformer, AutonBox, SGDClassifier0.975
2ColumnTransformer, HuggingfaceCLIPTransformer, StandardScaler, AutonBox, SGDClassifier0.975
3ColumnTransformer, HogTransformer, StandardScaler, SelfTrainingClassifier, SGDClassifier0.675
4ColumnTransformer, HogTransformer, MaxAbsScaler, SkLabelSpreading0.625
5ColumnTransformer, RGB2GrayTransformer, AutonBox, SGDClassifier0.625
6ColumnTransformer, HogTransformer, StandardScaler, AutonBox, SGDClassifier0.625
7ColumnTransformer, HogTransformer, SkLabelSpreading0.600
8ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, SkLabelSpreading0.550
9ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SkLabelSpreading0.475
10ColumnTransformer, HogTransformer, AutonBox, SGDClassifier0.475
11ColumnTransformer, HuggingfaceCLIPTransformer, SkLabelSpreading0.475
12ColumnTransformer, HogTransformer, StandardScaler, SkLabelSpreading0.475
13ColumnTransformer, RGB2GrayTransformer, StandardScaler, SkLabelPropagation0.475
14ColumnTransformer, RGB2GrayTransformer, RobustScaler, SkLabelSpreading0.475
15ColumnTransformer, RGB2GrayTransformer, SkLabelSpreading0.450
16ColumnTransformer, RGB2GrayTransformer, StandardScaler, SkLabelSpreading0.450
17ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelfTrainingClassifier, SGDClassifier0.450
18ColumnTransformer, HuggingfaceCLIPTransformer, StandardScaler, SkLabelSpreading0.425
19ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SkLabelPropagation0.400
20ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelfTrainingClassifier, BaggingClassifier0.400
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.plot_leaderboard()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pipeline can be evaluated against a held-out dataset with the function call:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: f1_score, Score: 0.9456521739130435\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'f1_score', 'score': 0.9456521739130435}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparing Semi-Supervised Score With Score Trained Only Using Labeled Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now train an `AutoMLClassifier` model using only the labeled 10% of the data rows. 
We can see that there is a significant improvement in accuracy score using `AutoMLSemiSupervisedClassifier`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from alpha_automl import AutoMLClassifier\n", + "\n", + "output_path = 'tmp/'\n", + "\n", + "automl_new = AutoMLClassifier(output_path, time_bound=30, verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "my_clip_encoder = HuggingfaceCLIPTransformer()\n", + "automl_new.add_primitives([(my_clip_encoder, 'IMAGE_ENCODER')])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, 
time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:34, scoring...\n", 
+ "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:01, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.22727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:45, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:58, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found 
pipeline, time=0:04:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:04:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, 
score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:06:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:07:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:07:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:07:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:17, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.8636363636363636\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:45, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:53, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:55, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:08, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:38, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:10:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:08, scoring...\n", 
+ "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5454545454545454\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6363636363636364\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3181818181818182\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored 
pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2727272727272727\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45454545454545453\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:13:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, 
score=0.5909090909090909\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4090909090909091\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.36363636363636365\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9545454545454546\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6818181818181818\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:15:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7272727272727273\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:15:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:15:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=1.0\n", + "INFO:alpha_automl.automl_api:Found 204 pipelines\n" + ] + } + ], + "source": [ + "automl_new.fit(X_train[y_mask], y_train[y_mask])" + ] + }, + { + "cell_type": 
"code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: accuracy_score, Score: 0.83\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'accuracy_score', 'score': 0.83}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl_new.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/image_classification.ipynb b/examples/image_classification.ipynb new file mode 100644 index 00000000..056ba2f6 --- /dev/null +++ b/examples/image_classification.ipynb @@ -0,0 +1,16837 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9042e825-a16a-4a66-b0ca-86833f4a82ea", + "metadata": {}, + "source": [ + "## Solving Image Classification Tasks" + ] + }, + { + "cell_type": "markdown", + "id": "482388d8-04cb-4716-8772-b362f9463c2c", + "metadata": {}, + "source": [ + "First, import the class `AutoMLClassifier`" + ] + }, + { + "cell_type": "markdown", + "id": "de9047fb-a307-4bbb-a354-663079c0e874", + "metadata": {}, + "source": [ + "### Generating Pipelines for Image Datasets" + ] + }, + { + "cell_type": "markdown", + "id": "e4e5afa5-c392-49e1-9acb-811c03341102", + "metadata": {}, + "source": [ + "In this example, we are generating pipelines for an image dataset. The image_dataset_2 dataset is used for this example."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "83397c72-47d0-4463-9fa4-87fc765fb0e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "\n", + "\n", + "output_path = 'tmp/'\n", + "media_path = os.path.join(os.getcwd(), 'datasets/image_dataset_2/media')\n", + "dataset = pd.read_csv('datasets/image_dataset_2/tables/learningData.csv')\n", + "dataset[\"image\"] = dataset[\"image\"].apply(lambda x: os.path.join(media_path, x))\n", + "X = dataset[[\"image\"]]\n", + "y = dataset[[\"label\"]]\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, \n", + " y, \n", + " test_size=0.2, \n", + " shuffle=True,\n", + " random_state=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "03e4be35-c438-46ca-bb69-6ee65dc2641a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
49/home/mjk/alpha_automl_eden/alpha-automl/examp...
70/home/mjk/alpha_automl_eden/alpha-automl/examp...
68/home/mjk/alpha_automl_eden/alpha-automl/examp...
15/home/mjk/alpha_automl_eden/alpha-automl/examp...
39/home/mjk/alpha_automl_eden/alpha-automl/examp...
......
60/home/mjk/alpha_automl_eden/alpha-automl/examp...
71/home/mjk/alpha_automl_eden/alpha-automl/examp...
14/home/mjk/alpha_automl_eden/alpha-automl/examp...
92/home/mjk/alpha_automl_eden/alpha-automl/examp...
51/home/mjk/alpha_automl_eden/alpha-automl/examp...
\n", + "

79 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " image\n", + "49 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "70 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "68 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "15 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "39 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + ".. ...\n", + "60 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "71 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "14 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "92 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "51 /home/mjk/alpha_automl_eden/alpha-automl/examp...\n", + "\n", + "[79 rows x 1 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2064c443-6657-4230-854d-c41776432e6c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:gluonts.mx.context:Using CPU\n", + "DEBUG:matplotlib:matplotlib data path: /home/mjk/alpha_automl_eden/lib/python3.10/site-packages/matplotlib/mpl-data\n", + "DEBUG:matplotlib:CONFIGDIR=/home/mjk/.config/matplotlib\n", + "DEBUG:matplotlib:interactive is False\n", + "DEBUG:matplotlib:platform is linux\n", + "DEBUG:matplotlib:CACHEDIR=/home/mjk/.cache/matplotlib\n", + "DEBUG:matplotlib.font_manager:Using fontManager instance from /home/mjk/.cache/matplotlib/fontlist-v330.json\n", + "INFO:gluonts.mx.context:Using CPU\n", + "DEBUG:matplotlib:matplotlib data path: /home/mjk/alpha_automl_eden/lib/python3.10/site-packages/matplotlib/mpl-data\n", + "DEBUG:matplotlib:CONFIGDIR=/home/mjk/.config/matplotlib\n", + "DEBUG:matplotlib:interactive is False\n", + "DEBUG:matplotlib:platform is linux\n", + "DEBUG:matplotlib:CACHEDIR=/home/mjk/.cache/matplotlib\n", + "DEBUG:matplotlib.font_manager:Using fontManager instance from 
/home/mjk/.cache/matplotlib/fontlist-v330.json\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored 
pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:20, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found 
pipeline, time=0:00:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:28, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:29, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.75\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:40, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:41, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored 
pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:00:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:01:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:01:10, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.75\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:23, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:24, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:01:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:28, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:01:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:31, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:33, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.75\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:39, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:01:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:46, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:49, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found 
pipeline, time=0:01:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:50, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:58, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:01:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:00, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:06, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:07, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.2\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:14, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:26, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:27, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.25\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:34, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:36, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.8\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:41, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:42, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:45, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:47, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:53, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.05\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:54, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.85\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:56, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.9\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:57, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.9\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:02:59, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.35\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:02, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:03, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.5\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:04, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.4\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:11, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored 
pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:15, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.1\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.55\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:18, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.45\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:19, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.6\n", + 
"INFO:alpha_automl.automl_api:Found pipeline, time=0:03:29, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.65\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.3\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.75\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:30, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.15\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:35, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:55, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.0\n", + "INFO:alpha_automl.automl_api:Found 317 pipelines\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from alpha_automl import AutoMLImageClassifier\n", + "\n", + "\n", + "output_path = 'tmp/'\n", + "\n", + "automl = AutoMLImageClassifier(output_path, time_bound=10, verbose=False)\n", + "automl.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "45151d61-d816-4dc2-9292-b1728ff033fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingpipelineaccuracy_score
1HogTransformer, StandardScaler, LinearSVC0.900
2HogTransformer, MaxAbsScaler, RandomForestClassifier0.900
3HogTransformer, StandardScaler, SelectPercentile, LinearSVC0.850
4RGB2GrayTransformer, StandardScaler, SelectPercentile, LinearSVC0.800
5RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LinearSVC0.750
6RGB2GrayTransformer, MaxAbsScaler, LinearSVC0.750
7HogTransformer, MaxAbsScaler, DecisionTreeClassifier0.750
8RGB2GrayTransformer, SelectPercentile, LinearSVC0.750
9ThresholdOtsu, StandardScaler, SelectPercentile, LinearSVC0.700
10ThresholdOtsu, MaxAbsScaler, LinearSVC0.700
11ThresholdOtsu, LinearSVC0.700
12RGB2GrayTransformer, SelectPercentile, RandomForestClassifier0.700
13RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, BernoulliNB0.700
14RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, PassiveAggressiveClassifier0.700
15ThresholdOtsu, MaxAbsScaler, LinearDiscriminantAnalysis0.700
16RGB2GrayTransformer, StandardScaler, LinearSVC0.700
17ThresholdOtsu, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.700
18ThresholdOtsu, MaxAbsScaler, RandomForestClassifier0.700
19HogTransformer, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.700
20ThresholdOtsu, StandardScaler, SelectPercentile, RandomForestClassifier0.700
21RGB2GrayTransformer, LinearSVC0.700
22ThresholdOtsu, StandardScaler, LinearSVC0.650
23ThresholdOtsu, StandardScaler, SelectPercentile, ExtraTreesClassifier0.650
24ThresholdOtsu, StandardScaler, ExtraTreesClassifier0.650
25ThresholdOtsu, MaxAbsScaler, SelectPercentile, LinearSVC0.650
26ThresholdOtsu, SelectPercentile, LinearSVC0.650
27ThresholdOtsu, RobustScaler, SelectPercentile, LinearSVC0.650
28RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, ExtraTreesClassifier0.650
29ThresholdOtsu, StandardScaler, SelectKBest, LinearDiscriminantAnalysis0.650
30RGB2GrayTransformer, RandomForestClassifier0.650
31RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, KNeighborsClassifier0.650
32RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LogisticRegression0.650
33RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, SGDClassifier0.650
34ThresholdOtsu, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.650
35ThresholdOtsu, StandardScaler, RandomForestClassifier0.650
36ThresholdOtsu, StandardScaler, SelectPercentile, LinearDiscriminantAnalysis0.650
37ThresholdOtsu, SelectPercentile, RandomForestClassifier0.650
38ThresholdOtsu, SelectPercentile, LinearDiscriminantAnalysis0.650
39ThresholdOtsu, StandardScaler, SelectKBest, LinearSVC0.600
40ThresholdOtsu, SelectKBest, DecisionTreeClassifier0.600
41ThresholdOtsu, StandardScaler, SelectKBest, RandomForestClassifier0.600
42RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, RandomForestClassifier0.600
43ThresholdOtsu, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.600
44ThresholdOtsu, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.600
45ThresholdOtsu, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.600
46ThresholdOtsu, MaxAbsScaler, SelectKBest, BaggingClassifier0.600
47ThresholdOtsu, MaxAbsScaler, SelectKBest, GaussianNB0.600
48ThresholdOtsu, StandardScaler, SelectKBest, DecisionTreeClassifier0.600
49ThresholdOtsu, StandardScaler, SelectKBest, ExtraTreesClassifier0.600
50RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, SVC0.600
51RGB2GrayTransformer, MaxAbsScaler, LinearDiscriminantAnalysis0.600
52ThresholdOtsu, SelectKBest, LinearDiscriminantAnalysis0.600
53RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, GaussianNB0.600
54RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, MultinomialNB0.600
55RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, XGBClassifier0.600
56RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LGBMClassifier0.600
57ThresholdOtsu, SelectKBest, ExtraTreesClassifier0.600
58ThresholdOtsu, StandardScaler, SelectPercentile, DecisionTreeClassifier0.600
59ThresholdOtsu, StandardScaler, DecisionTreeClassifier0.600
60ThresholdOtsu, RobustScaler, SelectKBest, DecisionTreeClassifier0.600
61RGB2GrayTransformer, StandardScaler, DecisionTreeClassifier0.600
62ThresholdOtsu, RobustScaler, SelectKBest, RandomForestClassifier0.600
63ThresholdOtsu, RobustScaler, SelectKBest, LinearDiscriminantAnalysis0.600
64ThresholdOtsu, StandardScaler, SelectKBest, LogisticRegression0.600
65ThresholdOtsu, StandardScaler, SelectKBest, LGBMClassifier0.600
66ThresholdOtsu, StandardScaler, SelectKBest, BernoulliNB0.600
67ThresholdOtsu, RandomForestClassifier0.600
68RGB2GrayTransformer, SelectKBest, DecisionTreeClassifier0.550
69RGB2GrayTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.550
70RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, LinearDiscriminantAnalysis0.550
71RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.550
72RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, GradientBoostingClassifier0.550
73ThresholdOtsu, MaxAbsScaler, SelectKBest, RandomForestClassifier0.550
74RGB2GrayTransformer, MaxAbsScaler, DecisionTreeClassifier0.550
75HogTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.550
76ThresholdOtsu, MaxAbsScaler, DecisionTreeClassifier0.550
77HogTransformer, SelectPercentile, DecisionTreeClassifier0.550
78HogTransformer, DecisionTreeClassifier0.550
79RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, BaggingClassifier0.550
80RGB2GrayTransformer, StandardScaler, SelectPercentile, DecisionTreeClassifier0.550
81ThresholdOtsu, StandardScaler, SelectKBest, GradientBoostingClassifier0.550
82ThresholdOtsu, StandardScaler, SelectKBest, SVC0.550
83ThresholdOtsu, StandardScaler, SelectKBest, KNeighborsClassifier0.550
84ThresholdOtsu, StandardScaler, SelectKBest, XGBClassifier0.550
85ThresholdOtsu, StandardScaler, SelectKBest, BaggingClassifier0.550
86ThresholdOtsu, StandardScaler, SelectKBest, SGDClassifier0.550
87ThresholdOtsu, StandardScaler, SelectKBest, GaussianNB0.550
88RGB2GrayTransformer, SelectKBest, RandomForestClassifier0.500
89RGB2GrayTransformer, MaxAbsScaler, SelectKBest, RandomForestClassifier0.500
90RGB2GrayTransformer, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.500
91RGB2GrayTransformer, MaxAbsScaler, SelectKBest, XGBClassifier0.500
92ThresholdOtsu, MaxAbsScaler, SelectKBest, LinearSVC0.500
93ThresholdOtsu, MaxAbsScaler, SelectKBest, GradientBoostingClassifier0.500
94ThresholdOtsu, MaxAbsScaler, SelectKBest, LogisticRegression0.500
95RGB2GrayTransformer, StandardScaler, SelectKBest, DecisionTreeClassifier0.500
96RGB2GrayTransformer, StandardScaler, SelectKBest, ExtraTreesClassifier0.500
97ThresholdOtsu, SelectKBest, LinearSVC0.500
98ThresholdOtsu, MaxAbsScaler, SelectKBest, SGDClassifier0.500
99ThresholdOtsu, MaxAbsScaler, SelectKBest, BernoulliNB0.500
100RGB2GrayTransformer, StandardScaler, SelectKBest, XGBClassifier0.500
101ThresholdOtsu, SelectPercentile, DecisionTreeClassifier0.500
102ThresholdOtsu, DecisionTreeClassifier0.500
103RGB2GrayTransformer, RobustScaler, SelectKBest, DecisionTreeClassifier0.500
104FisherVectorTransformer, StandardScaler, SelectPercentile, DecisionTreeClassifier0.500
105RGB2GrayTransformer, SelectKBest, ExtraTreesClassifier0.500
106ThresholdOtsu, RobustScaler, SelectKBest, LinearSVC0.500
107ThresholdOtsu, SelectKBest, RandomForestClassifier0.450
108RGB2GrayTransformer, MaxAbsScaler, SelectKBest, GradientBoostingClassifier0.450
109RGB2GrayTransformer, MaxAbsScaler, SelectKBest, BaggingClassifier0.450
110RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LGBMClassifier0.450
111ThresholdOtsu, MaxAbsScaler, SelectKBest, SVC0.450
112ThresholdOtsu, MaxAbsScaler, SelectKBest, KNeighborsClassifier0.450
113ThresholdOtsu, MaxAbsScaler, SelectKBest, XGBClassifier0.450
114RGB2GrayTransformer, StandardScaler, SelectKBest, GradientBoostingClassifier0.450
115RGB2GrayTransformer, StandardScaler, SelectKBest, LGBMClassifier0.450
116HogTransformer, StandardScaler, SelectPercentile, DecisionTreeClassifier0.450
117HogTransformer, StandardScaler, DecisionTreeClassifier0.450
118RGB2GrayTransformer, SelectPercentile, DecisionTreeClassifier0.450
119RGB2GrayTransformer, DecisionTreeClassifier0.450
120ThresholdOtsu, StandardScaler, SelectKBest, PassiveAggressiveClassifier0.450
121RGB2GrayTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.400
122RGB2GrayTransformer, MaxAbsScaler, SelectKBest, GaussianNB0.400
123HogTransformer, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.400
124HogTransformer, MaxAbsScaler, SelectKBest, GradientBoostingClassifier0.400
125HogTransformer, MaxAbsScaler, SelectKBest, KNeighborsClassifier0.400
126ThresholdOtsu, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.400
127FisherVectorTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.400
128ThresholdOtsu, MaxAbsScaler, SelectKBest, LGBMClassifier0.400
129ThresholdOtsu, MaxAbsScaler, SelectKBest, PassiveAggressiveClassifier0.400
130HogTransformer, MaxAbsScaler, SelectKBest, SGDClassifier0.400
131HogTransformer, MaxAbsScaler, SelectKBest, XGBClassifier0.400
132HogTransformer, MaxAbsScaler, SelectKBest, LGBMClassifier0.400
133HogTransformer, MaxAbsScaler, SelectKBest, BernoulliNB0.400
134RGB2GrayTransformer, StandardScaler, SelectKBest, BaggingClassifier0.400
135RGB2GrayTransformer, StandardScaler, SelectKBest, GaussianNB0.400
136CannyEdgeDetection, MaxAbsScaler, DecisionTreeClassifier0.400
137HogTransformer, StandardScaler, SelectKBest, LinearDiscriminantAnalysis0.400
138HogTransformer, SelectKBest, LinearDiscriminantAnalysis0.400
139FisherVectorTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.350
140RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.350
141RGB2GrayTransformer, MaxAbsScaler, SelectKBest, BernoulliNB0.350
142HogTransformer, MaxAbsScaler, SelectKBest, LinearSVC0.350
143RGB2GrayTransformer, StandardScaler, SelectKBest, LinearDiscriminantAnalysis0.350
144RGB2GrayTransformer, StandardScaler, SelectKBest, LinearSVC0.350
145RGB2GrayTransformer, RobustScaler, SelectKBest, LinearSVC0.350
146HogTransformer, StandardScaler, SelectKBest, LinearSVC0.350
147RGB2GrayTransformer, SelectKBest, LinearDiscriminantAnalysis0.350
148FisherVectorTransformer, StandardScaler, SelectKBest, LinearDiscriminantAnalysis0.350
149HogTransformer, SelectKBest, DecisionTreeClassifier0.300
150HogTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.300
151FisherVectorTransformer, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.300
152HogTransformer, SelectKBest, LinearSVC0.300
153FisherVectorTransformer, MaxAbsScaler, SelectKBest, LinearSVC0.300
154HogTransformer, MaxAbsScaler, SelectKBest, LogisticRegression0.300
155HogTransformer, MaxAbsScaler, SelectKBest, MultinomialNB0.300
156HogTransformer, MaxAbsScaler, SelectKBest, PassiveAggressiveClassifier0.300
157HogTransformer, StandardScaler, SelectKBest, ExtraTreesClassifier0.300
158HogTransformer, SelectKBest, ExtraTreesClassifier0.300
159HogTransformer, SelectKBest, RandomForestClassifier0.250
160FisherVectorTransformer, SelectKBest, RandomForestClassifier0.250
161HogTransformer, StandardScaler, SelectKBest, RandomForestClassifier0.250
162FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.250
163HogTransformer, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.250
164HogTransformer, MaxAbsScaler, SelectKBest, SVC0.250
165HogTransformer, MaxAbsScaler, SelectKBest, GaussianNB0.250
166ThresholdOtsu, MaxAbsScaler, SelectKBest, MultinomialNB0.250
167HogTransformer, StandardScaler, SelectKBest, DecisionTreeClassifier0.250
168HogTransformer, MaxAbsScaler, SelectKBest, QuadraticDiscriminantAnalysis0.250
169RGB2GrayTransformer, StandardScaler, SelectKBest, SGDClassifier0.250
170FisherVectorTransformer, GenericUnivariateSelect, DecisionTreeClassifier0.250
171RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SGDClassifier0.200
172RGB2GrayTransformer, MaxAbsScaler, SelectKBest, SGDClassifier0.200
173RGB2GrayTransformer, MaxAbsScaler, SelectKBest, QuadraticDiscriminantAnalysis0.200
174HogTransformer, MaxAbsScaler, SelectKBest, RandomForestClassifier0.200
175FisherVectorTransformer, MaxAbsScaler, SelectKBest, BaggingClassifier0.200
176CannyEdgeDetection, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.200
177RGB2GrayTransformer, StandardScaler, SelectKBest, QuadraticDiscriminantAnalysis0.200
178CannyEdgeDetection, SelectKBest, DecisionTreeClassifier0.150
179FisherVectorTransformer, SelectKBest, DecisionTreeClassifier0.150
180RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.150
181RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, SGDClassifier0.150
182RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.150
183CannyEdgeDetection, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.150
184CannyEdgeDetection, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.150
185HogTransformer, MaxAbsScaler, SelectKBest, BaggingClassifier0.150
186FisherVectorTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.150
187CannyEdgeDetection, SelectKBest, LinearSVC0.150
188FisherVectorTransformer, StandardScaler, SelectKBest, LinearSVC0.150
189ThresholdOtsu, MaxAbsScaler, SelectKBest, QuadraticDiscriminantAnalysis0.150
190CannyEdgeDetection, MaxAbsScaler, SelectKBest, LinearSVC0.150
191RGB2GrayTransformer, StandardScaler, SelectKBest, KNeighborsClassifier0.150
192CannyEdgeDetection, StandardScaler, SelectKBest, DecisionTreeClassifier0.150
193CannyEdgeDetection, MaxAbsScaler, SelectKBest, KNeighborsClassifier0.150
194CannyEdgeDetection, MaxAbsScaler, SelectKBest, SVC0.150
195CannyEdgeDetection, MaxAbsScaler, SelectKBest, GaussianNB0.150
196RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, QuadraticDiscriminantAnalysis0.150
197RGB2GrayTransformer, StandardScaler, SelectKBest, LogisticRegression0.150
198CannyEdgeDetection, StandardScaler, SelectPercentile, DecisionTreeClassifier0.150
199ThresholdOtsu, StandardScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.150
200CannyEdgeDetection, StandardScaler, SelectKBest, ExtraTreesClassifier0.150
201CannyEdgeDetection, SelectKBest, ExtraTreesClassifier0.150
202CannyEdgeDetection, SelectKBest, RandomForestClassifier0.100
203CannyEdgeDetection, StandardScaler, SelectKBest, RandomForestClassifier0.100
204RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, PassiveAggressiveClassifier0.100
205RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LinearSVC0.100
206RGB2GrayTransformer, MaxAbsScaler, SelectKBest, KNeighborsClassifier0.100
207RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LogisticRegression0.100
208RGB2GrayTransformer, MaxAbsScaler, SelectKBest, MultinomialNB0.100
209RGB2GrayTransformer, MaxAbsScaler, SelectKBest, PassiveAggressiveClassifier0.100
210CannyEdgeDetection, MaxAbsScaler, SelectKBest, RandomForestClassifier0.100
211FisherVectorTransformer, MaxAbsScaler, SelectKBest, RandomForestClassifier0.100
212HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.100
213CannyEdgeDetection, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.100
214FisherVectorTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.100
215CannyEdgeDetection, MaxAbsScaler, SelectKBest, BaggingClassifier0.100
216RGB2GrayTransformer, SelectKBest, LinearSVC0.100
217FisherVectorTransformer, SelectKBest, LinearSVC0.100
218CannyEdgeDetection, StandardScaler, SelectKBest, LinearSVC0.100
219HogTransformer, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.100
220HogTransformer, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.100
221HogTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.100
222CannyEdgeDetection, MaxAbsScaler, SelectKBest, GradientBoostingClassifier0.100
223ThresholdOtsu, StandardScaler, SelectKBest, QuadraticDiscriminantAnalysis0.100
224HogTransformer, StandardScaler, GenericUnivariateSelect, LinearSVC0.100
225CannyEdgeDetection, StandardScaler, SelectKBest, LinearDiscriminantAnalysis0.100
226CannyEdgeDetection, SelectKBest, LinearDiscriminantAnalysis0.100
227FisherVectorTransformer, SelectKBest, LinearDiscriminantAnalysis0.100
228HogTransformer, GenericUnivariateSelect, LinearDiscriminantAnalysis0.100
229ThresholdOtsu, StandardScaler, GenericUnivariateSelect, LinearSVC0.050
230FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.050
231RGB2GrayTransformer, MaxAbsScaler, SelectKBest, SVC0.050
232FisherVectorTransformer, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.050
233ThresholdOtsu, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.050
234ThresholdOtsu, StandardScaler, GenericUnivariateSelect, DecisionTreeClassifier0.050
235ThresholdOtsu, StandardScaler, GenericUnivariateSelect, ExtraTreesClassifier0.050
236FisherVectorTransformer, StandardScaler, SelectKBest, DecisionTreeClassifier0.050
237HogTransformer, MaxAbsScaler, GenericUnivariateSelect, QuadraticDiscriminantAnalysis0.050
238RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, PassiveAggressiveClassifier0.050
239FisherVectorTransformer, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.050
240RGB2GrayTransformer, StandardScaler, SelectKBest, SVC0.050
241ThresholdOtsu, StandardScaler, GenericUnivariateSelect, RandomForestClassifier0.050
242ThresholdOtsu, StandardScaler, GenericUnivariateSelect, GradientBoostingClassifier0.050
243ThresholdOtsu, StandardScaler, GenericUnivariateSelect, BaggingClassifier0.050
244ThresholdOtsu, StandardScaler, GenericUnivariateSelect, SVC0.050
245ThresholdOtsu, StandardScaler, GenericUnivariateSelect, KNeighborsClassifier0.050
246ThresholdOtsu, StandardScaler, GenericUnivariateSelect, LogisticRegression0.050
247ThresholdOtsu, StandardScaler, GenericUnivariateSelect, XGBClassifier0.050
248RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
249RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
250RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.000
251RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.000
252RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.000
253RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.000
254RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, SVC0.000
255RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.000
256RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, KNeighborsClassifier0.000
257RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GaussianNB0.000
258RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.000
259RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.000
260RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LGBMClassifier0.000
261RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, BernoulliNB0.000
262RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.000
263HogTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
264CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
265HogTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.000
266CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.000
267RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
268RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
269RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, LinearSVC0.000
270RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, RandomForestClassifier0.000
271RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, ExtraTreesClassifier0.000
272RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, GradientBoostingClassifier0.000
273RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, SVC0.000
274RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, KNeighborsClassifier0.000
275RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, BaggingClassifier0.000
276RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, GaussianNB0.000
277RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, LogisticRegression0.000
278RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
279RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
280RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, LinearSVC0.000
281RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, RandomForestClassifier0.000
282CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
283RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, LGBMClassifier0.000
284RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, BernoulliNB0.000
285HogTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
286CannyEdgeDetection, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
287RGB2GrayTransformer, GenericUnivariateSelect, DecisionTreeClassifier0.000
288RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, ExtraTreesClassifier0.000
289RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, SVC0.000
290RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, KNeighborsClassifier0.000
291RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, GradientBoostingClassifier0.000
292RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, BaggingClassifier0.000
293HogTransformer, StandardScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
294CannyEdgeDetection, StandardScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
295FisherVectorTransformer, StandardScaler, GenericUnivariateSelect, DecisionTreeClassifier0.000
296RGB2GrayTransformer, GenericUnivariateSelect, RandomForestClassifier0.000
297HogTransformer, GenericUnivariateSelect, RandomForestClassifier0.000
298CannyEdgeDetection, GenericUnivariateSelect, RandomForestClassifier0.000
299HogTransformer, GenericUnivariateSelect, DecisionTreeClassifier0.000
300HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.000
301HogTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.000
302CannyEdgeDetection, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.000
303HogTransformer, MaxAbsScaler, GenericUnivariateSelect, SVC0.000
304HogTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.000
305HogTransformer, MaxAbsScaler, GenericUnivariateSelect, BaggingClassifier0.000
306FisherVectorTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.000
307HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LogisticRegression0.000
308HogTransformer, MaxAbsScaler, GenericUnivariateSelect, MultinomialNB0.000
309HogTransformer, MaxAbsScaler, GenericUnivariateSelect, XGBClassifier0.000
310RGB2GrayTransformer, StandardScaler, GenericUnivariateSelect, XGBClassifier0.000
311CannyEdgeDetection, StandardScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
312RGB2GrayTransformer, GenericUnivariateSelect, LinearDiscriminantAnalysis0.000
313CannyEdgeDetection, GenericUnivariateSelect, DecisionTreeClassifier0.000
314RGB2GrayTransformer, GenericUnivariateSelect, LinearSVC0.000
315RGB2GrayTransformer, GenericUnivariateSelect, ExtraTreesClassifier0.000
316HogTransformer, GenericUnivariateSelect, LinearSVC0.000
317HogTransformer, StandardScaler, GenericUnivariateSelect, RandomForestClassifier0.000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.plot_leaderboard()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bc62282e-0c7e-488b-92e0-9901081a2fc6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t \n", + "\t
\n", + "\t
\n", + "\t \n", + "\t\n", + "\t\n", + "\t" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "automl.plot_comparison_pipelines()" + ] + }, + { + "cell_type": "markdown", + "id": "e498a1d6-8f6f-49f0-8276-35c15745e050", + "metadata": {}, + "source": [ + "### Testing Pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1c6bd444-f72a-4ed3-a471-8372befbf2a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([6, 1, 0, 6, 8, 7, 0, 7, 3, 3, 8, 2, 5, 9, 3, 9, 9, 3, 0, 6])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = automl.predict(X_test)\n", + "y_pred" + ] + }, + { + "cell_type": "markdown", + "id": "6a166d86-bd73-48ee-8877-5d63fd906b37", + "metadata": {}, + "source": [ + "The pipeline can be evaluated against a held out dataset with the function call:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "200dd60f-3cc8-4420-9506-9e2447ea0117", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: accuracy_score, Score: 0.85\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'accuracy_score', 'score': 0.85}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54cdb4ce-81bf-4a54-b4dc-82f9b7a39e12", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/image_classification_selfie.ipynb b/examples/image_classification_selfie.ipynb new file mode 100644 index 00000000..2ba13769 --- /dev/null +++ b/examples/image_classification_selfie.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Image Classification Pipeline Solving Selfie Classification Task" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, import the class `AutoMLClassifier`\n", + "\n", + "In this example, we are generating pipelines for a CSV dataset. The selfie dataset is used for this example.\n", + "Sample and divide the dataset using _train_test_split_.\n", + "\n", + "For this task, we use the Selfie Dataset, a customized dataset for recognizing selfies among various images. \n", + "The original image dataset is collected from [Selfie-Image-Detection-Dataset](https://www.kaggle.com/datasets/jigrubhatt/selfieimagedetectiondataset) from Kaggle. \n", + "You can download the dataset via the following [google drive link](https://drive.google.com/file/d/1y5d_3LT5jQ4RF7LAKmXjEFu041dH7-Uk/view?usp=drive_link)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
1513/home/yfw215/alpha-automl/examples/datasets/se...
5341/home/yfw215/alpha-automl/examples/datasets/se...
1334/home/yfw215/alpha-automl/examples/datasets/se...
3615/home/yfw215/alpha-automl/examples/datasets/se...
3783/home/yfw215/alpha-automl/examples/datasets/se...
......
6413/home/yfw215/alpha-automl/examples/datasets/se...
7205/home/yfw215/alpha-automl/examples/datasets/se...
4624/home/yfw215/alpha-automl/examples/datasets/se...
3891/home/yfw215/alpha-automl/examples/datasets/se...
532/home/yfw215/alpha-automl/examples/datasets/se...
\n", + "

800 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " image\n", + "1513 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "5341 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "1334 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "3615 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "3783 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "... ...\n", + "6413 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "7205 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "4624 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "3891 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "532 /home/yfw215/alpha-automl/examples/datasets/se...\n", + "\n", + "[800 rows x 1 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from alpha_automl import AutoMLClassifier\n", + "\n", + "output_path = 'tmp/'\n", + "media_path = os.path.join(os.getcwd(), 'datasets/selfie/')\n", + "dataset = pd.read_csv('datasets/selfie/learningData.csv').sample(1000)\n", + "dataset[\"image\"] = dataset[\"image\"].apply(lambda x: os.path.join(media_path, x))\n", + "X = dataset[[\"image\"]]\n", + "y = dataset[[\"label\"]]\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, \n", + " y, \n", + " test_size=0.2, \n", + " shuffle=True,\n", + " random_state=42,\n", + ")\n", + "X_train" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "label\n", + "0 406\n", + "1 394\n", + "dtype: int64" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding New Primitives into AlphaAutoML's Search Space" + ] + }, + { + "cell_type": 
"code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "output_path = 'tmp/'\n", + "automl = AutoMLClassifier(output_path, time_bound=20, verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "DEBUG:h5py._conv:Creating converter from 7 to 5\n", + "DEBUG:h5py._conv:Creating converter from 5 to 7\n", + "INFO:gluonts.mx.context:Using CPU\n", + "DEBUG:matplotlib:matplotlib data path: /ext3/miniconda3/lib/python3.10/site-packages/matplotlib/mpl-data\n", + "DEBUG:matplotlib:CONFIGDIR=/home/yfw215/.config/matplotlib\n", + "DEBUG:matplotlib:interactive is False\n", + "DEBUG:matplotlib:platform is linux\n", + "DEBUG:matplotlib:CACHEDIR=/home/yfw215/.cache/matplotlib\n", + "DEBUG:matplotlib.font_manager:Using fontManager instance from /home/yfw215/.cache/matplotlib/fontlist-v330.json\n" + ] + } + ], + "source": [ + "from alpha_automl.wrapper_primitives.clip import HuggingfaceCLIPTransformer \n", + "\n", + "model_id = 'openai/clip-vit-base-patch32'\n", + "my_clip_encoder = HuggingfaceCLIPTransformer(model_id=model_id)\n", + "automl.add_primitives([(my_clip_encoder, 'IMAGE_ENCODER')])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:07, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:14, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, 
time=0:00:29, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:36, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.49\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:00:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.595\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:05, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:12, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.63\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:03:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.71\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:43, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.905\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:05:51, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.595\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:22, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.555\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:32, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.695\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:40, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.59\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:48, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.62\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:08:56, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.695\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:04, 
scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.7\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:13, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.755\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:09:21, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.785\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.93\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:11:52, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.775\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:00, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.51\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:09, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.595\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:17, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.59\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:12:25, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.72\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:14:47, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.945\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:17:08, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.88\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:17:16, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.63\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:19:37, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.97\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:19:44, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:19:53, scoring...\n", + 
"INFO:alpha_automl.automl_api:Scored pipeline, score=0.635\n", + "INFO:alpha_automl.automl_api:Found pipeline, time=0:20:01, scoring...\n", + "INFO:alpha_automl.automl_api:Scored pipeline, score=0.63\n", + "INFO:alpha_automl.automl_api:Found 33 pipelines\n" + ] + } + ], + "source": [ + "automl.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exploring Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the pipeline search is complete, we can display the leaderboard:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingpipelineaccuracy_score
1ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.970
2ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, DecisionTreeClassifier0.945
3ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.930
4ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.905
5ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.880
6ColumnTransformer, HuggingfaceCLIPTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.880
7ColumnTransformer, HuggingfaceCLIPTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.880
8ColumnTransformer, HogTransformer, MaxAbsScaler, SelectKBest, RandomForestClassifier0.785
9ColumnTransformer, HogTransformer, MaxAbsScaler, SelectKBest, ExtraTreesClassifier0.775
10ColumnTransformer, HogTransformer, MaxAbsScaler, SelectKBest, LinearSVC0.755
11ColumnTransformer, HogTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.720
12ColumnTransformer, HogTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.710
13ColumnTransformer, HogTransformer, MaxAbsScaler, DecisionTreeClassifier0.700
14ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, DecisionTreeClassifier0.695
15ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectPercentile, DecisionTreeClassifier0.695
16ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.635
17ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.635
18ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LinearSVC0.635
19ColumnTransformer, RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, LinearSVC0.635
20ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearSVC0.630
21ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, LinearDiscriminantAnalysis0.630
22ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectKBest, LinearDiscriminantAnalysis0.630
23ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.620
24ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.595
25ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.595
26ColumnTransformer, HogTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.595
27ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.590
28ColumnTransformer, HogTransformer, MaxAbsScaler, GenericUnivariateSelect, GradientBoostingClassifier0.590
29ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, SelectKBest, DecisionTreeClassifier0.555
30ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, DecisionTreeClassifier0.510
31ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, RandomForestClassifier0.510
32ColumnTransformer, RGB2GrayTransformer, RobustScaler, GenericUnivariateSelect, DecisionTreeClassifier0.510
33ColumnTransformer, RGB2GrayTransformer, MaxAbsScaler, GenericUnivariateSelect, ExtraTreesClassifier0.490
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.plot_leaderboard()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to explore the produced pipelines, we can use [PipelineProfiler](https://github.com/VIDA-NYU/PipelineVis). PipelineProfiler is a visualization that enables users to compare and explore the pipelines generated by the AlphaAutoML system.\n", + "\n", + "After the pipeline search process is completed, we can use PipelineProfiler with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl.plot_comparison_pipelines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pipeline predictions are accessed with:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,\n", + " 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,\n", + " 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,\n", + " 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1,\n", + " 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,\n", + " 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,\n", + " 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,\n", + " 0, 1])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = automl.predict(X_test)\n", + "y_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"The pipeline can be evaluated against a held out dataset with the function call:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:alpha_automl.automl_api:Metric: accuracy_score, Score: 0.98\n" + ] + }, + { + "data": { + "text/plain": [ + "{'metric': 'accuracy_score', 'score': 0.98}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/extra_requirements.txt b/extra_requirements.txt index 92a4f093..af879346 100644 --- a/extra_requirements.txt +++ b/extra_requirements.txt @@ -5,4 +5,5 @@ neuralforecast==1.5.0: timeseries mxnet==1.9.1: timeseries pmdarima==2.0.3: timeseries fasttext-wheel: nlp -transformers: nlp +transformers: nlp, image +scikit-image: image \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 27756378..e3664694 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ datamart_profiler feature_engine xgboost lightgbm -numpy<=1.24.3 \ No newline at end of file +numpy<=1.24.3 diff --git a/tests/test_data/digits/digits.csv b/tests/test_data/digits/digits.csv new file mode 100644 index 00000000..5c3c6c01 --- /dev/null +++ b/tests/test_data/digits/digits.csv @@ -0,0 +1,11 @@ +image,label +img_00000.png,5 +img_00001.png,0 +img_00002.png,4 +img_00003.png,1 
+img_00004.png,9 +img_00005.png,2 +img_00006.png,1 +img_00007.png,3 +img_00008.png,1 +img_00009.png,4 diff --git a/tests/test_data/digits/media/img_00000.png b/tests/test_data/digits/media/img_00000.png new file mode 100644 index 00000000..ce5888bb Binary files /dev/null and b/tests/test_data/digits/media/img_00000.png differ diff --git a/tests/test_data/digits/media/img_00001.png b/tests/test_data/digits/media/img_00001.png new file mode 100644 index 00000000..5e05ec1c Binary files /dev/null and b/tests/test_data/digits/media/img_00001.png differ diff --git a/tests/test_data/digits/media/img_00002.png b/tests/test_data/digits/media/img_00002.png new file mode 100644 index 00000000..e43e6907 Binary files /dev/null and b/tests/test_data/digits/media/img_00002.png differ diff --git a/tests/test_data/digits/media/img_00003.png b/tests/test_data/digits/media/img_00003.png new file mode 100644 index 00000000..48ba4829 Binary files /dev/null and b/tests/test_data/digits/media/img_00003.png differ diff --git a/tests/test_data/digits/media/img_00004.png b/tests/test_data/digits/media/img_00004.png new file mode 100644 index 00000000..bd8a5cb9 Binary files /dev/null and b/tests/test_data/digits/media/img_00004.png differ diff --git a/tests/test_data/digits/media/img_00005.png b/tests/test_data/digits/media/img_00005.png new file mode 100644 index 00000000..e6bcd614 Binary files /dev/null and b/tests/test_data/digits/media/img_00005.png differ diff --git a/tests/test_data/digits/media/img_00006.png b/tests/test_data/digits/media/img_00006.png new file mode 100644 index 00000000..de530087 Binary files /dev/null and b/tests/test_data/digits/media/img_00006.png differ diff --git a/tests/test_data/digits/media/img_00007.png b/tests/test_data/digits/media/img_00007.png new file mode 100644 index 00000000..534ff5f9 Binary files /dev/null and b/tests/test_data/digits/media/img_00007.png differ diff --git a/tests/test_data/digits/media/img_00008.png 
import os

import numpy as np
import pandas as pd

from alpha_automl.builtin_primitives.image_encoder import (
    CannyEdgeDetection,
    FisherVectorTransformer,
    HogTransformer,
    ImageReader,
    RGB2GrayTransformer,
    SkPatchExtractor,
    ThresholdOtsu,
)

from alpha_automl.wrapper_primitives.clip import HuggingfaceCLIPTransformer


class TestImageEncoder:
    """Output-shape checks for the image primitives on the digits fixture.

    The class body runs once, when the class is defined: it reads
    ``test_data/digits/digits.csv`` (located relative to this file) and
    rewrites the ``image`` column so that every entry is a path inside
    ``test_data/digits/media``. Each test then instantiates one primitive
    with its default arguments, calls ``transform`` on ``X`` without
    fitting, and asserts the shape of the result.
    """

    # Fixture table; the "image" column holds file names until rewritten below.
    dataset = pd.read_csv(
        os.path.join(os.path.dirname(__file__), "test_data/digits/digits.csv")
    )
    # Prefix every file name with the media directory next to this test file.
    dataset["image"] = dataset["image"].apply(
        lambda file_name: os.path.join(
            os.path.dirname(__file__), "test_data/digits/media", file_name
        )
    )
    # Single-column frames: image paths as input, labels as target.
    X = dataset[["image"]]
    y = dataset[["label"]]

    def _transformed_shape(self, primitive_cls):
        """Return the shape of ``primitive_cls().transform(self.X)``."""
        encoder = primitive_cls()
        encoded = encoder.transform(self.X)
        return encoded.shape

    def test_image_reader(self):
        assert self._transformed_shape(ImageReader) == (10, 80, 80, 3)

    def test_rgb_2_grey_transformer(self):
        assert self._transformed_shape(RGB2GrayTransformer) == (10, 6400)

    def test_hog_transformer(self):
        assert self._transformed_shape(HogTransformer) == (10, 576)

    def test_fisher_vector_transformer(self):
        assert self._transformed_shape(FisherVectorTransformer) == (10, 8208)

    def test_sk_patch_extractor(self):
        assert self._transformed_shape(SkPatchExtractor) == (10, 1023168)

    def test_threshold_otsu(self):
        assert self._transformed_shape(ThresholdOtsu) == (10, 6400)

    def test_canny_edge_detection(self):
        assert self._transformed_shape(CannyEdgeDetection) == (10, 6400)

    def test_huggingface_CLIP_transformer(self):
        assert self._transformed_shape(HuggingfaceCLIPTransformer) == (10, 512)