
Add image encoder base #61

Merged
merged 6 commits on Oct 11, 2023
Changes from 4 commits
1 change: 0 additions & 1 deletion alpha_automl/automl_manager.py
@@ -46,7 +46,6 @@ def search_pipelines(self, X, y, scoring, splitting_strategy, automl_hyperparams
def _search_pipelines(self, automl_hyperparams):
search_start_time = time.time()
automl_hyperparams = self.check_automl_hyperparams(automl_hyperparams)

metadata = profile_data(self.X)
X, y, is_sample = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
internal_splitting_strategy = make_splitter(SPLITTING_STRATEGY)
223 changes: 223 additions & 0 deletions alpha_automl/builtin_primitives/image_encoder.py
@@ -0,0 +1,223 @@
import logging
import numpy as np
import pandas as pd
from alpha_automl._optional_dependency import check_optional_dependency
from alpha_automl.base_primitive import BasePrimitive

ml_task = 'image'
check_optional_dependency('skimage', ml_task)

from skimage.color import gray2rgb, rgb2gray, rgba2rgb
Collaborator (review comment on the skimage import above): The skimage lib should be loaded as an optional dependency; otherwise it will raise an error, since it is not installed in the default version of alpha-automl.
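For reference, the guard pattern this comment asks for is the one already used at the top of this file: run the check before any skimage import so a default install fails with a clear message. A minimal sketch (the helper's exact error message is assumed; its usage is taken from this diff):

from alpha_automl._optional_dependency import check_optional_dependency

# Verify the optional 'image' extra is installed before touching skimage;
# this raises a descriptive error when the dependency is missing.
check_optional_dependency('skimage', 'image')

# Safe to import only after the check has passed:
from skimage.color import gray2rgb, rgb2gray, rgba2rgb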

from skimage.feature import ORB, canny, fisher_vector, hog, learn_gmm
from skimage.filters import threshold_otsu
from skimage.io import imread
from skimage.transform import resize
from sklearn.feature_extraction import image

logging.getLogger("PIL").setLevel(logging.CRITICAL + 1)
logger = logging.getLogger("automl")


class ImageReader(BasePrimitive):
"""Convert Image path to numpy array"""

def __init__(self, width=80, height=80):
self.width = width
self.height = height

def fit(self, X, y=None):
return self

    def transform(self, images):
        if isinstance(images, pd.DataFrame):
            paths = images[images.columns[0]]
        else:
            paths = [row[0] for row in images]

        data = []
        for file in paths:
            im = imread(file)
            im = resize(im, (self.width, self.height))
            if len(im.shape) < 3:  # grayscale: replicate to 3 channels
                im = gray2rgb(im)
            elif im.shape[2] == 4:  # RGBA: drop the alpha channel
                im = rgba2rgb(im)
            elif im.shape[2] != 3:  # unexpected channel count: keep the first channel
                im = gray2rgb(im[:, :, 0])
            data.append(im)
        return np.array(data)


class ThresholdOtsu(BasePrimitive):
"""
Filter image with a calculated threshold
"""

    def __init__(self):
        self.reader = ImageReader()

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)

        def threshold(img):
            img = rgb2gray(img)
            threshold_value = threshold_otsu(img)
            img = img > threshold_value  # binarize: True where above the Otsu threshold
            return img.flatten()

return np.array([threshold(img) for img in X])


class CannyEdgeDetection(BasePrimitive):
"""
Filter image with canny edge detection
"""

    def __init__(self):
        self.reader = ImageReader()

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)

def canny_edge(img):
img = rgb2gray(img)
img = canny(img)
return img.flatten()

return np.array([canny_edge(img) for img in X])


class RGB2GrayTransformer(BasePrimitive):
"""
Convert an array of RGB images to grayscale
"""

    def __init__(self):
        self.reader = ImageReader()

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)
return np.array([rgb2gray(img).flatten() for img in X])


class HogTransformer(BasePrimitive):
    """
    Compute HOG (histogram of oriented gradients) features for each image.
    Images are read from their paths and converted to grayscale first.
    """

def __init__(
self,
y=None,
orientations=9,
pixels_per_cell=(14, 14),
cells_per_block=(2, 2),
block_norm="L2-Hys",
):
self.y = y
self.orientations = orientations
self.pixels_per_cell = pixels_per_cell
self.cells_per_block = cells_per_block
self.block_norm = block_norm
self.reader = ImageReader()

def fit(self, X, y=None):
return self

def transform(self, X, y=None):
def local_hog(X):
return hog(
X,
orientations=self.orientations,
pixels_per_cell=self.pixels_per_cell,
cells_per_block=self.cells_per_block,
block_norm=self.block_norm,
)

X = self.reader.transform(X)
X = np.array([rgb2gray(img) for img in X])
return np.array([local_hog(img) for img in X])


class FisherVectorTransformer(BasePrimitive):
"""
Fisher vector is an image feature encoding and quantization technique
that can be seen as a soft or probabilistic version of the popular
bag-of-visual-words or VLAD algorithms
"""

def __init__(self, n_keypoints=5, harris_k=0.01, k=16):
self.n_keypoints = n_keypoints
self.harris_k = harris_k
self.k = k
self.reader = ImageReader()
self.gmm = None

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)
X = np.array([rgb2gray(img) for img in X])
descriptors = []
for x in X:
detector_extractor = ORB(
n_keypoints=self.n_keypoints, harris_k=self.harris_k
)
detector_extractor.detect_and_extract(x)
descriptors.append(detector_extractor.descriptors.astype("float32"))

        if self.gmm is None:
            # Learn the GMM codebook lazily on the first transform call and reuse it afterwards.
            self.gmm = learn_gmm(descriptors, n_modes=self.k)

fvs = np.array(
[fisher_vector(descriptor_mat, self.gmm) for descriptor_mat in descriptors]
)
return fvs


class SkPatchExtractor(BasePrimitive):
"""
Extracts patches from a collection of images
"""

    def __init__(self):
        self.reader = ImageReader()
        self.extractor = image.PatchExtractor()

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)
return self.extractor.transform(X).reshape((X.shape[0], -1))
9 changes: 7 additions & 2 deletions alpha_automl/data_profiler.py
@@ -5,6 +5,7 @@
DATETIME_COLUMN = 'http://schema.org/DateTime'
TEXT_COLUMN = 'http://schema.org/Text'
EMPTY_COLUMN = 'https://metadata.datadrivendiscovery.org/types/MissingData'
+IMAGE_COLUMN = 'https://schema.org/ImageObject'


logger = logging.getLogger(__name__)
@@ -13,7 +14,7 @@
def profile_data(X):
metadata = {'nonnumeric_columns': {}, 'useless_columns': [], 'missing_values': False}
mapping_encoders = {CATEGORICAL_COLUMN: 'CATEGORICAL_ENCODER', DATETIME_COLUMN: 'DATETIME_ENCODER',
-                        TEXT_COLUMN: 'TEXT_ENCODER'}
+                        TEXT_COLUMN: 'TEXT_ENCODER', IMAGE_COLUMN: 'IMAGE_ENCODER'}

profiled_data = datamart_profiler.process_dataset(X, coverage=False, indexes=False)

@@ -32,7 +33,11 @@ def profile_data(X):
add_nonnumeric_column(column_type, metadata, index_column, column_name)

elif TEXT_COLUMN == profiled_column['structural_type']:
-            column_type = mapping_encoders[TEXT_COLUMN]
+            samples = X[column_name].dropna().sample(5)
+            if samples.apply(lambda x: x.endswith(('jpg', 'png', 'jpeg', 'gif'))).all():
+                column_type = mapping_encoders[IMAGE_COLUMN]
+            else:
+                column_type = mapping_encoders[TEXT_COLUMN]
add_nonnumeric_column(column_type, metadata, index_column, column_name)

if 'missing_values_ratio' in profiled_column:
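To illustrate the heuristic added above (the sample values here are made up): a text column is relabeled as an image column only when every sampled entry ends with a known image extension.

import pandas as pd

col = pd.Series(['imgs/0.png', 'imgs/1.jpg', 'imgs/2.jpeg', 'imgs/3.gif', 'imgs/4.png'])
samples = col.dropna().sample(5)
# Mirrors the check in profile_data: all sampled paths must look like images.
is_image = samples.apply(lambda x: x.endswith(('jpg', 'png', 'jpeg', 'gif'))).all()
print(is_image)  # True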
2 changes: 1 addition & 1 deletion alpha_automl/pipeline_synthesis/pipeline_builder.py
@@ -121,7 +121,7 @@ def create_transformers(self, primitive_object, primitive_name, primitive_type):
if primitive_type == 'TEXT_ENCODER':
column_transformers = [(f'{primitive_name}-{col_name}', primitive_object, col_index) for
col_index, col_name in nonnumeric_columns[primitive_type]]
-        elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER':
+        elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER' or primitive_type == 'IMAGE_ENCODER':
column_transformers = [(primitive_name, primitive_object, [col_index for col_index, _
in nonnumeric_columns[primitive_type]])]

3 changes: 2 additions & 1 deletion alpha_automl/resource/base_grammar.bnf
@@ -5,13 +5,14 @@ CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> REGRESSION_TASK | IMPUTER TIME_SERIES_FORECAST
SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER CLASSIFIER | IMPUTER ENCODERS FEATURE_SCALER LABELPROPAGATION_CLASSIFIER
NA_TASK -> CLASSIFICATION_TASK | REGRESSION_TASK | SEMISUPERVISED_TASK
-ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER
+ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER IMAGE_ENCODER
IMPUTER -> 'primitive_terminal'
FEATURE_SCALER -> 'primitive_terminal' | 'E'
FEATURE_SELECTOR -> 'primitive_terminal' | 'E'
TEXT_ENCODER -> 'primitive_terminal'
CATEGORICAL_ENCODER -> 'primitive_terminal'
DATETIME_ENCODER -> 'primitive_terminal'
IMAGE_ENCODER -> 'primitive_terminal'
CLASSIFIER -> 'primitive_terminal'
REGRESSOR -> 'primitive_terminal'
CLUSTERER -> 'primitive_terminal'
4 changes: 4 additions & 0 deletions alpha_automl/resource/primitives_hierarchy.json
@@ -75,6 +75,10 @@
"sklearn.feature_extraction.text.CountVectorizer",
"sklearn.feature_extraction.text.TfidfVectorizer"
],
"IMAGE_ENCODER": [
"alpha_automl.builtin_primitives.image_encoder.RGB2GrayTransformer",
"alpha_automl.builtin_primitives.image_encoder.HogTransformer"
],
"COLUMN_TRANSFORMER": [
"sklearn.compose.ColumnTransformer"
],
1 change: 0 additions & 1 deletion alpha_automl/wrapper_primitives/__init__.py
@@ -1 +0,0 @@

39 changes: 39 additions & 0 deletions alpha_automl/wrapper_primitives/clip.py
@@ -0,0 +1,39 @@
import numpy as np
import torch

from alpha_automl._optional_dependency import import_optional_dependency
from alpha_automl.base_primitive import BasePrimitive
from alpha_automl.builtin_primitives.image_encoder import ImageReader

transformers = import_optional_dependency("transformers")

DEFAULT_MODEL_ID = "openai/clip-vit-base-patch32"


class HuggingfaceCLIPTransformer(BasePrimitive):
"""
Convert an array of RGB images to grayscale
"""

def __init__(self, model_id=DEFAULT_MODEL_ID):
self.model_id = model_id
self.reader = ImageReader(width=224, height=224)
self.model = transformers.CLIPModel.from_pretrained(self.model_id)

def fit(self, X, y=None):
"""returns itself"""
return self

def transform(self, X, y=None):
"""perform the transformation and return an array"""
X = self.reader.transform(X)

        def clip(img):
            img = np.transpose(img, (2, 0, 1))  # HWC -> CHW, as the model expects
            img = torch.from_numpy(img).float()
            img = img[None, :, :, :]  # add a batch dimension
            img = self.model.get_image_features(img)
            return img.detach().numpy()[0]

return np.array([clip(img) for img in X])
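
For context, a minimal sketch of how the new encoders could be exercised directly (the DataFrame layout and file paths are hypothetical; inside alpha-automl these primitives are selected automatically through the IMAGE_ENCODER grammar rule added above):

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from alpha_automl.builtin_primitives.image_encoder import HogTransformer

# A single-column DataFrame of image paths, the layout ImageReader expects.
X = pd.DataFrame({'image': ['data/cat_0.jpg', 'data/dog_0.jpg']})
y = [0, 1]

# HogTransformer reads each path, resizes to 80x80, converts to grayscale,
# and emits one HOG feature vector per image.
pipeline = Pipeline([
    ('encoder', HogTransformer()),
    ('classifier', LogisticRegression()),
])
pipeline.fit(X, y)
print(pipeline.predict(X))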