Skip to content

Commit

Permalink
Merge pull request #61 from VIDA-NYU/image_encoder
Browse files Browse the repository at this point in the history
Add image encoder base
  • Loading branch information
EdenWuyifan authored Oct 11, 2023
2 parents 47a884c + aede61d commit a0b7d05
Show file tree
Hide file tree
Showing 26 changed files with 32,338 additions and 10 deletions.
2 changes: 1 addition & 1 deletion alpha_automl/automl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def fit(self, X, y):
pipeline.get_pipeline().steps[-1][0]
== 'sklearn.semi_supervised.SelfTrainingClassifier'
or pipeline.get_pipeline().steps[-1][0]
== 'alpha_automl.builtin_primitives.AutonBox'
== 'alpha_automl.builtin_primitives.semisupervised_classifier.AutonBox'
):
leaderboard_data.append(
[
Expand Down
1 change: 0 additions & 1 deletion alpha_automl/automl_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def search_pipelines(self, X, y, scoring, splitting_strategy, automl_hyperparams
def _search_pipelines(self, automl_hyperparams):
search_start_time = time.time()
automl_hyperparams = self.check_automl_hyperparams(automl_hyperparams)

metadata = profile_data(self.X)
X, y, is_sample = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
internal_splitting_strategy = make_splitter(SPLITTING_STRATEGY)
Expand Down
223 changes: 223 additions & 0 deletions alpha_automl/builtin_primitives/image_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import logging
import numpy as np
import pandas as pd
from alpha_automl.base_primitive import BasePrimitive
from alpha_automl._optional_dependency import check_optional_dependency

ml_task = 'image'
check_optional_dependency('skimage', ml_task)

from skimage.color import gray2rgb, rgb2gray, rgba2rgb
from skimage.feature import ORB, canny, fisher_vector, hog, learn_gmm
from skimage.filters import threshold_otsu
from skimage.io import imread
from skimage.transform import resize
from sklearn.feature_extraction import image

logging.getLogger("PIL").setLevel(logging.CRITICAL + 1)
logger = logging.getLogger("automl")


class ImageReader(BasePrimitive):
    """Read image files from disk into a single numpy array.

    Every image is resized to ``(width, height)`` and coerced to 3-channel
    RGB so downstream transformers can assume a uniform shape.
    """

    def __init__(self, width=80, height=80):
        # Target size (in pixels) every image is resized to.
        self.width = width
        self.height = height

    def fit(self, X, y=None):
        """No-op; reading images requires no fitting. Returns self."""
        return self

    def _load(self, path):
        """Load one image file, resize it, and normalize it to 3-channel RGB."""
        im = imread(path)
        im = resize(im, (self.width, self.height))
        if len(im.shape) < 3:
            # 2-D grayscale image: replicate into three channels.
            im = gray2rgb(im)
        elif im.shape[2] == 4:
            # RGBA image: composite the alpha channel away.
            im = rgba2rgb(im)
        elif im.shape[2] != 3:
            # Unexpected channel count: keep the first channel, replicate it.
            im = gray2rgb(im[:, :, 0])
        return im

    def transform(self, images):
        """Convert a collection of image paths to an array of RGB images.

        ``images`` is either a pandas DataFrame whose first column holds file
        paths, or an iterable of single-element sequences holding a path.
        Returns an array of shape (n_images, height, width, 3).
        """
        if isinstance(images, pd.DataFrame):
            paths = images[images.columns[0]]
        else:
            paths = (row[0] for row in images)
        # Fix: the two input branches previously duplicated the whole
        # load/resize/channel-normalization loop; both now share _load().
        return np.array([self._load(path) for path in paths])


class ThresholdOtsu(BasePrimitive):
    """Binarize images using Otsu's automatically computed threshold."""

    def __init__(self):
        # Converts incoming file paths into RGB image arrays.
        self.reader = ImageReader()

    def fit(self, X, y=None):
        """No-op fit; returns self."""
        return self

    def transform(self, X, y=None):
        """Binarize each image and return one flattened boolean row per image."""
        images = self.reader.transform(X)
        rows = []
        for img in images:
            gray = rgb2gray(img)
            mask = gray > threshold_otsu(gray)
            rows.append(mask.flatten())
        return np.array(rows)


class CannyEdgeDetection(BasePrimitive):
    """Extract edges from images with the Canny edge detector."""

    def __init__(self):
        # Converts incoming file paths into RGB image arrays.
        self.reader = ImageReader()

    def fit(self, X, y=None):
        """No-op fit; returns self."""
        return self

    def transform(self, X, y=None):
        """Run Canny edge detection per image; one flattened row per image."""
        images = self.reader.transform(X)
        return np.array([canny(rgb2gray(img)).flatten() for img in images])


class RGB2GrayTransformer(BasePrimitive):
    """Convert RGB images to flattened grayscale feature vectors."""

    def __init__(self):
        # Converts incoming file paths into RGB image arrays.
        self.reader = ImageReader()

    def fit(self, X, y=None):
        """No-op fit; returns self."""
        return self

    def transform(self, X, y=None):
        """Return one flattened grayscale vector per image."""
        images = self.reader.transform(X)
        flattened = []
        for img in images:
            flattened.append(rgb2gray(img).flatten())
        return np.array(flattened)


class HogTransformer(BasePrimitive):
    """Compute HOG (histogram of oriented gradients) features per image.

    Images are read from their paths, converted to grayscale, and passed
    through skimage's ``hog`` with the configured parameters.
    """

    def __init__(
        self,
        y=None,
        orientations=9,
        pixels_per_cell=(14, 14),
        cells_per_block=(2, 2),
        block_norm="L2-Hys",
    ):
        self.y = y
        self.orientations = orientations
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block
        self.block_norm = block_norm
        # Converts incoming file paths into RGB image arrays.
        self.reader = ImageReader()

    def fit(self, X, y=None):
        """No-op fit; returns self."""
        return self

    def transform(self, X, y=None):
        """Return an array with one HOG feature vector per image."""
        images = self.reader.transform(X)
        return np.array([
            hog(
                rgb2gray(img),
                orientations=self.orientations,
                pixels_per_cell=self.pixels_per_cell,
                cells_per_block=self.cells_per_block,
                block_norm=self.block_norm,
            )
            for img in images
        ])


class FisherVectorTransformer(BasePrimitive):
    """
    Fisher vector is an image feature encoding and quantization technique
    that can be seen as a soft or probabilistic version of the popular
    bag-of-visual-words or VLAD algorithms
    """

    def __init__(self, n_keypoints=5, harris_k=0.01, k=16):
        self.n_keypoints = n_keypoints  # ORB keypoints per image
        self.harris_k = harris_k        # Harris corner sensitivity for ORB
        self.k = k                      # number of GMM modes (visual words)
        self.reader = ImageReader()
        self.gmm = None                 # learned visual vocabulary

    def _descriptors(self, X):
        """Extract float32 ORB descriptors for each image in X."""
        images = self.reader.transform(X)
        grays = np.array([rgb2gray(img) for img in images])
        descriptors = []
        for gray in grays:
            detector_extractor = ORB(
                n_keypoints=self.n_keypoints, harris_k=self.harris_k
            )
            detector_extractor.detect_and_extract(gray)
            descriptors.append(detector_extractor.descriptors.astype("float32"))
        return descriptors

    def fit(self, X, y=None):
        """Learn the GMM visual vocabulary from the training images.

        Fix: the GMM was previously learned lazily inside transform(), so it
        could end up fitted on whatever batch transform() saw first (possibly
        test data). It is now learned here, on the data passed to fit().
        """
        self.gmm = learn_gmm(self._descriptors(X), n_modes=self.k)
        return self

    def transform(self, X, y=None):
        """Return one Fisher vector per image."""
        descriptors = self._descriptors(X)

        # Backward-compatible fallback for callers that never call fit().
        if self.gmm is None:
            self.gmm = learn_gmm(descriptors, n_modes=self.k)

        return np.array(
            [fisher_vector(descriptor_mat, self.gmm) for descriptor_mat in descriptors]
        )


class SkPatchExtractor(BasePrimitive):
    """Extract patches from a collection of images via sklearn's PatchExtractor."""

    def __init__(self):
        # Converts incoming file paths into RGB image arrays.
        self.reader = ImageReader()
        self.extractor = image.PatchExtractor()

    def fit(self, X, y=None):
        """No-op fit; returns self."""
        return self

    def transform(self, X, y=None):
        """Extract patches and return one row per input image."""
        images = self.reader.transform(X)
        patches = self.extractor.transform(images)
        # Flatten all of an image's patches into a single feature row.
        return patches.reshape((images.shape[0], -1))
9 changes: 7 additions & 2 deletions alpha_automl/data_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
DATETIME_COLUMN = 'http://schema.org/DateTime'
TEXT_COLUMN = 'http://schema.org/Text'
EMPTY_COLUMN = 'https://metadata.datadrivendiscovery.org/types/MissingData'
IMAGE_COLUMN = 'https://schema.org/ImageObject'


logger = logging.getLogger(__name__)
Expand All @@ -13,7 +14,7 @@
def profile_data(X):
metadata = {'nonnumeric_columns': {}, 'useless_columns': [], 'missing_values': False}
mapping_encoders = {CATEGORICAL_COLUMN: 'CATEGORICAL_ENCODER', DATETIME_COLUMN: 'DATETIME_ENCODER',
TEXT_COLUMN: 'TEXT_ENCODER'}
TEXT_COLUMN: 'TEXT_ENCODER', IMAGE_COLUMN: 'IMAGE_ENCODER'}

profiled_data = datamart_profiler.process_dataset(X, coverage=False, indexes=False)

Expand All @@ -32,7 +33,11 @@ def profile_data(X):
add_nonnumeric_column(column_type, metadata, index_column, column_name)

elif TEXT_COLUMN == profiled_column['structural_type']:
column_type = mapping_encoders[TEXT_COLUMN]
samples = X[column_name].dropna().sample(5)
if samples.apply(lambda x: x.endswith(('jpg', 'png', 'jpeg', 'gif'))).all():
column_type = mapping_encoders[IMAGE_COLUMN]
else:
column_type = mapping_encoders[TEXT_COLUMN]
add_nonnumeric_column(column_type, metadata, index_column, column_name)

if 'missing_values_ratio' in profiled_column:
Expand Down
2 changes: 1 addition & 1 deletion alpha_automl/pipeline_synthesis/pipeline_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def create_transformers(self, primitive_object, primitive_name, primitive_type):
if primitive_type == 'TEXT_ENCODER':
column_transformers = [(f'{primitive_name}-{col_name}', primitive_object, col_index) for
col_index, col_name in nonnumeric_columns[primitive_type]]
elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER':
elif primitive_type == 'CATEGORICAL_ENCODER' or primitive_type == 'DATETIME_ENCODER' or primitive_type == 'IMAGE_ENCODER':
column_transformers = [(primitive_name, primitive_object, [col_index for col_index, _
in nonnumeric_columns[primitive_type]])]

Expand Down
3 changes: 2 additions & 1 deletion alpha_automl/resource/base_grammar.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> REGRESSION_TASK | IMPUTER TIME_SERIES_FORECAST
SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER CLASSIFIER | IMPUTER ENCODERS FEATURE_SCALER LABELPROPAGATION_CLASSIFIER
NA_TASK -> CLASSIFICATION_TASK | REGRESSION_TASK | SEMISUPERVISED_TASK
ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER
ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER IMAGE_ENCODER
IMPUTER -> 'primitive_terminal'
FEATURE_SCALER -> 'primitive_terminal' | 'E'
FEATURE_SELECTOR -> 'primitive_terminal' | 'E'
TEXT_ENCODER -> 'primitive_terminal'
CATEGORICAL_ENCODER -> 'primitive_terminal'
DATETIME_ENCODER -> 'primitive_terminal'
IMAGE_ENCODER -> 'primitive_terminal'
CLASSIFIER -> 'primitive_terminal'
REGRESSOR -> 'primitive_terminal'
CLUSTERER -> 'primitive_terminal'
Expand Down
6 changes: 5 additions & 1 deletion alpha_automl/resource/primitives_hierarchy.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
"sklearn.feature_extraction.text.CountVectorizer",
"sklearn.feature_extraction.text.TfidfVectorizer"
],
"IMAGE_ENCODER": [
"alpha_automl.builtin_primitives.image_encoder.RGB2GrayTransformer",
"alpha_automl.builtin_primitives.image_encoder.HogTransformer"
],
"COLUMN_TRANSFORMER": [
"sklearn.compose.ColumnTransformer"
],
Expand All @@ -86,7 +90,7 @@
],
"SEMISUPERVISED_CLASSIFIER": [
"sklearn.semi_supervised.SelfTrainingClassifier",
"alpha_automl.builtin_primitives.semisupervised_classifier.AutonBox"
"alpha_automl.builtin_primitives.semisupervised_classifier.AutonBox"
],
"LABELPROPAGATION_CLASSIFIER": [
"alpha_automl.builtin_primitives.semisupervised_classifier.SkLabelSpreading",
Expand Down
1 change: 0 additions & 1 deletion alpha_automl/wrapper_primitives/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

40 changes: 40 additions & 0 deletions alpha_automl/wrapper_primitives/clip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import numpy as np
import torch
from alpha_automl.base_primitive import BasePrimitive
from alpha_automl.builtin_primitives.image_encoder import ImageReader
from alpha_automl._optional_dependency import check_optional_dependency

ml_task = 'image'
check_optional_dependency('transformers', ml_task)
import transformers

DEFAULT_MODEL_ID = "openai/clip-vit-base-patch32"


class HuggingfaceCLIPTransformer(BasePrimitive):
    """
    Encode images into CLIP image-feature embeddings using a pretrained
    Huggingface CLIP model.

    (Fix: the previous docstring was a copy-paste from RGB2GrayTransformer
    and wrongly described this class as an RGB-to-grayscale converter.)
    """

    def __init__(self, model_id=DEFAULT_MODEL_ID):
        self.model_id = model_id
        # CLIP ViT-B/32 expects 224x224 inputs.
        self.reader = ImageReader(width=224, height=224)
        self.model = transformers.CLIPModel.from_pretrained(self.model_id)

    def fit(self, X, y=None):
        """No-op; the pretrained model is used as-is. Returns self."""
        return self

    def transform(self, X, y=None):
        """Return one CLIP image-feature vector per input image path."""
        X = self.reader.transform(X)

        def clip(img):
            # HWC -> CHW, add batch dimension, cast to float for the model.
            tensor = torch.from_numpy(np.transpose(img, (2, 0, 1)))
            tensor = tensor[None, :, :, :].float()
            # Fix: run under no_grad() — this is pure inference, so skip the
            # autograd graph that was previously built (and detach()ed) per image.
            with torch.no_grad():
                features = self.model.get_image_features(tensor)
            return features.numpy()[0]

        return np.array([clip(img) for img in X])
Loading

0 comments on commit a0b7d05

Please sign in to comment.