Add OTX deploy for visual prompting task #2311

Merged
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -12,7 +12,9 @@ All notable changes to this project will be documented in this file.
- Add per-class XAI saliency maps for Mask R-CNN model (https://github.com/openvinotoolkit/training_extensions/pull/2227)
- Add new object detector Deformable DETR (<https://github.com/openvinotoolkit/training_extensions/pull/2249>)
- Add new object detector DINO (<https://github.com/openvinotoolkit/training_extensions/pull/2266>)
- Add new visual prompting task (https://github.com/openvinotoolkit/training_extensions/pull/2203), (https://github.com/openvinotoolkit/training_extensions/pull/2274)
- Add new visual prompting task: train/eval (https://github.com/openvinotoolkit/training_extensions/pull/2203)
- Add new visual prompting task: export (https://github.com/openvinotoolkit/training_extensions/pull/2274)
- Add new visual prompting task: deploy (https://github.com/openvinotoolkit/training_extensions/pull/2311)
- Add new object detector ResNeXt101-ATSS (<https://github.com/openvinotoolkit/training_extensions/pull/2309>)

### Enhancements
@@ -13,3 +13,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

from .model_wrappers import * # noqa: F403
@@ -0,0 +1,18 @@
"""Wrapper Initialization of OTX Visual Prompting."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

from .openvino_adapters import VisualPromptingOpenvinoAdapter # noqa: F401
from .openvino_models import Decoder, ImageEncoder # noqa: F401
@@ -0,0 +1,164 @@
"""Openvino Adapter Wrappers of OTX Visual Prompting.

There is a bug in the `fit_to_window` resize module of the model API.
VisualPromptingOpenvinoAdapter is a temporary workaround that uses the updated `fit_to_window` resize function.
It can be removed once the model API version used by OTX is upgraded.

Issue: https://github.com/openvinotoolkit/model_api/issues/99
Updated PR: https://github.com/openvinotoolkit/model_api/pull/100
"""

# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

from functools import partial
from typing import Tuple

import numpy as np
import openvino.runtime as ov
from openvino.model_api.adapters import OpenvinoAdapter
from openvino.preprocess import ColorFormat, PrePostProcessor
from openvino.runtime import Output, Type
from openvino.runtime import opset10 as opset
from openvino.runtime.utils.decorators import custom_preprocess_function


def resize_image_with_aspect_pad(input: Output, size, keep_aspect_ratio, interpolation, pad_value):
"""https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/utils.py#L273-L341."""
h_axis = 1
w_axis = 2
w, h = size

target_size = list(size)
target_size.reverse()

image_shape = opset.shape_of(input, name="shape")
iw = opset.convert(
opset.gather(image_shape, opset.constant(w_axis), axis=0),
destination_type="f32",
)
ih = opset.convert(
opset.gather(image_shape, opset.constant(h_axis), axis=0),
destination_type="f32",
)
w_ratio = opset.divide(np.float32(w), iw)
h_ratio = opset.divide(np.float32(h), ih)
scale = opset.minimum(w_ratio, h_ratio)
nw = opset.convert(opset.round(opset.multiply(iw, scale), "half_to_even"), destination_type="i32")
nh = opset.convert(opset.round(opset.multiply(ih, scale), "half_to_even"), destination_type="i32")
new_size = opset.concat([opset.unsqueeze(nh, 0), opset.unsqueeze(nw, 0)], axis=0)
image = opset.interpolate(
input,
new_size,
scales=np.array([0.0, 0.0], dtype=np.float32),
axes=[h_axis, w_axis],
mode=interpolation,
shape_calculation_mode="sizes",
)

dx_border = opset.subtract(opset.constant(w, dtype=np.int32), nw)
dy_border = opset.subtract(opset.constant(h, dtype=np.int32), nh)
pads_begin = np.array([0, 0, 0, 0], np.int32)
pads_end = opset.concat(
[
opset.constant([0], dtype=np.int32),
opset.unsqueeze(dy_border, 0),
opset.unsqueeze(dx_border, 0),
opset.constant([0], dtype=np.int32),
],
axis=0,
)
return opset.pad(
image,
pads_begin,
pads_end,
"constant",
opset.constant(pad_value, dtype=np.uint8),
)
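
For readers less familiar with the opset graph API, the function above computes the same result as this plain NumPy/OpenCV sketch (illustration only, not part of the diff; cv2.resize stands in for opset.interpolate):

import cv2
import numpy as np

def fit_to_window_reference(image: np.ndarray, size=(1024, 1024), pad_value=0) -> np.ndarray:
    # Resize so the image fits inside `size` while keeping aspect ratio,
    # then pad only on the bottom/right, mirroring pads_begin/pads_end above.
    w, h = size
    ih, iw = image.shape[:2]
    scale = min(w / iw, h / ih)                    # opset.minimum(w_ratio, h_ratio)
    nw, nh = round(iw * scale), round(ih * scale)  # Python round() is half-to-even, like "half_to_even"
    resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR)
    padded = np.full((h, w) + image.shape[2:], pad_value, dtype=image.dtype)
    padded[:nh, :nw] = resized
    return padded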


def resize_image_with_aspect(size, interpolation, pad_value):
"""https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/utils.py#L356-L365."""
return custom_preprocess_function(
partial(
resize_image_with_aspect_pad,
size=size,
keep_aspect_ratio=True,
interpolation=interpolation,
pad_value=pad_value,
)
)


class VisualPromptingOpenvinoAdapter(OpenvinoAdapter):
"""Openvino Adapter Wrappers of OTX Visual Prompting.

This class uses the fixed `fit_to_window` resize module.
It can be removed once the model API version used by OTX is upgraded.
"""

def embed_preprocessing(
self,
layout,
resize_mode: str,
interpolation_mode,
target_shape: Tuple[int],
pad_value,
dtype=type(int),
brg2rgb=False,
mean=None,
scale=None,
input_idx=0,
):
"""https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/openvino_adapter.py#L340-L411."""
ppp = PrePostProcessor(self.model) # type: ignore[has-type]

# Change the input type to the 8-bit image
if dtype == type(int):
ppp.input(input_idx).tensor().set_element_type(Type.u8)

ppp.input(input_idx).tensor().set_layout(ov.Layout("NHWC")).set_color_format(ColorFormat.BGR)

INTERPOLATION_MODE_MAP = {
"LINEAR": "linear",
"CUBIC": "cubic",
"NEAREST": "nearest",
}

RESIZE_MODE_MAP = {"fit_to_window": resize_image_with_aspect}

# Handle resize
# Change to dynamic shape to handle various image size
# TODO: check the number of input channels and rank of input shape
if resize_mode and target_shape:
if resize_mode in RESIZE_MODE_MAP:
input_shape = [1, -1, -1, 3]
ppp.input(input_idx).tensor().set_shape(input_shape)
ppp.input(input_idx).preprocess().custom(
RESIZE_MODE_MAP[resize_mode](
target_shape,
INTERPOLATION_MODE_MAP[interpolation_mode],
pad_value,
)
)

else:
raise ValueError(f"Upsupported resize type in model preprocessing: {resize_mode}")

# Handle layout
ppp.input(input_idx).model().set_layout(ov.Layout(layout))

# Handle color format
if brg2rgb:
ppp.input(input_idx).preprocess().convert_color(ColorFormat.RGB)

ppp.input(input_idx).preprocess().convert_element_type(Type.f32)

if mean:
ppp.input(input_idx).preprocess().mean(mean)
if scale:
ppp.input(input_idx).preprocess().scale(scale)

self.model = ppp.build()
self.load_model()
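
A hypothetical usage sketch of the adapter (not part of the PR): the constructor arguments follow model API 0.1.x, and the import path is inferred from this diff's package layout, so both are assumptions.

from openvino.model_api.adapters import create_core

# Import path inferred from this PR's package layout (assumption).
from otx.algorithms.visual_prompting.adapters.openvino.model_wrappers import (
    VisualPromptingOpenvinoAdapter,
)

adapter = VisualPromptingOpenvinoAdapter(create_core(), "image_encoder.xml", device="CPU")
adapter.embed_preprocessing(
    layout="NCHW",
    resize_mode="fit_to_window",   # routed to resize_image_with_aspect above
    interpolation_mode="LINEAR",
    target_shape=(1024, 1024),
    pad_value=0,
    brg2rgb=True,                  # parameter name as spelled in model API 0.1.x
    mean=[123.675, 116.28, 103.53],
    scale=[58.395, 57.12, 57.375],
)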
@@ -1,4 +1,4 @@
"""Model Wrapper of OTX Visual Prompting."""
"""Openvino Model Wrappers of OTX Visual Prompting."""

# Copyright (C) 2023 Intel Corporation
#
@@ -14,16 +14,15 @@
# See the License for the specific language governing permissions
# and limitations under the License.

from typing import Any, Dict, Tuple
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
from openvino.model_api.models import ImageModel
from openvino.model_api.models.types import NumericalValue
from openvino.model_api.adapters.inference_adapter import InferenceAdapter
from openvino.model_api.models import ImageModel, SegmentationModel
from openvino.model_api.models.types import NumericalValue, StringValue

from otx.algorithms.segmentation.adapters.openvino.model_wrappers.blur import (
BlurSegmentation,
)
from otx.api.utils.segmentation_utils import create_hard_prediction_from_soft_prediction


@@ -32,63 +31,93 @@ class ImageEncoder(ImageModel):

__model__ = "image_encoder"

def __init__(self, inference_adapter, configuration=None, preload=False):
super().__init__(inference_adapter, configuration, preload)

@classmethod
def parameters(cls) -> Dict[str, Any]: # noqa: D102
parameters = super().parameters()
parameters["resize_type"].default_value = "fit_to_window"
parameters["mean_values"].default_value = [123.675, 116.28, 103.53]
parameters["scale_values"].default_value = [58.395, 57.12, 57.375]
parameters.update(
{
"resize_type": StringValue(default_value="fit_to_window"),
}
)
return parameters

def preprocess(self, inputs: np.ndarray) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
"""Update meta for image encoder."""
dict_inputs, meta = super().preprocess(inputs)
meta["resize_type"] = self.resize_type
return dict_inputs, meta

class Decoder(BlurSegmentation):
"""Decoder class for visual prompting of openvino model wrapper.

TODO (sungchul): change parent class
"""
class Decoder(SegmentationModel):
"""Decoder class for visual prompting of openvino model wrapper."""

__model__ = "decoder"

def preprocess(self, bbox: np.ndarray, original_size: Tuple[int]) -> Dict[str, Any]:
"""Ready decoder inputs."""
point_coords = bbox.reshape((-1, 2, 2))
point_labels = np.array([2, 3], dtype=np.float32).reshape((-1, 2))
inputs_decoder = {
"point_coords": point_coords,
"point_labels": point_labels,
# TODO (sungchul): how to generate mask_input and has_mask_input
"mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
"has_mask_input": np.zeros((1, 1), dtype=np.float32),
"orig_size": np.array(original_size, dtype=np.float32).reshape((-1, 2)),
}
return inputs_decoder
def __init__(
self,
model_adapter: InferenceAdapter,
configuration: Optional[dict] = None,
preload: bool = False,
):
super().__init__(model_adapter, configuration, preload)
self.output_blob_name = "low_res_masks"

@classmethod
def parameters(cls): # noqa: D102
parameters = super().parameters()
parameters.update({"image_size": NumericalValue(value_type=int, default_value=1024, min=0, max=2048)})
return parameters

def preprocess(self, inputs: Dict[str, Any], meta: Dict[str, Any]):
"""Preprocess prompts."""
processed_prompts = []
# TODO (sungchul): process points
for bbox, label in zip(inputs["bboxes"], inputs["labels"]):
# TODO (sungchul): add condition to check whether using bbox or point
point_coords = self._apply_coords(bbox.reshape(-1, 2, 2), inputs["original_size"])
point_labels = np.array([2, 3], dtype=np.float32).reshape((-1, 2))
processed_prompts.append(
{
"point_coords": point_coords,
"point_labels": point_labels,
# TODO (sungchul): how to generate mask_input and has_mask_input
"mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
"has_mask_input": np.zeros((1, 1), dtype=np.float32),
"orig_size": np.array(inputs["original_size"], dtype=np.float32).reshape((-1, 2)),
"label": label,
}
)
return processed_prompts
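
For concreteness, a single box prompt is encoded as two labelled corner points (illustration only, not part of the diff; labels 2 and 3 are the SAM convention for the top-left and bottom-right box corners):

import numpy as np

bbox = np.array([40, 30, 200, 160], dtype=np.float32)             # x1, y1, x2, y2
point_coords = bbox.reshape(-1, 2, 2)                             # (1, 2, 2): two corner points
point_labels = np.array([2, 3], dtype=np.float32).reshape(-1, 2)  # box-corner labels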

def _apply_coords(self, coords: np.ndarray, original_size: Union[List[int], Tuple[int, int]]) -> np.ndarray:
"""Process coords according to preprocessed image size using image meta."""
old_h, old_w = original_size
new_h, new_w = self._get_preprocess_shape(original_size[0], original_size[1], self.image_size)
coords = deepcopy(coords).astype(np.float32)
coords[..., 0] = coords[..., 0] * (new_w / old_w)
coords[..., 1] = coords[..., 1] * (new_h / old_h)
return coords

def _get_preprocess_shape(self, old_h: int, old_w: int, image_size: int) -> Tuple[int, int]:
"""Compute the output size given input size and target image size."""
scale = image_size / max(old_h, old_w)
new_h, new_w = old_h * scale, old_w * scale
new_w = int(new_w + 0.5)
new_h = int(new_h + 0.5)
return (new_h, new_w)
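
A worked example of the two helpers above (illustration only): for a 480x640 image and image_size=1024, the longest side is scaled by 1024 / 640 = 1.6, and prompt coordinates are scaled by the same per-axis ratios.

scale = 1024 / max(480, 640)                                   # 1.6
new_h, new_w = int(480 * scale + 0.5), int(640 * scale + 0.5)  # (768, 1024)
x_new = 100 * (new_w / 640)                                    # x = 100 -> 160.0
y_new = 50 * (new_h / 480)                                     # y = 50  -> 80.0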

def _check_io_number(self, number_of_inputs, number_of_outputs):
pass

def _get_inputs(self):
"""Get input layer name and shape."""
image_blob_names = [name for name in self.inputs.keys()]
image_info_blob_names = []
return image_blob_names, image_info_blob_names

def _get_outputs(self):
"""Get output layer name and shape."""
layer_name = "low_res_masks"
layer_shape = self.outputs[layer_name].shape

if len(layer_shape) == 3:
self.out_channels = 0
elif len(layer_shape) == 4:
self.out_channels = layer_shape[1]
else:
raise Exception(f"Unexpected output layer shape {layer_shape}. Only 4D and 3D output layers are supported")

return layer_name

def postprocess(self, outputs: Dict[str, np.ndarray], meta: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
"""Postprocess to convert soft prediction to hard prediction.

@@ -102,10 +131,10 @@ def postprocess(self, outputs: Dict[str, np.ndarray], meta: Dict[str, Any]) -> T
"""

def sigmoid(x):
return 1 / (1 + np.exp(-x))
return np.tanh(x * 0.5) * 0.5 + 0.5 # to avoid overflow

soft_prediction = outputs[self.output_blob_name].squeeze()
soft_prediction = self.resize_and_crop(soft_prediction, meta["original_size"])
soft_prediction = self.resize_and_crop(soft_prediction, meta["original_size"][0])
soft_prediction = sigmoid(soft_prediction)
meta["soft_prediction"] = soft_prediction

@@ -134,18 +163,18 @@ def resize_and_crop(self, soft_prediction: np.ndarray, original_size: np.ndarray
soft_prediction, (self.image_size, self.image_size), 0, 0, interpolation=cv2.INTER_LINEAR
)

prepadded_size = self.resize_longest_image_size(original_size, self.image_size).astype(np.int64)
prepadded_size = self.get_padded_size(original_size, self.image_size).astype(np.int64)
resized_cropped_soft_prediction = resized_soft_prediction[..., : prepadded_size[0], : prepadded_size[1]]

original_size = original_size.astype(np.int64)
h, w = original_size[0], original_size[1]
h, w = original_size
final_soft_prediction = cv2.resize(
resized_cropped_soft_prediction, (w, h), 0, 0, interpolation=cv2.INTER_LINEAR
)
return final_soft_prediction

def resize_longest_image_size(self, original_size: np.ndarray, longest_side: int) -> np.ndarray:
"""Resizes the longest side of the image to the given size.
def get_padded_size(self, original_size: np.ndarray, longest_side: int) -> np.ndarray:
"""Get padded size from original size and longest side of the image.

Args:
original_size (np.ndarray): The original image size with shape Bx2.
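
The body of `get_padded_size` is collapsed in this view; assuming it keeps the SAM-style rounding of the renamed `resize_longest_image_size`, the computation is (sketch under that assumption, not part of the diff):

import numpy as np

original_size = np.array([480, 640])
longest_side = 1024
scale = longest_side / np.max(original_size)                     # 1.6
padded = np.floor(original_size * scale + 0.5).astype(np.int64)  # [768, 1024]
# The low-res mask is cropped to this size before the final resize back to 480x640.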