openvinotoolkit · eugene123tw · Aug 9, 2022 · Jul 7, 2022 · Jul 8, 2022 · Jul 25, 2022
@@ -19,6 +19,7 @@
 import shutil
 import tempfile
 import warnings
+from contextlib import nullcontext
 from typing import Optional
 
 import numpy as np
@@ -47,17 +48,14 @@
     check_input_parameters_type,
 )
 
-
 from mmseg.apis import export_model
-from segmentation_tasks.apis.segmentation.config_utils import (patch_config,
-                                                           prepare_for_testing,
-                                                           set_hyperparams)
-from segmentation_tasks.apis.segmentation.configuration import OTESegmentationConfig
-from segmentation_tasks.apis.segmentation.ote_utils import InferenceProgressCallback, get_activation_map
+from mmseg.core.hooks.auxiliary_hooks import FeatureVectorHook, SaliencyMapHook
 from mmseg.datasets import build_dataloader, build_dataset
 from mmseg.models import build_segmentor
 from mmseg.parallel import MMDataCPU
-
+from segmentation_tasks.apis.segmentation.config_utils import (patch_config, prepare_for_testing, set_hyperparams)
+from segmentation_tasks.apis.segmentation.configuration import OTESegmentationConfig
+from segmentation_tasks.apis.segmentation.ote_utils import InferenceProgressCallback, get_activation_map
 
 logger = logging.getLogger(__name__)
 
@@ -198,54 +196,45 @@ def hook(module, input, output):
         pre_hook_handle = self._model.register_forward_pre_hook(pre_hook)
         hook_handle = self._model.register_forward_hook(hook)
 
-        self._infer_segmentor(self._model, self._config, dataset,
-                              save_mask_visualization=not is_evaluation)
-
+        prediction_results = self._infer_segmentor(self._model, self._config, dataset, dump_features=True,
+                                                   dump_saliency_map=not is_evaluation)
+        self._add_predictions_to_dataset(prediction_results, dataset)
         pre_hook_handle.remove()
         hook_handle.remove()
 
         return dataset
 
-    def _add_predictions_to_dataset_item(self, prediction, feature_vector, dataset_item, save_mask_visualization):
-        soft_prediction = np.transpose(prediction, axes=(1, 2, 0))
-        hard_prediction = create_hard_prediction_from_soft_prediction(
-            soft_prediction=soft_prediction,
-            soft_threshold=self._hyperparams.postprocessing.soft_threshold,
-            blur_strength=self._hyperparams.postprocessing.blur_strength,
-        )
-        annotations = create_annotation_from_segmentation_map(
-            hard_prediction=hard_prediction,
-            soft_prediction=soft_prediction,
-            label_map=self._label_dictionary,
-        )
-        dataset_item.append_annotations(annotations=annotations)
-
-        if feature_vector is not None:
-            active_score = TensorEntity(name="representation_vector", numpy=feature_vector.reshape(-1))
-            dataset_item.append_metadata_item(active_score, model=self._task_environment.model)
-
-        if save_mask_visualization:
-            for label_index, label in self._label_dictionary.items():
-                if label_index == 0:
-                    continue
-
-                if len(soft_prediction.shape) == 3:
-                    current_label_soft_prediction = soft_prediction[:, :, label_index]
-                else:
-                    current_label_soft_prediction = soft_prediction
-
-                class_act_map = get_activation_map(current_label_soft_prediction)
-                result_media = ResultMediaEntity(name=f'{label.name}',
-                                                 type='Soft Prediction',
-                                                 label=label,
+    def _add_predictions_to_dataset(self, prediction_results, dataset):
+        """Attach inference results to the items of ``dataset`` in place.
+
+        Args:
+            prediction_results: iterable of ``(prediction, feature_vector, saliency_map)``
+                triples, paired positionally with the dataset items. ``feature_vector``
+                and ``saliency_map`` may be ``None``; the matching metadata is then skipped.
+            dataset: dataset whose items receive the predicted annotations and metadata.
+        """
+        # NOTE: zip() stops at the shorter input, so surplus items or results are silently ignored.
+        for dataset_item, (prediction, feature_vector, saliency_map) in zip(dataset, prediction_results):
+            # Move axis 0 to the end; presumably channels-first -> channels-last (TODO confirm).
+            soft_prediction = np.transpose(prediction, axes=(1, 2, 0))
+            hard_prediction = create_hard_prediction_from_soft_prediction(
+                soft_prediction=soft_prediction,
+                soft_threshold=self._hyperparams.postprocessing.soft_threshold,
+                blur_strength=self._hyperparams.postprocessing.blur_strength,
+            )
+            annotations = create_annotation_from_segmentation_map(
+                hard_prediction=hard_prediction,
+                soft_prediction=soft_prediction,
+                label_map=self._label_dictionary,
+            )
+            dataset_item.append_annotations(annotations=annotations)
+
+            if feature_vector is not None:
+                # Stored flattened under the name "representation_vector".
+                active_score = TensorEntity(name="representation_vector", numpy=feature_vector.reshape(-1))
+                dataset_item.append_metadata_item(active_score, model=self._task_environment.model)
+
+            if saliency_map is not None:
+                # The target size is passed as (width, height) of the dataset item.
+                class_act_map = get_activation_map(saliency_map, (dataset_item.width, dataset_item.height))
+                result_media = ResultMediaEntity(name="saliency_map",
+                                                 type="Saliency map",
                                                  annotation_scene=dataset_item.annotation_scene,
                                                  roi=dataset_item.roi,
                                                  numpy=class_act_map)
                 dataset_item.append_metadata_item(result_media, model=self._task_environment.model)
 
     def _infer_segmentor(self,
                          model: torch.nn.Module, config: Config, dataset: DatasetEntity,
-                         save_mask_visualization: bool = False) -> None:
+                         dump_features: bool = False, dump_saliency_map: bool = False) -> list:
+        """Run the segmentor over ``dataset`` and collect the raw per-sample outputs.
+
+        Args:
+            model: segmentor to evaluate. It is switched to eval mode and wrapped in
+                ``MMDataParallel`` when CUDA is available, otherwise in ``MMDataCPU``.
+            config: configuration handed to ``prepare_for_testing``.
+            dataset: dataset to run inference on.
+            dump_features: when True, feature vectors are recorded by a
+                ``FeatureVectorHook`` attached to the backbone.
+            dump_saliency_map: when True, saliency maps are recorded by a
+                ``SaliencyMapHook`` attached to the backbone.
+
+        Returns:
+            A list of ``(prediction, feature_vector, saliency_map)`` tuples. The
+            feature vector / saliency map entry is ``None`` when the matching
+            ``dump_*`` flag is False.
+        """
         model.eval()
 
         test_config = prepare_for_testing(config, dataset)
@@ -259,18 +248,28 @@ def _infer_segmentor(self,
                                              dist=False,
                                              shuffle=False)
         if torch.cuda.is_available():
-            eval_model = MMDataParallel(model.cuda(test_config.gpu_ids[0]),
-                                        device_ids=test_config.gpu_ids)
+            model = MMDataParallel(model.cuda(test_config.gpu_ids[0]), device_ids=test_config.gpu_ids)
         else:
-            eval_model = MMDataCPU(model)
+            model = MMDataCPU(model)
 
-        # Use a single gpu for testing. Set in both mm_val_dataloader and eval_model
-        for data, dataset_item in zip(mm_val_dataloader, dataset):
-            with torch.no_grad():
-                result, repr_vector = eval_model(return_loss=False, output_logits=True, **data)
-            assert len(result) == 1
+        eval_predictions = []
+        feature_vectors = []
+        saliency_maps = []
 
-            self._add_predictions_to_dataset_item(result[0], repr_vector, dataset_item, save_mask_visualization)
+        # Use a single gpu for testing. Set in both mm_val_dataloader and model
+        # Each hook is replaced by a no-op context (bound to None) when its flag is off.
+        with FeatureVectorHook(model.module.backbone) if dump_features else nullcontext() as fhook:
+            with SaliencyMapHook(model.module.backbone) if dump_saliency_map else nullcontext() as shook:
+                for data in mm_val_dataloader:
+                    with torch.no_grad():
+                        result = model(return_loss=False, output_logits=True, **data)
+                    eval_predictions.extend(result)
+                feature_vectors = fhook.records if dump_features else [None] * len(dataset)
+                saliency_maps = shook.records if dump_saliency_map else [None] * len(dataset)
+        assert len(eval_predictions) == len(feature_vectors) == len(saliency_maps), \
+               'Number of elements should be the same, however, number of outputs are ' \
+               f"{len(eval_predictions)}, {len(feature_vectors)}, and {len(saliency_maps)}"
+        # Materialise the triples so the declared return type holds and the result can be re-iterated.
+        return list(zip(eval_predictions, feature_vectors, saliency_maps))
 
     @check_input_parameters_type()
     def evaluate(self, output_result_set: ResultSetEntity, evaluation_metric: Optional[str] = None):

@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions
 # and limitations under the License.
 
+from typing import Any, Dict, Optional, Union, Iterable
+import warnings
+
 import cv2
 import numpy as np
-from typing import Any, Dict, Optional
 
 from openvino.model_zoo.model_api.models import SegmentationModel
 from openvino.model_zoo.model_api.models.types import NumericalValue
@@ -23,6 +25,16 @@
 from ote_sdk.utils.segmentation_utils import create_hard_prediction_from_soft_prediction
 
 
+@check_input_parameters_type()
+def get_actmap(
+    features: Union[np.ndarray, Iterable, int, float], output_res: Union[tuple, list]
+):
+    """Resize ``features`` to ``output_res`` and render it as an RGB JET heat map.
+
+    ``output_res`` is forwarded to ``cv2.resize`` as the target size, which OpenCV
+    documents as ``(width, height)``.
+    NOTE(review): ``cv2.applyColorMap`` is documented for 8-bit input, so
+    ``features`` is presumably a uint8 array - confirm against the caller.
+    """
+    resized = cv2.resize(features, output_res)
+    heatmap_bgr = cv2.applyColorMap(resized, cv2.COLORMAP_JET)
+    # applyColorMap yields BGR; hand back RGB.
+    return cv2.cvtColor(heatmap_bgr, cv2.COLOR_BGR2RGB)
+
+
 class BlurSegmentation(SegmentationModel):
     __model__ = 'blur_segmentation'
 
@@ -60,17 +72,24 @@ def _get_outputs(self):
     def postprocess(self, outputs: Dict[str, np.ndarray], metadata: Dict[str, Any]):
+        """Convert raw network outputs into a hard prediction at the original image size.
+
+        Side effects on ``metadata``:
+            ``metadata['saliency_map']``: colour-mapped saliency map resized to the
+                original image size, or ``None`` when the output is absent.
+            ``metadata['feature_vector']``: the ``feature_vector`` output as is, or
+                ``None`` when the output is absent.
+
+        The two auxiliary outputs are handled independently, so one of them is
+        still exposed when only the other is missing.
+
+        Returns:
+            The hard prediction, resized with nearest-neighbour interpolation to
+            ``metadata['original_shape']``.
+        """
         predictions = outputs[self.output_blob_name].squeeze()
         soft_prediction = np.transpose(predictions, axes=(1, 2, 0))
-        feature_vector = outputs.get('repr_vector', None)  # Optional output
 
         hard_prediction = create_hard_prediction_from_soft_prediction(
             soft_prediction=soft_prediction,
             soft_threshold=self.soft_threshold,
             blur_strength=self.blur_strength
         )
         hard_prediction = cv2.resize(hard_prediction, metadata['original_shape'][1::-1], 0, 0, interpolation=cv2.INTER_NEAREST)
-        soft_prediction = cv2.resize(soft_prediction, metadata['original_shape'][1::-1], 0, 0, interpolation=cv2.INTER_NEAREST)
-
-        metadata['soft_predictions'] = soft_prediction
-        metadata['feature_vector'] = feature_vector
+
+        # Warn once, naming exactly the auxiliary outputs the exported model lacks.
+        missing_outputs = [name for name in ('feature_vector', 'saliency_map') if name not in outputs]
+        if missing_outputs:
+            warnings.warn(f"Could not find {', '.join(missing_outputs)} in OpenVINO output. "
+                          'Please rerun OpenVINO export or retrain the model.')
+
+        if 'saliency_map' in outputs:
+            # The target size is given as (shape[1], shape[0]) of the original image.
+            metadata["saliency_map"] = get_actmap(
+                outputs["saliency_map"][0],
+                (metadata["original_shape"][1], metadata["original_shape"][0]),
+            )
+        else:
+            metadata["saliency_map"] = None
+        metadata["feature_vector"] = outputs.get("feature_vector")
 
         return hard_prediction
@@ -105,12 +105,11 @@ def pre_process(self, image: np.ndarray) -> Tuple[Dict[str, np.ndarray], Dict[st
     @check_input_parameters_type()
-    def post_process(self, prediction: Dict[str, np.ndarray], metadata: Dict[str, Any]) -> AnnotationSceneEntity:
+    def post_process(
+        self, prediction: Dict[str, np.ndarray], metadata: Dict[str, Any]
+    ) -> Tuple[AnnotationSceneEntity, Optional[np.ndarray], Optional[np.ndarray]]:
+        """Post-process a raw prediction into an annotation scene plus auxiliary outputs.
+
+        ``self.model.postprocess`` is called first; the feature vector and saliency
+        map are then read back from the ``metadata`` entries ``'feature_vector'``
+        and ``'saliency_map'``.
+
+        Returns:
+            A ``(predicted_scene, feature_vector, saliency_map)`` tuple.
+            NOTE(review): the last two are presumably ``None`` when the model does
+            not provide them - confirm against ``self.model.postprocess``.
+        """
         hard_prediction = self.model.postprocess(prediction, metadata)
-        soft_prediction = metadata['soft_predictions']
         feature_vector = metadata['feature_vector']
-
+        saliency_map = metadata['saliency_map']
         predicted_scene = self.converter.convert_to_annotation(hard_prediction, metadata)
 
-        return predicted_scene, soft_prediction, feature_vector
+        return predicted_scene, feature_vector, saliency_map
 
     @check_input_parameters_type()
     def forward(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
@@ -165,38 +164,25 @@ def infer(self,
               inference_parameters: Optional[InferenceParameters] = None) -> DatasetEntity:
         if inference_parameters is not None:
             update_progress_callback = inference_parameters.update_progress
-            save_mask_visualization = not inference_parameters.is_evaluation
+            dump_saliency_map = not inference_parameters.is_evaluation
         else:
             update_progress_callback = default_progress_callback
-            save_mask_visualization = True
+            dump_saliency_map = True
 
         dataset_size = len(dataset)
         for i, dataset_item in enumerate(dataset, 1):
-            predicted_scene, soft_prediction, feature_vector = self.inferencer.predict(dataset_item.numpy)
+            predicted_scene, feature_vector, saliency_map = self.inferencer.predict(dataset_item.numpy)
             dataset_item.append_annotations(predicted_scene.annotations)
 
             if feature_vector is not None:
                 feature_vector_media = TensorEntity(name="representation_vector", numpy=feature_vector.reshape(-1))
                 dataset_item.append_metadata_item(feature_vector_media, model=self.model)
 
-            if save_mask_visualization:
-                for label_index, label in self._label_dictionary.items():
-                    if label_index == 0:
-                        continue
-
-                    if len(soft_prediction.shape) == 3:
-                        current_label_soft_prediction = soft_prediction[:, :, label_index]
-                    else:
-                        current_label_soft_prediction = soft_prediction
-
-                    class_act_map = get_activation_map(current_label_soft_prediction)
-                    result_media = ResultMediaEntity(name=f'{label.name}',
-                                                     type='Soft Prediction',
-                                                     label=label,
-                                                     annotation_scene=dataset_item.annotation_scene,
-                                                     roi=dataset_item.roi,
-                                                     numpy=class_act_map)
-                    dataset_item.append_metadata_item(result_media, model=self.model)
+            if dump_saliency_map and saliency_map is not None:
+                saliency_map_media = ResultMediaEntity(name="saliency_map", type="Saliency map",
+                                                annotation_scene=dataset_item.annotation_scene, 
+                                                numpy=saliency_map, roi=dataset_item.roi)
+                dataset_item.append_metadata_item(saliency_map_media, model=self.model)
 
             update_progress_callback(int(i / dataset_size * 100))
 

@@ -13,6 +13,7 @@
 # and limitations under the License.
 
 
+import cv2
 import importlib
 from typing import Iterable, Union
 import yaml
@@ -43,15 +44,11 @@ def get_task_class(path: str):
 
 
 @check_input_parameters_type()
-def get_activation_map(features: Union[np.ndarray, Iterable, int, float]):
-    min_soft_score = np.min(features)
-    max_soft_score = np.max(features)
-    factor = 255.0 / (max_soft_score - min_soft_score + 1e-12)
-
-    float_act_map = factor * (features - min_soft_score)
-    int_act_map = np.uint8(np.floor(float_act_map))
-
-    return int_act_map
+def get_activation_map(features: Union[np.ndarray, Iterable, int, float], output_res: Union[tuple, list]):
+    """Build an RGB JET-coloured activation map of size ``output_res`` from ``features``.
+
+    ``output_res`` is passed to ``cv2.resize`` as the target size, which OpenCV
+    documents as ``(width, height)``. No value normalisation is performed here.
+    NOTE(review): ``cv2.applyColorMap`` is documented for 8-bit input, so
+    ``features`` presumably already arrives as uint8 - confirm.
+    """
+    scaled = cv2.resize(features, output_res)
+    jet_bgr = cv2.applyColorMap(scaled, cv2.COLORMAP_JET)
+    # OpenCV colour maps come out as BGR; callers receive RGB.
+    jet_rgb = cv2.cvtColor(jet_bgr, cv2.COLOR_BGR2RGB)
+    return jet_rgb
 
 
 class TrainingProgressCallback(TimeMonitorCallback):