Mergeback/1.10.0rc1 (#1658)

openvinotoolkit · Oct 28, 2024 · 2b4868d · 2b4868d
1 parent 3265766
commit 2b4868d
Show file tree

Hide file tree

Showing 83 changed files with 1,060 additions and 139 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,10 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## \[Unreleased\]
 
 ### New features
-- Support KITTI 3D format
-  (<https://github.com/openvinotoolkit/datumaro/pull/1619>)
-- Add PseudoLabeling transform for unlabeled dataset
-  (<https://github.com/openvinotoolkit/datumaro/pull/1594>)
 - Convert Cuboid2D annotation to/from 3D data
   (<https://github.com/openvinotoolkit/datumaro/pull/1639>)
 - Add label groups for hierarchical classification in ImageNet
@@ -20,16 +16,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Enhancements
 - Enhance 'id_from_image_name' transform to ensure each identifier is unique
   (<https://github.com/openvinotoolkit/datumaro/pull/1635>)
+- Optimize path assignment to handle point cloud in JSON without images
+  (<https://github.com/openvinotoolkit/datumaro/pull/1643>)
+
+### Bug fixes
+- Fix assertion to compare hashkeys against expected value
+  (<https://github.com/openvinotoolkit/datumaro/pull/1641>)
+
+## Q4 2024 Release 1.10.0
+
+### New features
+- Support KITTI 3D format
+  (<https://github.com/openvinotoolkit/datumaro/pull/1619>, <https://github.com/openvinotoolkit/datumaro/pull/1621>)
+- Add PseudoLabeling transform for unlabeled dataset
+  (<https://github.com/openvinotoolkit/datumaro/pull/1594>)
+
+### Enhancements
 - Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators.
   (<https://github.com/openvinotoolkit/datumaro/pull/1615>)
 - Update docs for transform plugins
   (<https://github.com/openvinotoolkit/datumaro/pull/1599>)
+- Update the OpenVINO IR model for the explorer OpenVINO launcher to the CLIP ViT-L/14@336px model
+  (<https://github.com/openvinotoolkit/datumaro/pull/1603>)
 - Optimize path assignment to handle point cloud in JSON without images
   (<https://github.com/openvinotoolkit/datumaro/pull/1643>)
+- Set TabularTransform to process clean transform in parallel
+  (<https://github.com/openvinotoolkit/datumaro/pull/1648>)
 
 ### Bug fixes
-- Fix assertion to compare hashkeys against expected value
-  (<https://github.com/openvinotoolkit/datumaro/pull/1641>)
+- Fix datumaro format to load visibility information from Points annotations
+  (<https://github.com/openvinotoolkit/datumaro/pull/1644>)
 
 ## Q4 2024 Release 1.9.1
 ### Enhancements

diff --git a/docs/source/docs/release_notes.rst b/docs/source/docs/release_notes.rst
@@ -4,6 +4,25 @@ Release Notes
 .. toctree::
    :maxdepth: 1
 
+v1.10.0 (2024 Q4)
+-----------------
+New features
+^^^^^^^^^^^^
+- Support KITTI 3D format
+- Add PseudoLabeling transform for unlabeled dataset
+
+Enhancements
+^^^^^^^^^^^^
+- Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators.
+- Update docs for transform plugins
+- Update the OpenVINO IR model for the explorer OpenVINO launcher to the CLIP ViT-L/14@336px model
+- Optimize path assignment to handle point cloud in JSON without images
+- Set TabularTransform to process clean transform in parallel
+
+Bug fixes
+^^^^^^^^^
+- Fix datumaro format to load visibility information from Points annotations
+
 v1.9.1 (2024 Q3)
 ----------------
 

diff --git a/notebooks/21_kaggle_data_cleaning.ipynb b/notebooks/21_kaggle_data_cleaning.ipynb
diff --git a/src/datumaro/components/algorithms/hash_key_inference/base.py b/src/datumaro/components/algorithms/hash_key_inference/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -21,13 +21,13 @@ def __init__(self, *datasets: Sequence[Dataset]) -> None:
     @property
     def model(self):
         if self._model is None:
-            self._model = explorer.ExplorerLauncher(model_name="clip_visual_ViT-B_32")
+            self._model = explorer.ExplorerLauncher(model_name="clip_visual_vit_l_14_336px_int8")
         return self._model
 
     @property
     def text_model(self):
         if self._text_model is None:
-            self._text_model = explorer.ExplorerLauncher(model_name="clip_text_ViT-B_32")
+            self._text_model = explorer.ExplorerLauncher(model_name="clip_text_vit_l_14_336px_int8")
         return self._text_model
 
     def _compute_hash_key(self, datasets, datasets_to_infer):

diff --git a/src/datumaro/components/annotation.py b/src/datumaro/components/annotation.py
@@ -262,8 +262,8 @@ class HashKey(Annotation):
 
     @hash_key.validator
     def _validate(self, attribute, value: np.ndarray):
-        """Check whether value is a 1D Numpy array having 64 np.uint8 values"""
-        if value.ndim != 1 or value.shape[0] != 64 or value.dtype != np.uint8:
+        """Check whether value is a 1D Numpy array having 96 np.uint8 values"""
+        if value.ndim != 1 or value.shape[0] != 96 or value.dtype != np.uint8:
             raise ValueError(value)
 
     def __eq__(self, other):

diff --git a/src/datumaro/components/transformer.py b/src/datumaro/components/transformer.py
@@ -72,6 +72,80 @@ def __iter__(self):
                 yield item
 
 
+class TabularTransform(Transform):
+    """A transformation class for processing dataset items in batches with optional parallelism.
+
+    This class takes a dataset extractor, batch size, and number of worker threads to process
+    dataset items. Depending on the number of workers specified, it processes batches either
+    sequentially (``num_workers=0``) or concurrently in a thread pool (the workers are
+    threads, not separate processes), making it efficient for batch transformations.
+
+    Parameters:
+        extractor: The dataset extractor to obtain items from.
+        batch_size: The batch size for processing items. Default is 1.
+        num_workers: The number of worker threads to use for parallel processing.
+            Set to 0 for single-process mode. Default is 0.
+    """
+
+    def __init__(
+        self,
+        extractor: IDataset,
+        batch_size: int = 1,
+        num_workers: int = 0,
+    ):
+        super().__init__(extractor)
+        self._batch_size = batch_size
+        if not (isinstance(num_workers, int) and num_workers >= 0):
+            raise ValueError(
+                f"num_workers should be a non negative integer, but it is {num_workers}"
+            )
+        self._num_workers = num_workers
+
+    def __iter__(self) -> Iterator[DatasetItem]:
+        if self._num_workers == 0:
+            return self._iter_single_proc()
+        return self._iter_multi_procs()
+
+    def _iter_multi_procs(self):
+        with ThreadPool(processes=self._num_workers) as pool:
+
+            def _producer_gen():
+                for batch in take_by(self._extractor, self._batch_size):
+                    future = pool.apply_async(
+                        func=self._process_batch,
+                        args=(batch,),
+                    )
+                    yield future
+
+            with consumer_generator(producer_generator=_producer_gen()) as consumer_gen:
+                for future in consumer_gen:
+                    for item in future.get():
+                        yield item
+
+    def _iter_single_proc(self) -> Iterator[DatasetItem]:
+        for batch in take_by(self._extractor, self._batch_size):
+            for item in self._process_batch(batch=batch):
+                yield item
+
+    def transform_item(self, item: DatasetItem) -> Optional[DatasetItem]:
+        """
+        Returns a modified copy of the input item.
+
+        Avoid changing and returning the input item, because it can lead to
+        unexpected problems. Use wrap_item() or item.wrap() to simplify copying.
+        """
+
+        raise NotImplementedError()
+
+    def _process_batch(
+        self,
+        batch: List[DatasetItem],
+    ) -> List[DatasetItem]:
+        results = [self.transform_item(item) for item in batch]
+
+        return results
+
+
 class ModelTransform(Transform):
     """A transformation class for applying a model's inference to dataset items.
 

diff --git a/src/datumaro/plugins/data_formats/datumaro/base.py b/src/datumaro/plugins/data_formats/datumaro/base.py
@@ -338,6 +338,7 @@ def _load_annotations(self, item: Dict):
                             points,
                             label=label_id,
                             id=ann_id,
+                            visibility=ann.get("visibility"),
                             attributes=attributes,
                             group=group,
                             object_id=object_id,

diff --git a/src/datumaro/plugins/data_formats/kitti_3d/base.py b/src/datumaro/plugins/data_formats/kitti_3d/base.py
@@ -4,17 +4,18 @@
 
 import glob
 import logging
+import os
 import os.path as osp
 from typing import List, Optional, Type, TypeVar
 
-from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.annotation import AnnotationType, Bbox
 from datumaro.components.dataset_base import DatasetItem, SubsetBase
 from datumaro.components.errors import InvalidAnnotationError
 from datumaro.components.importer import ImportContext
-from datumaro.components.media import Image, PointCloud
+from datumaro.components.media import Image
 from datumaro.util.image import find_images
 
-from .format import Kitti3dPath
+from .format import Kitti3DLabelMap, Kitti3dPath, make_kitti3d_categories
 
 T = TypeVar("T")
 
@@ -30,26 +31,37 @@ def __init__(
         ctx: Optional[ImportContext] = None,
     ):
         assert osp.isdir(path), path
-        super().__init__(subset=subset, media_type=PointCloud, ctx=ctx)
 
         self._path = path
 
-        common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"}
-        self._categories = {AnnotationType.label: LabelCategories(attributes=common_attrs)}
+        if not subset:
+            folder_path = path.rsplit(Kitti3dPath.LABEL_DIR, 1)[0]
+            img_dir = osp.join(folder_path, Kitti3dPath.IMAGE_DIR)
+            if any(os.path.isdir(os.path.join(img_dir, item)) for item in os.listdir(img_dir)):
+                subset = osp.split(path)[-1]
+                self._path = folder_path
+        super().__init__(subset=subset, ctx=ctx)
+
+        self._categories = make_kitti3d_categories(Kitti3DLabelMap)
         self._items = self._load_items()
 
     def _load_items(self) -> List[DatasetItem]:
         items = []
+
         image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR)
         image_path_by_id = {
-            osp.splitext(osp.relpath(p, image_dir))[0]: p
+            osp.split(osp.splitext(osp.relpath(p, image_dir))[0])[-1]: p
             for p in find_images(image_dir, recursive=True)
         }
 
-        ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR)
+        if self._subset == "default":
+            ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR)
+        else:
+            ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR, self._subset)
+
         label_categories = self._categories[AnnotationType.label]
 
-        for labels_path in sorted(glob.glob(osp.join(ann_dir, "*.txt"), recursive=True)):
+        for labels_path in sorted(glob.glob(osp.join(ann_dir, "**", "*.txt"), recursive=True)):
             item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0]
             anns = []
 
@@ -116,17 +128,18 @@ def _load_items(self) -> List[DatasetItem]:
             if image:
                 image = Image.from_file(path=image)
 
+            if self._subset == "default":
+                calib_path = osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt")
+            else:
+                calib_path = osp.join(
+                    self._path, Kitti3dPath.CALIB_DIR, self._subset, item_id + ".txt"
+                )
             items.append(
                 DatasetItem(
                     id=item_id,
                     subset=self._subset,
-                    media=PointCloud.from_file(
-                        path=osp.join(self._path, Kitti3dPath.PCD_DIR, item_id + ".bin"),
-                        extra_images=[image],
-                    ),
-                    attributes={
-                        "calib_path": osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt")
-                    },
+                    media=image,
+                    attributes={"calib_path": calib_path},
                     annotations=anns,
                 )
             )

diff --git a/src/datumaro/plugins/data_formats/kitti_3d/format.py b/src/datumaro/plugins/data_formats/kitti_3d/format.py
@@ -4,9 +4,40 @@
 
 import os.path as osp
 
+from datumaro.components.annotation import AnnotationType, LabelCategories
+
 
 class Kitti3dPath:
     PCD_DIR = osp.join("velodyne")
     IMAGE_DIR = "image_2"
     LABEL_DIR = "label_2"
     CALIB_DIR = "calib"
+
+
+Kitti3DLabelMap = [
+    "DontCare",
+    "Car",
+    "Pedestrian",
+    "Van",
+    "Truck",
+    "Cyclist",
+    "Sitter",
+    "Train",
+    "Motorcycle",
+    "Bus",
+    "Misc",
+]
+
+
+def make_kitti3d_categories(label_map=None):
+    if label_map is None:
+        label_map = Kitti3DLabelMap
+
+    categories = {}
+    common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"}
+    label_categories = LabelCategories(attributes=common_attrs)
+    for label in label_map:
+        label_categories.add(label)
+    categories[AnnotationType.label] = label_categories
+
+    return categories
diff --git a/src/datumaro/plugins/data_formats/kitti_3d/importer.py b/src/datumaro/plugins/data_formats/kitti_3d/importer.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: MIT
 
+import os.path as osp
 from typing import List
 
 from datumaro.components.errors import DatasetImportError
@@ -16,7 +17,7 @@ class Kitti3dImporter(Importer):
 
     @classmethod
     def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
-        context.require_file(f"{Kitti3dPath.PCD_DIR}/*.bin")
+        context.require_file(f"{Kitti3dPath.CALIB_DIR}/*.txt")
         cls._check_ann_file(context.require_file(f"{Kitti3dPath.LABEL_DIR}/*.txt"), context)
         return FormatDetectionConfidence.MEDIUM
 
@@ -42,4 +43,11 @@ def get_file_extensions(cls) -> List[str]:
 
     @classmethod
     def find_sources(cls, path):
-        return [{"url": path, "format": "kitti3d"}]
+        # Search recursively for sources first; fall back to `path` itself if none are found.
+        sources = cls._find_sources_recursive(
+            path, "", "kitti3d", dirname=Kitti3dPath.LABEL_DIR, file_filter=lambda p: osp.isdir(p)
+        )
+        if len(sources) == 0:
+            return [{"url": path, "format": "kitti3d"}]
+        else:
+            return sources
diff --git a/src/datumaro/plugins/framework_converter.py b/src/datumaro/plugins/framework_converter.py
@@ -137,7 +137,10 @@ def __getitem__(self, idx):
             image, label = self._gen_item(idx)
 
             if self.task == "tabular":
-                text = image()[self.input_target]
+                try:
+                    text = image[self.input_target]
+                except TypeError:
+                    text = image()[self.input_target]
 
                 if self.output_target:
                     src_tokenizer, tgt_tokenizer = self.tokenizer

diff --git a/src/datumaro/plugins/openvino_plugin/launcher.py b/src/datumaro/plugins/openvino_plugin/launcher.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 Intel Corporation
+# Copyright (C) 2019-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -92,6 +92,8 @@ class BuiltinOpenvinoModelInfo(OpenvinoModelInfo):
     downloadable_models = {
         "clip_text_ViT-B_32",
         "clip_visual_ViT-B_32",
+        "clip_visual_vit_l_14_336px_int8",
+        "clip_text_vit_l_14_336px_int8",
         "googlenet-v4-tf",
     }