Add panoptic and stuff COCO format (#210)

* add coco stuff and panoptic formats
* update CHANGELOG

Co-authored-by: Maxim Zhiltsov <[email protected]>
openvinotoolkit · May 5, 2021 · 4375cdb · 4375cdb
1 parent ec4b013
commit 4375cdb
Show file tree

Hide file tree

Showing 15 changed files with 439 additions and 19 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for escaping in attribiute values in LabelMe format (<https://github.com/openvinotoolkit/datumaro/issues/49>)
 - Support for Segmentation Splitting (<https://github.com/openvinotoolkit/datumaro/pull/223>)
 - Support for CIFAR-10/100 dataset format (<https://github.com/openvinotoolkit/datumaro/pull/225>)
+- Support for COCO panoptic and stuff formats (<https://github.com/openvinotoolkit/datumaro/pull/210>)
 
 ### Changed
 - LabelMe format saves dataset items with their relative paths by subsets without changing names (<https://github.com/openvinotoolkit/datumaro/pull/200>)

diff --git a/README.md b/README.md
@@ -124,7 +124,7 @@ CVAT annotations                             ---> Publication, statistics etc.
 [(Back to top)](#table-of-contents)
 
 - Dataset reading, writing, conversion in any direction. [Supported formats](docs/user_manual.md#supported-formats):
-  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
+  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`)
   - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`)
   - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
   - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)

diff --git a/datumaro/plugins/coco_format/converter.py b/datumaro/plugins/coco_format/converter.py
@@ -5,6 +5,7 @@
 
 import json
 import logging as log
+import numpy as np
 import os
 import os.path as osp
 from enum import Enum
@@ -19,6 +20,7 @@
     _COORDINATE_ROUNDING_DIGITS, AnnotationType, Points)
 from datumaro.components.dataset import ItemStatus
 from datumaro.util import cast, find, str_to_bool
+from datumaro.util.image import save_image
 
 from .format import CocoPath, CocoTask
 
@@ -451,6 +453,67 @@ def save_annotations(self, item):
 
             self.annotations.append(elem)
 
+class _StuffConverter(_InstancesConverter):
+    """Task converter for COCO 'stuff'; adds nothing to _InstancesConverter."""
+
+class _PanopticConverter(_TaskConverter):
+    """
+    Task converter for COCO 'panoptic': segment descriptions are collected
+    for the JSON annotation file, and the masks of each item are merged
+    into a single color-encoded image saved next to it.
+    """
+
+    def write(self, path):
+        # Dump the accumulated annotation data as JSON
+        with open(path, 'w') as json_file:
+            json.dump(self._data, json_file)
+
+    def save_categories(self, dataset):
+        labels = dataset.categories().get(AnnotationType.label)
+        if labels is None:
+            return
+
+        # Category ids are label indices shifted by one
+        for cat_id, label in enumerate(labels.items, start=1):
+            self.categories.append({
+                'id': cat_id,
+                'name': cast(label.name, str, ''),
+                'supercategory': cast(label.parent, str, ''),
+                'isthing': 0, # TODO: can't represent this information yet
+            })
+
+    def save_annotations(self, item):
+        # Items without an image are not exported
+        if not item.has_image:
+            return
+
+        ann_filename = item.id + CocoPath.PANOPTIC_EXT
+
+        mask_anns = [ann for ann in item.annotations
+            if ann.type == AnnotationType.mask]
+
+        segments_info = []
+        free_id = self._min_ann_id
+        for mask in mask_anns:
+            # A mask without an id takes the next value of the counter;
+            # the id is stored on the annotation itself
+            if not mask.id:
+                mask.id = free_id
+                free_id += 1
+
+            segments_info.append({
+                'id': mask.id,
+                # Same shift by one as in save_categories; a missing label
+                # ends up as category 0
+                'category_id': cast(mask.label, int, -1) + 1,
+                'area': float(mask.get_area()),
+                'bbox': [float(p) for p in mask.get_bbox()],
+                'iscrowd': cast(mask.attributes.get("is_crowd"), int, 0),
+            })
+
+        if mask_anns:
+            # Merge the masks over a zero-filled uint32 canvas, using each
+            # mask's id as its index, then encode the result as colors
+            pan_format = mask_tools.merge_masks(
+                ((m.image, m.id) for m in mask_anns),
+                start=np.zeros(item.image.size, dtype=np.uint32))
+            save_image(osp.join(self._context._segmentation_dir, ann_filename),
+                mask_tools.index2bgr(pan_format), create_dir=True)
+
+        # The entry is recorded even when there were no masks to save
+        self.annotations.append({
+            'image_id': self._get_image_id(item),
+            'file_name': ann_filename,
+            'segments_info': segments_info
+        })
+
 class CocoConverter(Converter):
     @staticmethod
     def _split_tasks_string(s):
@@ -497,6 +560,8 @@ def build_cmdline_parser(cls, **kwargs):
         CocoTask.person_keypoints: _KeypointsConverter,
         CocoTask.captions: _CaptionsConverter,
         CocoTask.labels: _LabelsConverter,
+        CocoTask.panoptic: _PanopticConverter,
+        CocoTask.stuff: _StuffConverter,
     }
 
     def __init__(self, extractor, save_dir,
@@ -541,6 +606,11 @@ def _make_dirs(self):
         self._ann_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR)
         os.makedirs(self._ann_dir, exist_ok=True)
 
+    def _make_segmentation_dir(self, subset_name):
+        # Per-subset directory for panoptic masks:
+        # <save_dir>/<ANNOTATIONS_DIR>/panoptic_<subset_name>
+        segm_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR,
+            'panoptic_' + subset_name)
+        self._segmentation_dir = segm_dir
+        os.makedirs(segm_dir, exist_ok=True)
+
     def _make_task_converter(self, task):
         if task not in self._TASK_CONVERTER:
             raise NotImplementedError()
@@ -568,6 +638,8 @@ def apply(self):
             task_converters = self._make_task_converters()
             for task_conv in task_converters.values():
                 task_conv.save_categories(subset)
+            if CocoTask.panoptic in task_converters:
+                self._make_segmentation_dir(subset_name)
 
             for item in subset:
                 if self._save_images:
@@ -637,3 +709,14 @@ class CocoLabelsConverter(CocoConverter):
     def __init__(self, *args, **kwargs):
         kwargs['tasks'] = CocoTask.labels
         super().__init__(*args, **kwargs)
+
+class CocoPanopticConverter(CocoConverter):
+    """CocoConverter with the task list fixed to the panoptic task."""
+
+    def __init__(self, *args, **kwargs):
+        # Any 'tasks' value passed by the caller is overridden
+        kwargs.update(tasks=CocoTask.panoptic)
+        super().__init__(*args, **kwargs)
+
+class CocoStuffConverter(CocoConverter):
+    """CocoConverter fixed to the stuff task with mask segmentation mode."""
+
+    def __init__(self, *args, **kwargs):
+        # Both options are forced; caller-provided values are overridden
+        kwargs.update(tasks=CocoTask.stuff,
+            segmentation_mode=SegmentationMode.mask)
+        super().__init__(*args, **kwargs)
diff --git a/datumaro/plugins/coco_format/extractor.py b/datumaro/plugins/coco_format/extractor.py
@@ -4,18 +4,20 @@
 # SPDX-License-Identifier: MIT
 
 from collections import OrderedDict
+import json
 import logging as log
 import os.path as osp
 
 from pycocotools.coco import COCO
 import pycocotools.mask as mask_utils
 
-from datumaro.components.extractor import (SourceExtractor,
+from datumaro.components.extractor import (CompiledMask, Mask, SourceExtractor,
     DEFAULT_SUBSET_NAME, DatasetItem,
     AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption,
     LabelCategories, PointsCategories
 )
-from datumaro.util.image import Image
+from datumaro.util.image import Image, lazy_image, load_image
+from datumaro.util.mask_tools import bgr2index
 
 from .format import CocoTask, CocoPath
 
@@ -42,16 +44,24 @@ def __init__(self, path, task, merge_instance_polygons=False, subset=None):
 
         self._merge_instance_polygons = merge_instance_polygons
 
-        loader = self._make_subset_loader(path)
-        self._load_categories(loader)
-        self._items = list(self._load_items(loader).values())
+        if self._task == CocoTask.panoptic:
+            #panoptic is not added to pycocotools
+            panoptic_config = self._load_panoptic_config(path)
+            panoptic_images = osp.splitext(path)[0]
+
+            self._load_panoptic_categories(panoptic_config)
+            self._items = list(self._load_panoptic_items(panoptic_config,
+                panoptic_images).values())
+        else:
+            loader = self._make_subset_loader(path)
+            self._load_categories(loader)
+            self._items = list(self._load_items(loader).values())
 
     @staticmethod
     def _make_subset_loader(path):
         # COCO API has an 'unclosed file' warning
         coco_api = COCO()
         with open(path, 'r') as f:
-            import json
             dataset = json.load(f)
 
         coco_api.dataset = dataset
@@ -62,9 +72,7 @@ def _load_categories(self, loader):
         self._categories = {}
 
         if self._task in [CocoTask.instances, CocoTask.labels,
-                CocoTask.person_keypoints,
-                # TODO: Task.stuff, CocoTask.panoptic
-                ]:
+                CocoTask.person_keypoints, CocoTask.stuff]:
             label_categories, label_map = self._load_label_categories(loader)
             self._categories[AnnotationType.label] = label_categories
             self._label_map = label_map
@@ -100,6 +108,22 @@ def _load_person_kp_categories(self, loader):
 
         return categories
 
+    @staticmethod
+    def _load_panoptic_config(path):
+        """
+        Reads a panoptic annotation file and returns the parsed JSON.
+
+        The file is decoded as UTF-8 explicitly, so the result does not
+        depend on the platform's default text encoding.
+        """
+        with open(path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    def _load_panoptic_categories(self, config):
+        # Fill the label categories and the category id -> label index map
+        # from the 'categories' section of the parsed panoptic config
+        cats = config['categories']
+
+        label_categories = LabelCategories()
+        for cat in cats:
+            label_categories.add(name=cat['name'],
+                parent=cat.get('supercategory'))
+
+        # Label index is the position of the category in the file
+        label_map = {cat['id']: idx for idx, cat in enumerate(cats)}
+
+        self._categories[AnnotationType.label] = label_categories
+        self._label_map = label_map
+
     def _load_items(self, loader):
         items = OrderedDict()
 
@@ -124,6 +148,48 @@ def _load_items(self, loader):
 
         return items
 
+    def _load_panoptic_items(self, config, panoptic_images):
+        """
+        Builds dataset items from a parsed panoptic annotation file.
+
+        Parameters:
+            config - parsed annotation JSON; the 'images' and 'annotations'
+                sections are read
+            panoptic_images - directory containing the panoptic mask images
+                referenced by the annotations
+
+        Returns: an OrderedDict of DatasetItem keyed by integer image id
+
+        Raises: KeyError if an annotation refers to an image id that is
+            missing from the 'images' section
+        """
+        items = OrderedDict()
+
+        # Annotations are looked up by int(image_id), so the image table
+        # is keyed by int as well
+        imgs_info = {int(img['id']): img for img in config['images']}
+
+        for ann in config['annotations']:
+            img_id = int(ann['image_id'])
+            img_info = imgs_info[img_id]
+            image_path = osp.join(self._images_dir, img_info['file_name'])
+
+            # The size is passed on only when both dimensions are present
+            # and non-zero
+            image_size = (img_info.get('height'), img_info.get('width'))
+            if all(image_size):
+                image_size = (int(image_size[0]), int(image_size[1]))
+            else:
+                image_size = None
+            image = Image(path=image_path, size=image_size)
+            anns = []
+
+            # One mask image per annotation entry, shared by its segments;
+            # wrapped in lazy_image, so presumably decoded on first access
+            # - TODO confirm
+            mask_path = osp.join(panoptic_images, ann['file_name'])
+            mask = lazy_image(mask_path, loader=self._load_pan_mask)
+            mask = CompiledMask(instance_mask=mask)
+            for segm_info in ann['segments_info']:
+                cat_id = self._get_label_id(segm_info)
+                segm_id = segm_info['id']
+                attributes = { 'is_crowd': bool(segm_info['iscrowd']) }
+                anns.append(Mask(image=mask.lazy_extract(segm_id),
+                    label=cat_id, id=segm_id,
+                    group=segm_id, attributes=attributes))
+
+            items[img_id] = DatasetItem(
+                id=osp.splitext(img_info['file_name'])[0],
+                subset=self._subset, image=image,
+                annotations=anns, attributes={'id': img_id})
+        return items
+
+    @staticmethod
+    def _load_pan_mask(path):
+        # Read the panoptic image and decode its colors into per-pixel ids
+        return bgr2index(load_image(path))
+
     def _get_label_id(self, ann):
         cat_id = ann.get('category_id')
         if cat_id in [0, None]:
@@ -147,7 +213,8 @@ def _load_annotations(self, ann, image_info=None):
 
         group = ann_id # make sure all tasks' annotations are merged
 
-        if self._task in [CocoTask.instances, CocoTask.person_keypoints]:
+        if self._task in [CocoTask.instances, CocoTask.person_keypoints,
+            CocoTask.stuff]:
             x, y, w, h = ann['bbox']
             label_id = self._get_label_id(ann)
 
@@ -250,3 +317,13 @@ class CocoLabelsExtractor(_CocoExtractor):
     def __init__(self, path, **kwargs):
         kwargs['task'] = CocoTask.labels
         super().__init__(path, **kwargs)
+
+class CocoPanopticExtractor(_CocoExtractor):
+    """_CocoExtractor with the task fixed to panoptic."""
+
+    def __init__(self, path, **kwargs):
+        # Any 'task' value passed by the caller is overridden
+        kwargs.update(task=CocoTask.panoptic)
+        super().__init__(path, **kwargs)
+
+class CocoStuffExtractor(_CocoExtractor):
+    """_CocoExtractor with the task fixed to stuff."""
+
+    def __init__(self, path, **kwargs):
+        # Any 'task' value passed by the caller is overridden
+        kwargs.update(task=CocoTask.stuff)
+        super().__init__(path, **kwargs)
diff --git a/datumaro/plugins/coco_format/format.py b/datumaro/plugins/coco_format/format.py
@@ -12,12 +12,13 @@
     'captions',
     'labels', # extension, does not exist in the original COCO format
     'image_info',
-    # 'panoptic',
-    # 'stuff',
+    'panoptic',
+    'stuff',
 ])
 
 class CocoPath:
     IMAGES_DIR = 'images'
     ANNOTATIONS_DIR = 'annotations'
 
     IMAGE_EXT = '.jpg'
+    PANOPTIC_EXT = '.png'
diff --git a/datumaro/plugins/coco_format/importer.py b/datumaro/plugins/coco_format/importer.py
@@ -21,6 +21,8 @@ class CocoImporter(Importer):
         CocoTask.captions: 'coco_captions',
         CocoTask.labels: 'coco_labels',
         CocoTask.image_info: 'coco_image_info',
+        CocoTask.panoptic: 'coco_panoptic',
+        CocoTask.stuff: 'coco_stuff',
     }
 
     @classmethod
@@ -39,7 +41,8 @@ def __call__(self, path, **extra_params):
 
         # TODO: should be removed when proper label merging is implemented
         conflicting_types = {CocoTask.instances,
-            CocoTask.person_keypoints, CocoTask.labels}
+            CocoTask.person_keypoints, CocoTask.labels,
+            CocoTask.panoptic, CocoTask.stuff}
         ann_types = set(t for s in subsets.values() for t in s) \
             & conflicting_types
         if 1 <= len(ann_types):

diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py
@@ -131,11 +131,16 @@ def _convert_mask_object(self, obj):
             rle = mask_utils.encode(
                 np.require(obj.image, dtype=np.uint8, requirements='F'))
 
+        if isinstance(rle['counts'], str):
+           counts = rle['counts']
+        else:
+           counts = rle['counts'].decode('ascii')
+
         converted.update({
             'label_id': cast(obj.label, int),
             'rle': {
                 # serialize as compressed COCO mask
-                'counts': rle['counts'].decode('ascii'),
+                'counts': counts,
                 'size': list(int(c) for c in rle['size']),
             },
             'z_order': obj.z_order,

diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: MIT
 
+from itertools import chain
 import numpy as np
 
 from datumaro.util.image import lazy_image, load_image
@@ -112,6 +113,13 @@ def make_binary_mask(mask):
         return mask
     return mask.astype(bool)
 
+def bgr2index(img):
+    """
+        Decodes a 3-channel image into an index map:
+        index = (channel 0 << 16) + (channel 1 << 8) + channel 2
+
+        Inputs: an array with channels on the last axis
+        Outputs: an integer array without the channel axis
+    """
+    # The shifts need at least 24 bits. Non-integer inputs and integer
+    # inputs narrower than 4 bytes (e.g. uint8 images) are widened first,
+    # otherwise the shifted channels would overflow to zero.
+    if img.dtype.kind not in {'b', 'i', 'u'} or img.dtype.itemsize < 4:
+        img = img.astype(np.uint32)
+    return (img[..., 0] << 16) + (img[..., 1] << 8) + img[..., 2]
+
+def index2bgr(id_map):
+    # Split every index into three channels, highest bits first; the final
+    # cast to uint8 leaves one byte per channel
+    channels = (id_map >> 16, id_map >> 8, id_map)
+    stacked = np.dstack(channels)
+    return stacked.astype(np.uint8)
 
 def load_mask(path, inverse_colormap=None):
     mask = load_image(path, dtype=np.uint8)
@@ -279,7 +287,7 @@ def find_mask_bbox(mask):
     y0, y1 = np.where(rows)[0][[0, -1]]
     return [x0, y0, x1 - x0, y1 - y0]
 
-def merge_masks(masks):
+def merge_masks(masks, start=None):
     """
         Merges masks into one, mask order is responsible for z order.
         To avoid memory explosion on mask materialization, consider passing
@@ -288,6 +296,9 @@ def merge_masks(masks):
         Inputs: a sequence of index masks or (binary mask, index) pairs
         Outputs: an index mask
     """
+    if start is not None:
+        masks = chain([start], masks)
+
     it = iter(masks)
 
     try: