From 4375cdb8d5eb0068fb281d31e1ae91bfb95decad Mon Sep 17 00:00:00 2001 From: Zoya Maslova Date: Wed, 5 May 2021 14:34:55 +0300 Subject: [PATCH] Add panoptic and stuff COCO format (#210) * add coco stuff and panoptic formats * update CHANGELOG Co-authored-by: Maxim Zhiltsov --- CHANGELOG.md | 1 + README.md | 2 +- datumaro/plugins/coco_format/converter.py | 83 ++++++++++++ datumaro/plugins/coco_format/extractor.py | 97 ++++++++++++-- datumaro/plugins/coco_format/format.py | 5 +- datumaro/plugins/coco_format/importer.py | 5 +- datumaro/plugins/datumaro_format/converter.py | 7 +- datumaro/util/mask_tools.py | 13 +- docs/user_manual.md | 2 +- .../annotations/panoptic_val.json | 75 +++++++++++ .../annotations/panoptic_val/000000000001.png | Bin 0 -> 78 bytes .../coco_panoptic/images/val/000000000001.jpg | Bin 0 -> 631 bytes .../coco_stuff/annotations/stuff_val.json | 50 ++++++++ .../coco_stuff/images/val/000000000001.jpg | Bin 0 -> 631 bytes tests/test_coco_format.py | 118 +++++++++++++++++- 15 files changed, 439 insertions(+), 19 deletions(-) create mode 100644 tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json create mode 100644 tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png create mode 100644 tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg create mode 100644 tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json create mode 100644 tests/assets/coco_dataset/coco_stuff/images/val/000000000001.jpg diff --git a/CHANGELOG.md b/CHANGELOG.md index ea1be7bcde..c5b775ee01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for escaping in attribiute values in LabelMe format () - Support for Segmentation Splitting () - Support for CIFAR-10/100 dataset format () +- Support COCO panoptic and stuff format () ### Changed - LabelMe format saves dataset items with their relative paths by subsets without changing names () diff --git a/README.md b/README.md index 4293d400bc..2c3a2f70ba 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ CVAT annotations ---> Publication, statistics etc. [(Back to top)](#table-of-contents) - Dataset reading, writing, conversion in any direction. [Supported formats](docs/user_manual.md#supported-formats): - - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*) + - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`) - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) diff --git a/datumaro/plugins/coco_format/converter.py b/datumaro/plugins/coco_format/converter.py index 0caf89de3b..36781a0396 100644 --- a/datumaro/plugins/coco_format/converter.py +++ b/datumaro/plugins/coco_format/converter.py @@ -5,6 +5,7 @@ import json import logging as log +import numpy as np import os import os.path as osp from enum import Enum @@ -19,6 +20,7 @@ _COORDINATE_ROUNDING_DIGITS, AnnotationType, Points) from datumaro.components.dataset import ItemStatus from datumaro.util import cast, find, str_to_bool +from datumaro.util.image import save_image from .format import CocoPath, CocoTask @@ -451,6 +453,67 @@ def save_annotations(self, item): self.annotations.append(elem) +class _StuffConverter(_InstancesConverter): + pass + +class _PanopticConverter(_TaskConverter): + def write(self, path): + with open(path, 'w') as outfile: + json.dump(self._data, outfile) + + def save_categories(self, dataset): + label_categories = dataset.categories().get(AnnotationType.label) + if label_categories is None: + return + + for idx, cat in enumerate(label_categories.items): + self.categories.append({ + 'id': 1 + idx, + 'name': cast(cat.name, str, ''), + 'supercategory': cast(cat.parent, str, ''), + 'isthing': 0, # TODO: can't represent this information yet + }) + + def save_annotations(self, item): + if not item.has_image: + return + + ann_filename = item.id + CocoPath.PANOPTIC_EXT + + segments_info = list() + masks = [] + next_id = self._min_ann_id + for ann in item.annotations: + if ann.type != AnnotationType.mask: + continue + + if not ann.id: + ann.id = next_id + next_id += 1 + + segment_info = {} + segment_info['id'] = ann.id + segment_info['category_id'] = cast(ann.label, int, -1) + 1 + segment_info['area'] = float(ann.get_area()) + segment_info['bbox'] = [float(p) for p in ann.get_bbox()] + segment_info['iscrowd'] = cast(ann.attributes.get("is_crowd"), int, 0) + segments_info.append(segment_info) + masks.append(ann) + + if masks: + pan_format = mask_tools.merge_masks( + ((m.image, m.id) for m in masks), + start=np.zeros(item.image.size, dtype=np.uint32)) + save_image(osp.join(self._context._segmentation_dir, ann_filename), + mask_tools.index2bgr(pan_format), create_dir=True) + + elem = { + 'image_id': self._get_image_id(item), + 'file_name': ann_filename, + 'segments_info': segments_info + } + self.annotations.append(elem) + class CocoConverter(Converter): @staticmethod def _split_tasks_string(s): @@ -497,6 +560,8 @@ def build_cmdline_parser(cls, **kwargs): CocoTask.person_keypoints: _KeypointsConverter, CocoTask.captions: _CaptionsConverter, CocoTask.labels: _LabelsConverter, + CocoTask.panoptic: _PanopticConverter, + CocoTask.stuff: _StuffConverter, } def __init__(self, extractor, save_dir, @@ -541,6 +606,11 @@ def _make_dirs(self): self._ann_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR) os.makedirs(self._ann_dir, exist_ok=True) + def _make_segmentation_dir(self, subset_name): + self._segmentation_dir = osp.join(self._save_dir, + CocoPath.ANNOTATIONS_DIR, 'panoptic_'+ subset_name) + os.makedirs(self._segmentation_dir, exist_ok=True) + def _make_task_converter(self, task): if task not in self._TASK_CONVERTER: raise NotImplementedError() @@ -568,6 +638,8 @@ def apply(self): task_converters = self._make_task_converters() for task_conv in task_converters.values(): task_conv.save_categories(subset) + if CocoTask.panoptic in task_converters: + self._make_segmentation_dir(subset_name) for item in subset: if self._save_images: @@ -637,3 +709,14 @@ class CocoLabelsConverter(CocoConverter): def __init__(self, *args, **kwargs): kwargs['tasks'] = CocoTask.labels super().__init__(*args, **kwargs) + +class CocoPanopticConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.panoptic + super().__init__(*args, **kwargs) + +class CocoStuffConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.stuff + kwargs['segmentation_mode'] = SegmentationMode.mask + super().__init__(*args, **kwargs) diff --git a/datumaro/plugins/coco_format/extractor.py b/datumaro/plugins/coco_format/extractor.py index 29b97f7e27..faecf79f8e 100644 --- a/datumaro/plugins/coco_format/extractor.py +++ b/datumaro/plugins/coco_format/extractor.py @@ -4,18 +4,20 @@ # SPDX-License-Identifier: MIT from collections import OrderedDict +import json import logging as log import os.path as osp from pycocotools.coco import COCO import pycocotools.mask as mask_utils -from datumaro.components.extractor import (SourceExtractor, +from datumaro.components.extractor import (CompiledMask, Mask, SourceExtractor, DEFAULT_SUBSET_NAME, DatasetItem, AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption, LabelCategories, PointsCategories ) -from datumaro.util.image import Image +from datumaro.util.image import Image, lazy_image, load_image +from datumaro.util.mask_tools import bgr2index from .format import CocoTask, CocoPath @@ -42,16 +44,24 @@ def __init__(self, path, task, merge_instance_polygons=False, subset=None): self._merge_instance_polygons = merge_instance_polygons - loader = self._make_subset_loader(path) - self._load_categories(loader) - self._items = list(self._load_items(loader).values()) + if self._task == CocoTask.panoptic: + #panoptic is not added to pycocotools + panoptic_config = self._load_panoptic_config(path) + panoptic_images = osp.splitext(path)[0] + + self._load_panoptic_categories(panoptic_config) + self._items = list(self._load_panoptic_items(panoptic_config, + panoptic_images).values()) + else: + loader = self._make_subset_loader(path) + self._load_categories(loader) + self._items = list(self._load_items(loader).values()) @staticmethod def _make_subset_loader(path): # COCO API has an 'unclosed file' warning coco_api = COCO() with open(path, 'r') as f: - import json dataset = json.load(f) coco_api.dataset = dataset @@ -62,9 +72,7 @@ def _load_categories(self, loader): self._categories = {} if self._task in [CocoTask.instances, CocoTask.labels, - CocoTask.person_keypoints, - # TODO: Task.stuff, CocoTask.panoptic - ]: + CocoTask.person_keypoints, CocoTask.stuff]: label_categories, label_map = self._load_label_categories(loader) self._categories[AnnotationType.label] = label_categories self._label_map = label_map @@ -100,6 +108,22 @@ def _load_person_kp_categories(self, loader): return categories + @staticmethod + def _load_panoptic_config(path): + with open(path, 'r') as f: + return json.load(f) + + def _load_panoptic_categories(self, config): + label_categories = LabelCategories() + label_map = {} + for idx, cat in enumerate(config['categories']): + label_map[cat['id']] = idx + label_categories.add(name=cat['name'], + parent=cat.get('supercategory')) + + self._categories[AnnotationType.label] = label_categories + self._label_map = label_map + def _load_items(self, loader): items = OrderedDict() @@ -124,6 +148,48 @@ def _load_items(self, loader): return items + def _load_panoptic_items(self, config, panoptic_images): + items = OrderedDict() + + imgs_info = {} + for img in config['images']: + imgs_info[img['id']] = img + + for ann in config['annotations']: + img_id = int(ann['image_id']) + image_path = osp.join(self._images_dir, imgs_info[img_id]['file_name']) + image_size = (imgs_info[img_id].get('height'), + imgs_info[img_id].get('width')) + if all(image_size): + image_size = (int(image_size[0]), int(image_size[1])) + else: + image_size = None + image = Image(path=image_path, size=image_size) + anns = [] + + mask_path = osp.join(panoptic_images, ann['file_name']) + mask = lazy_image(mask_path, loader=self._load_pan_mask) + mask = CompiledMask(instance_mask=mask) + for segm_info in ann['segments_info']: + cat_id = self._get_label_id(segm_info) + segm_id = segm_info['id'] + attributes = { 'is_crowd': bool(segm_info['iscrowd']) } + anns.append(Mask(image=mask.lazy_extract(segm_id), + label=cat_id, id=segm_id, + group=segm_id, attributes=attributes)) + + items[img_id] = DatasetItem( + id=osp.splitext(imgs_info[img_id]['file_name'])[0], + subset=self._subset, image=image, + annotations=anns, attributes={'id': img_id}) + return items + + @staticmethod + def _load_pan_mask(path): + mask = load_image(path) + mask = bgr2index(mask) + return mask + def _get_label_id(self, ann): cat_id = ann.get('category_id') if cat_id in [0, None]: @@ -147,7 +213,8 @@ def _load_annotations(self, ann, image_info=None): group = ann_id # make sure all tasks' annotations are merged - if self._task in [CocoTask.instances, CocoTask.person_keypoints]: + if self._task in [CocoTask.instances, CocoTask.person_keypoints, + CocoTask.stuff]: x, y, w, h = ann['bbox'] label_id = self._get_label_id(ann) @@ -250,3 +317,13 @@ class CocoLabelsExtractor(_CocoExtractor): def __init__(self, path, **kwargs): kwargs['task'] = CocoTask.labels super().__init__(path, **kwargs) + +class CocoPanopticExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.panoptic + super().__init__(path, **kwargs) + +class CocoStuffExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.stuff + super().__init__(path, **kwargs) diff --git a/datumaro/plugins/coco_format/format.py b/datumaro/plugins/coco_format/format.py index 5129d49d9a..7a37bb709c 100644 --- a/datumaro/plugins/coco_format/format.py +++ b/datumaro/plugins/coco_format/format.py @@ -12,8 +12,8 @@ 'captions', 'labels', # extension, does not exist in the original COCO format 'image_info', - # 'panoptic', - # 'stuff', + 'panoptic', + 'stuff', ]) class CocoPath: @@ -21,3 +21,4 @@ class CocoPath: ANNOTATIONS_DIR = 'annotations' IMAGE_EXT = '.jpg' + PANOPTIC_EXT = '.png' diff --git a/datumaro/plugins/coco_format/importer.py b/datumaro/plugins/coco_format/importer.py index f613143e15..2e8f8a2ac8 100644 --- a/datumaro/plugins/coco_format/importer.py +++ b/datumaro/plugins/coco_format/importer.py @@ -21,6 +21,8 @@ class CocoImporter(Importer): CocoTask.captions: 'coco_captions', CocoTask.labels: 'coco_labels', CocoTask.image_info: 'coco_image_info', + CocoTask.panoptic: 'coco_panoptic', + CocoTask.stuff: 'coco_stuff', } @classmethod @@ -39,7 +41,8 @@ def __call__(self, path, **extra_params): # TODO: should be removed when proper label merging is implemented conflicting_types = {CocoTask.instances, - CocoTask.person_keypoints, CocoTask.labels} + CocoTask.person_keypoints, CocoTask.labels, + CocoTask.panoptic, CocoTask.stuff} ann_types = set(t for s in subsets.values() for t in s) \ & conflicting_types if 1 <= len(ann_types): diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py index 6e9de7142c..18d16c1428 100644 --- a/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/plugins/datumaro_format/converter.py @@ -131,11 +131,16 @@ def _convert_mask_object(self, obj): rle = mask_utils.encode( np.require(obj.image, dtype=np.uint8, requirements='F')) + if isinstance(rle['counts'], str): + counts = rle['counts'] + else: + counts = rle['counts'].decode('ascii') + converted.update({ 'label_id': cast(obj.label, int), 'rle': { # serialize as compressed COCO mask - 'counts': rle['counts'].decode('ascii'), + 'counts': counts, 'size': list(int(c) for c in rle['size']), }, 'z_order': obj.z_order, diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index b6c2bc9462..bd763dffea 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: MIT +from itertools import chain import numpy as np from datumaro.util.image import lazy_image, load_image @@ -112,6 +113,13 @@ def make_binary_mask(mask): return mask return mask.astype(bool) +def bgr2index(img): + if img.dtype.kind not in {'b', 'i', 'u'}: + img = img.astype(np.uint8) + return (img[..., 0] << 16) + (img[..., 1] << 8) + img[..., 2] + +def index2bgr(id_map): + return np.dstack((id_map >> 16, id_map >> 8, id_map)).astype(np.uint8) def load_mask(path, inverse_colormap=None): mask = load_image(path, dtype=np.uint8) @@ -279,7 +287,7 @@ def find_mask_bbox(mask): y0, y1 = np.where(rows)[0][[0, -1]] return [x0, y0, x1 - x0, y1 - y0] -def merge_masks(masks): +def merge_masks(masks, start=None): """ Merges masks into one, mask order is responsible for z order. To avoid memory explosion on mask materialization, consider passing @@ -288,6 +296,9 @@ def merge_masks(masks): Inputs: a sequence of index masks or (binary mask, index) pairs Outputs: an index mask """ + if start is not None: + masks = chain([start], masks) + it = iter(masks) try: diff --git a/docs/user_manual.md b/docs/user_manual.md index 9930e31394..a1602be2ec 100644 --- a/docs/user_manual.md +++ b/docs/user_manual.md @@ -84,7 +84,7 @@ import datumaro ## Supported Formats List of supported formats: -- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*) +- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - [Format specification](http://cocodataset.org/#format-data) - [Dataset example](../tests/assets/coco_dataset) - `labels` are our extension - like `instances` with only `category_id` diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json new file mode 100644 index 0000000000..c945de7ca5 --- /dev/null +++ b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json @@ -0,0 +1,75 @@ +{ + "licenses": [{ + "name": "", + "id": 0, + "url": "" + }], + "info": { + "contributor": "", + "date_created": "", + "description": "", + "url": "", + "version": "", + "year": "" + }, + "categories": [ + { + "id": 1, + "name": "a", + "supercategory": "", + "isthing": 1 + }, + { + "id": 2, + "name": "b", + "supercategory": "", + "isthing": 1 + }, + { + "id": 3, + "name": "c", + "supercategory": "", + "isthing": 1 + }, + { + "id": 4, + "name": "d", + "supercategory": "", + "isthing": 1 + } + ], + "images": [ + { + "id": 40, + "width": 5, + "height": 1, + "file_name": "000000000001.jpg", + "license": 0, + "flickr_url": "", + "coco_url": "", + "date_captured": 0 + } + ], + "annotations": [ + { + "image_id": 40, + "file_name": "000000000001.png", + "segments_info": [ + { + "id": 7, + "category_id": 4, + "area": 2.0, + "bbox": [2.0, 0.0, 1.0, 0.0], + "iscrowd": 0 + }, + { + "id": 20, + "category_id": 2, + "area": 2.0, + "bbox": [1.0, 0.0, 3.0, 0.0], + "iscrowd": 1 + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png new file mode 100644 index 0000000000000000000000000000000000000000..e471bfed416252e6619cfb903be67ce3e1104417 GIT binary patch literal 78 zcmeAS@N?(olHy`uVBq!ia0vp^tU%1j!2~2{&iT9qNQrv7IEHY@CZ{AM2qY%_IK#lS bhlhuO^AMw0yP>lwPyvIdtDnm{r-UW|NK_FF literal 0 HcmV?d00001 diff --git a/tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg b/tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a082a80324c398d11403c8aba2946f58746be4ea GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<5A1R0qH8UG()kO#Vxl@SaWpn#EynT3^&or9B$8>nEb z00R>vGcywlGb<|#3s7|}P@aKBkX1<0(2-3zFp*uUP{gQl;zAB(r;P_igD!qhF-|IK z;^Yz&myncFRa4i{)G{$OGqmaka3YSZQ|TeofBv2)j3M&~ka)>xhT)6Qdr?PR-2hpUWi(FzVCJ$9Vg1iRy l8F3zKBFkrRk0JbZi-Cuk5g2*Qf(-TyAGkCYHQ4{Z2>|I(&5!^9 literal 0 HcmV?d00001 diff --git a/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json new file mode 100644 index 0000000000..51a654f9d1 --- /dev/null +++ b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json @@ -0,0 +1,50 @@ +{ + "licenses": [ + { + "name": "", + "id": 0, + "url": "" + } + ], + "info": { + "contributor": "", + "date_created": "", + "description": "", + "url": "", + "version": "", + "year": "" + }, + "categories": [ + { + "id": 1, + "name": "TEST", + "supercategory": "" + } + ], + "images": [ + { + "id": 1, + "width": 5, + "height": 10, + "file_name": "000000000001.jpg", + "license": 0, + "flickr_url": "", + "coco_url": "", + "date_captured": 0 + } + ], + "annotations": [ + { + "id": 2, + "image_id": 1, + "category_id": 1, + "segmentation": { + "counts": [0, 10, 5, 5, 5, 5, 0, 10, 10, 0], + "size": [10, 5] + }, + "area": 30, + "bbox": [0, 0, 10, 4], + "iscrowd": 0 + } + ] + } diff --git a/tests/assets/coco_dataset/coco_stuff/images/val/000000000001.jpg b/tests/assets/coco_dataset/coco_stuff/images/val/000000000001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8bce84d3bf50bd756621338e0da944a42428fb06 GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!v`*nMGf}