Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Dataset Adapter: Avoid duplicated annotation and permit empty image #1873

Merged
merged 8 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions otx/core/data/adapter/detection_dataset_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from otx.api.entities.datasets import DatasetEntity
from otx.api.entities.image import Image
from otx.api.entities.model_template import TaskType
from otx.api.entities.subset import Subset
from otx.core.data.adapter.base_dataset_adapter import BaseDatasetAdapter


Expand Down Expand Up @@ -48,8 +49,9 @@ def get_otx_dataset(self) -> DatasetEntity:

if ann.label not in used_labels:
used_labels.append(ann.label)
dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset)
dataset_items.append(dataset_item)

if len(shapes) > 0 or subset == Subset.UNLABELED or (subset == Subset.VALIDATION and len(datumaro_item.annotations)) == 0:
sungmanc marked this conversation as resolved.
Show resolved Hide resolved
dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset)
dataset_items.append(dataset_item)
self.remove_unused_label_entities(used_labels)
return DatasetEntity(items=dataset_items)
3 changes: 2 additions & 1 deletion otx/core/data/adapter/segmentation_dataset_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def get_otx_dataset(self) -> DatasetEntity:
shapes.append(self._get_polygon_entity(d_polygon, image.width, image.height))
if d_polygon.label not in used_labels:
used_labels.append(d_polygon.label)
if len(shapes) > 0 or subset == Subset.UNLABELED:

if len(shapes) > 0 or subset == Subset.UNLABELED == 0:
dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset)
dataset_items.append(dataset_item)

Expand Down
12 changes: 11 additions & 1 deletion tests/assets/car_tree_bug/annotations/instances_val.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@
"id": 8,
"width": 1280,
"height": 720,
"file_name": "Slide20.PNG",
"file_name": "Slide4.PNG",
"license": 0,
"flickr_url": "",
"coco_url": "",
"date_captured": 0
},
{
"id": 9,
"width": 1280,
"height": 720,
"file_name": "Slide5.PNG",
"license": 0,
"flickr_url": "",
"coco_url": "",
Expand Down
Binary file added tests/assets/car_tree_bug/images/val/Slide5.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
78 changes: 47 additions & 31 deletions tests/unit/core/data/adapter/test_detection_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# SPDX-License-Identifier: Apache-2.0
#
import os
from typing import Optional

from otx.api.entities.annotation import NullAnnotationSceneEntity
from otx.api.entities.datasets import DatasetEntity
from otx.api.entities.label_schema import LabelSchemaEntity
from otx.api.entities.model_template import TaskType
Expand All @@ -20,42 +20,48 @@
class TestOTXDetectionDatasetAdapter:
def setup_method(self):
self.root_path = os.getcwd()

@e2e_pytest_unit
def test_detection(self):
task = "detection"

self.task_type: TaskType = TASK_NAME_TO_TASK_TYPE[task]
task_type: TaskType = TASK_NAME_TO_TASK_TYPE[task]
data_root_dict: dict = TASK_NAME_TO_DATA_ROOT[task]

self.train_data_roots: str = os.path.join(self.root_path, data_root_dict["train"])
self.val_data_roots: str = os.path.join(self.root_path, data_root_dict["val"])
self.test_data_roots: str = os.path.join(self.root_path, data_root_dict["test"])
self.unlabeled_data_roots: Optional[str] = None
if "unlabeled" in data_root_dict:
self.unlabeled_data_roots = os.path.join(self.root_path, data_root_dict["unlabeled"])

self.train_dataset_adapter = DetectionDatasetAdapter(
task_type=self.task_type,
train_data_roots=self.train_data_roots,
val_data_roots=self.val_data_roots,
unlabeled_data_roots=self.unlabeled_data_roots,
)
train_data_roots: str = os.path.join(self.root_path, data_root_dict["train"])
val_data_roots: str = os.path.join(self.root_path, data_root_dict["val"])
test_data_roots: str = os.path.join(self.root_path, data_root_dict["test"])

self.test_dataset_adapter = DetectionDatasetAdapter(
task_type=self.task_type,
test_data_roots=self.test_data_roots,
det_train_dataset_adapter = DetectionDatasetAdapter(
task_type=task_type,
train_data_roots=train_data_roots,
val_data_roots=val_data_roots,
)

@e2e_pytest_unit
def test_init(self):
assert Subset.TRAINING in self.train_dataset_adapter.dataset
assert Subset.VALIDATION in self.train_dataset_adapter.dataset
assert Subset.TESTING in self.test_dataset_adapter.dataset
if self.unlabeled_data_roots is not None:
assert Subset.UNLABELED in self.train_dataset_adapter.dataset
assert Subset.TRAINING in det_train_dataset_adapter.dataset
assert Subset.VALIDATION in det_train_dataset_adapter.dataset

@e2e_pytest_unit
def test_get_otx_dataset(self):
assert isinstance(self.train_dataset_adapter.get_otx_dataset(), DatasetEntity)
assert isinstance(self.test_dataset_adapter.get_otx_dataset(), DatasetEntity)
det_train_dataset = det_train_dataset_adapter.get_otx_dataset()
det_train_label_schema = det_train_dataset_adapter.get_label_schema()
assert isinstance(det_train_dataset, DatasetEntity)
assert isinstance(det_train_label_schema, LabelSchemaEntity)

# The test data contains an image with an empty label,
# so has_empty_label should be True.
has_empty_label = False
for train_data in det_train_dataset:
if isinstance(train_data.annotation_scene, NullAnnotationSceneEntity):
has_empty_label = True
assert has_empty_label is True

det_test_dataset_adapter = DetectionDatasetAdapter(
task_type=task_type,
test_data_roots=test_data_roots,
)

assert Subset.TESTING in det_test_dataset_adapter.dataset
assert isinstance(det_test_dataset_adapter.get_otx_dataset(), DatasetEntity)
assert isinstance(det_test_dataset_adapter.get_label_schema(), LabelSchemaEntity)

@e2e_pytest_unit
def test_instance_segmentation(self):
Expand All @@ -77,8 +83,18 @@ def test_instance_segmentation(self):
assert Subset.TRAINING in instance_seg_train_dataset_adapter.dataset
assert Subset.VALIDATION in instance_seg_train_dataset_adapter.dataset

assert isinstance(instance_seg_train_dataset_adapter.get_otx_dataset(), DatasetEntity)
assert isinstance(instance_seg_train_dataset_adapter.get_label_schema(), LabelSchemaEntity)
instance_seg_otx_train_data = instance_seg_train_dataset_adapter.get_otx_dataset()
instance_seg_otx_train_label_schema = instance_seg_train_dataset_adapter.get_label_schema()
assert isinstance(instance_seg_otx_train_data, DatasetEntity)
assert isinstance(instance_seg_otx_train_label_schema, LabelSchemaEntity)

# The test data contains an image with an empty label,
# so has_empty_label should be True.
has_empty_label = False
for train_data in instance_seg_otx_train_data:
if isinstance(train_data.annotation_scene, NullAnnotationSceneEntity):
has_empty_label = True
assert has_empty_label is True

instance_seg_test_dataset_adapter = DetectionDatasetAdapter(
task_type=task_type,
Expand Down