diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml index d08fc731f9e..c08a5fea022 100644 --- a/src/otx/recipe/_base_/data/detection.yaml +++ b/src/otx/recipe/_base_/data/detection.yaml @@ -1,4 +1,7 @@ task: DETECTION +input_size: + - 800 + - 992 mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB @@ -15,9 +18,7 @@ train_subset: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 800 - - 992 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -42,9 +43,7 @@ val_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 800 - - 992 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -65,9 +64,7 @@ test_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 800 - - 992 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml index 94744217e95..3520f3930a7 100644 --- a/src/otx/recipe/_base_/data/instance_segmentation.yaml +++ b/src/otx/recipe/_base_/data/instance_segmentation.yaml @@ -1,4 +1,7 @@ task: INSTANCE_SEGMENTATION +input_size: + - 1024 + - 1024 mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB @@ -18,9 +21,7 @@ train_subset: keep_ratio: true transform_bbox: true transform_mask: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -49,9 +50,7 @@ val_subset: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -76,9 +75,7 @@ test_subset: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/_base_/data/rotated_detection.yaml b/src/otx/recipe/_base_/data/rotated_detection.yaml index 4418cd0363f..8ac4759ffc5 100644 --- a/src/otx/recipe/_base_/data/rotated_detection.yaml +++ b/src/otx/recipe/_base_/data/rotated_detection.yaml @@ -1,4 +1,7 @@ task: ROTATED_DETECTION +input_size: + - 1024 + - 1024 mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB @@ -16,9 +19,7 @@ train_subset: keep_ratio: true transform_bbox: true transform_mask: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 @@ -47,9 +48,7 @@ val_subset: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 @@ -74,9 +73,7 @@ test_subset: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - scale: - - 1024 - - 1024 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index df4c750b99b..52b3dec6f63 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -1,4 +1,7 @@ task: SEMANTIC_SEGMENTATION +input_size: + - 512 + - 512 mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB @@ -15,9 +18,7 @@ train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: - - 512 - - 512 + scale: $(input_size) crop_ratio_range: - 0.2 - 1.0 @@ -49,9 +50,7 @@ val_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 512 - - 512 + scale: $(input_size) transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype @@ -73,9 +72,7 @@ test_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 512 - - 512 + scale: $(input_size) transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/_base_/data/torchvision_base.yaml b/src/otx/recipe/_base_/data/torchvision_base.yaml index c00dd996eed..ab71ff16e86 100644 --- a/src/otx/recipe/_base_/data/torchvision_base.yaml +++ b/src/otx/recipe/_base_/data/torchvision_base.yaml @@ -15,6 +15,7 @@ train_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -25,6 +26,7 @@ val_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test transform_lib_type: TORCHVISION diff --git a/src/otx/recipe/_base_/data/torchvision_semisl.yaml b/src/otx/recipe/_base_/data/torchvision_semisl.yaml index d85d026474a..1b5d630a1ec 100644 --- a/src/otx/recipe/_base_/data/torchvision_semisl.yaml +++ b/src/otx/recipe/_base_/data/torchvision_semisl.yaml @@ -1,4 +1,5 @@ task: MULTI_CLASS_CLS +input_size: 224 mem_cache_size: 1GB mem_cache_img_max_size: - 500 @@ -16,7 +17,7 @@ train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -31,6 +32,7 @@ train_subset: std: [58.395, 57.12, 57.375] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -40,7 +42,7 @@ val_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -52,6 +54,7 @@ val_subset: std: [58.395, 57.12, 57.375] sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test transform_lib_type: TORCHVISION @@ -61,7 +64,7 @@ test_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -73,6 +76,7 @@ test_subset: std: [58.395, 57.12, 57.375] sampler: class_path: torch.utils.data.RandomSampler + unlabeled_subset: data_format: image_dir batch_size: 48 @@ -82,7 +86,7 @@ unlabeled_subset: weak_transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -101,10 +105,11 @@ unlabeled_subset: - 58.395 - 57.12 - 57.375 + strong_transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/_base_/data/visual_prompting.yaml b/src/otx/recipe/_base_/data/visual_prompting.yaml index e10f2c112cc..f51287efdec 100644 --- a/src/otx/recipe/_base_/data/visual_prompting.yaml +++ b/src/otx/recipe/_base_/data/visual_prompting.yaml @@ -1,4 +1,7 @@ task: VISUAL_PROMPTING +input_size: + - 1024 + - 1024 mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB @@ -8,7 +11,6 @@ unannotated_items_ratio: 0.0 vpm_config: use_bbox: true use_point: false - train_subset: subset_name: train transform_lib_type: TORCHVISION @@ -18,9 +20,7 @@ train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1024 - - 1024 + scale: $(input_size) keep_ratio: true transform_bbox: true transform_point: true @@ -47,9 +47,7 @@ val_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1024 - - 1024 + scale: $(input_size) keep_ratio: true transform_bbox: true transform_point: true @@ -76,9 +74,7 @@ test_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1024 - - 1024 + scale: $(input_size) keep_ratio: true transform_bbox: true transform_point: true diff --git a/src/otx/recipe/action_classification/movinet.yaml b/src/otx/recipe/action_classification/movinet.yaml index 5565910be9b..6964d236cf5 100644 --- a/src/otx/recipe/action_classification/movinet.yaml +++ b/src/otx/recipe/action_classification/movinet.yaml @@ -25,6 +25,9 @@ callback_monitor: val/accuracy data: task: ACTION_CLASSIFICATION + input_size: + - 224 + - 224 data_format: kinetics mem_cache_size: 1GB mem_cache_img_max_size: @@ -48,9 +51,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -68,6 +69,7 @@ data: std: [255.0, 255.0, 255.0] sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -83,9 +85,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -101,6 +101,7 @@ data: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + test_subset: subset_name: test transform_lib_type: TORCHVISION @@ -116,9 +117,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: diff --git a/src/otx/recipe/action_classification/x3d.yaml b/src/otx/recipe/action_classification/x3d.yaml index 972bdcdabd9..283daa72b2a 100644 --- a/src/otx/recipe/action_classification/x3d.yaml +++ b/src/otx/recipe/action_classification/x3d.yaml @@ -25,6 +25,9 @@ callback_monitor: val/accuracy data: task: ACTION_CLASSIFICATION + input_size: + - 224 + - 224 data_format: kinetics mem_cache_size: 1GB mem_cache_img_max_size: @@ -48,9 +51,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -68,6 +69,7 @@ data: std: [57.38, 57.38, 57.38] sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -83,9 +85,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -101,6 +101,7 @@ data: init_args: mean: [114.75, 114.75, 114.75] std: [57.38, 57.38, 57.38] + test_subset: subset_name: test transform_lib_type: TORCHVISION @@ -116,9 +117,7 @@ data: - class_path: otx.core.data.transform_libs.torchvision.DecordDecode - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 224 - - 224 + scale: $(input_size) keep_ratio: false - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: diff --git a/src/otx/recipe/anomaly_classification/padim.yaml b/src/otx/recipe/anomaly_classification/padim.yaml index 9857d22f4b8..91bf41499bd 100644 --- a/src/otx/recipe/anomaly_classification/padim.yaml +++ b/src/otx/recipe/anomaly_classification/padim.yaml @@ -29,18 +29,18 @@ overrides: data: task: ANOMALY_CLASSIFICATION + input_size: 256 data_format: mvtec vpm_config: use_bbox: true use_point: false - train_subset: batch_size: 32 num_workers: 4 transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -59,7 +59,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -77,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/anomaly_classification/stfpm.yaml b/src/otx/recipe/anomaly_classification/stfpm.yaml index 2d55a04375d..ebebe6c651e 100644 --- a/src/otx/recipe/anomaly_classification/stfpm.yaml +++ b/src/otx/recipe/anomaly_classification/stfpm.yaml @@ -30,6 +30,7 @@ overrides: data: task: ANOMALY_CLASSIFICATION + input_size: 256 data_format: mvtec train_subset: batch_size: 32 @@ -37,7 +38,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -56,7 +57,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -74,7 +75,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/anomaly_detection/padim.yaml b/src/otx/recipe/anomaly_detection/padim.yaml index 6653d67233d..746b88b5f85 100644 --- a/src/otx/recipe/anomaly_detection/padim.yaml +++ b/src/otx/recipe/anomaly_detection/padim.yaml @@ -29,6 +29,7 @@ overrides: data: task: ANOMALY_DETECTION + input_size: 256 data_format: mvtec vpm_config: use_bbox: true @@ -39,7 +40,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -58,7 +59,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/anomaly_detection/stfpm.yaml b/src/otx/recipe/anomaly_detection/stfpm.yaml index 765816bc072..c415169d1c9 100644 --- a/src/otx/recipe/anomaly_detection/stfpm.yaml +++ b/src/otx/recipe/anomaly_detection/stfpm.yaml @@ -30,6 +30,7 @@ overrides: data: task: ANOMALY_DETECTION + input_size: 256 data_format: mvtec train_subset: batch_size: 32 @@ -37,7 +38,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -56,7 +57,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -74,7 +75,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/anomaly_segmentation/padim.yaml b/src/otx/recipe/anomaly_segmentation/padim.yaml index 3c53d1bc8a7..ce7f38c6ad4 100644 --- a/src/otx/recipe/anomaly_segmentation/padim.yaml +++ b/src/otx/recipe/anomaly_segmentation/padim.yaml @@ -29,6 +29,7 @@ overrides: data: task: ANOMALY_SEGMENTATION + input_size: 256 data_format: mvtec vpm_config: use_bbox: true @@ -39,7 +40,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -58,7 +59,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/anomaly_segmentation/stfpm.yaml b/src/otx/recipe/anomaly_segmentation/stfpm.yaml index 363f038fce0..ff6dce4e574 100644 --- a/src/otx/recipe/anomaly_segmentation/stfpm.yaml +++ b/src/otx/recipe/anomaly_segmentation/stfpm.yaml @@ -30,6 +30,7 @@ overrides: data: task: ANOMALY_SEGMENTATION + input_size: 256 data_format: mvtec train_subset: batch_size: 32 @@ -37,7 +38,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -56,7 +57,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype @@ -74,7 +75,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge init_args: - size: 256 + size: $(input_size) antialias: true - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml index 2442d6c344e..d041bd11164 100644 --- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml @@ -38,6 +38,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -49,7 +50,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -71,7 +72,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -88,7 +89,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml index a7743b049eb..285d74565ef 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml @@ -37,6 +37,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -48,7 +49,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -70,7 +71,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -87,7 +88,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml index 07725b600b2..24fc8659528 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml @@ -37,6 +37,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -48,7 +49,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -70,7 +71,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -87,7 +88,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml index 55a222f052b..8a0c0441998 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml @@ -43,6 +43,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -54,7 +55,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -93,7 +94,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml index 2b9f12cb357..47abc890e23 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml @@ -39,6 +39,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -50,7 +51,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -72,7 +73,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -89,7 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml index 49dd3f0061a..85d9cb1a3e9 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml @@ -39,6 +39,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -50,7 +51,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -72,7 +73,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -89,7 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml index 5d115ae241e..792f4cfa137 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml @@ -39,6 +39,7 @@ overrides: data: task: H_LABEL_CLS + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -50,7 +51,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -72,7 +73,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -89,7 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml index b8cdd9e0282..f4667838256 100644 --- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml @@ -39,6 +39,7 @@ overrides: patience: 3 data: + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -49,7 +50,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -71,7 +72,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -88,7 +89,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml index 481ea8b2aa8..8f1bb992ada 100644 --- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml @@ -38,6 +38,7 @@ overrides: patience: 3 data: + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -55,7 +56,7 @@ overrides: std: [58.395, 57.12, 57.375] - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler @@ -73,7 +74,7 @@ overrides: std: [58.395, 57.12, 57.375] - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true test_subset: @@ -89,5 +90,5 @@ overrides: std: [58.395, 57.12, 57.375] - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml index e53772944cf..64d020de239 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml @@ -39,6 +39,7 @@ overrides: patience: 3 data: + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -49,7 +50,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -71,7 +72,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -88,7 +89,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml index d6d17a364f7..bda30832556 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml @@ -38,6 +38,7 @@ overrides: patience: 3 data: + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -48,7 +49,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -70,7 +71,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -87,7 +88,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml index ea298343bcf..ccf5be93722 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml @@ -43,6 +43,7 @@ overrides: patience: 3 data: + input_size: 224 mem_cache_img_max_size: - 500 - 500 @@ -53,7 +54,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -75,7 +76,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -92,7 +93,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml index 5fa62dd632d..20e2953368d 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml @@ -39,14 +39,15 @@ overrides: patience: 3 data: + input_size: + - 224 + - 224 train_subset: batch_size: 64 transforms: - class_path: torchvision.transforms.v2.RandomResizedCrop init_args: - size: - - 224 - - 224 + size: $(input_size) antialias: true - class_path: torchvision.transforms.v2.RandomHorizontalFlip init_args: @@ -74,9 +75,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -98,9 +97,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml index ba81bf8d7ce..3de6a45daf6 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml @@ -39,14 +39,15 @@ overrides: patience: 3 data: + input_size: + - 224 + - 224 train_subset: batch_size: 64 transforms: - class_path: torchvision.transforms.v2.RandomResizedCrop init_args: - size: - - 224 - - 224 + size: $(input_size) antialias: true - class_path: torchvision.transforms.v2.RandomHorizontalFlip init_args: @@ -74,9 +75,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -98,9 +97,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml index b27e5b16809..c138359e9fe 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml @@ -39,14 +39,15 @@ overrides: patience: 3 data: + input_size: + - 224 + - 224 train_subset: batch_size: 64 transforms: - class_path: torchvision.transforms.v2.RandomResizedCrop init_args: - size: - - 224 - - 224 + size: $(input_size) antialias: true - class_path: torchvision.transforms.v2.RandomHorizontalFlip init_args: @@ -74,9 +75,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -98,9 +97,7 @@ overrides: transforms: - class_path: torchvision.transforms.v2.Resize init_args: - size: - - 224 - - 224 + size: $(input_size) - class_path: torchvision.transforms.v2.ToImage - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml index c8665063f05..725f7dcb263 100644 --- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml @@ -43,6 +43,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -54,7 +55,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -93,7 +94,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml index 6c34ca182b7..1d8fd37f665 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml @@ -40,6 +40,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -51,7 +52,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -73,7 +74,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -90,7 +91,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml index fefd4fc1a47..9711df45c6c 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml @@ -43,6 +43,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -54,7 +55,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -93,7 +94,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml index 92b20c53b96..e51057acdfd 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml @@ -44,6 +44,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -55,7 +56,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -77,7 +78,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -94,7 +95,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml index 8c7df23d173..2a2403b2025 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml @@ -39,6 +39,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -50,7 +51,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -72,7 +73,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -89,7 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml index 1c9bb2f0d93..90320ce9724 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml @@ -43,6 +43,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -54,7 +55,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -76,7 +77,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -93,7 +94,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml index 5328f0dd890..425a66c61d7 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml @@ -39,6 +39,7 @@ overrides: data: task: MULTI_LABEL_CLS + input_size: 224 data_format: datumaro mem_cache_img_max_size: - 500 @@ -50,7 +51,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: 224 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 @@ -72,7 +73,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -89,7 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: 224 + scale: $(input_size) is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 59d61c16cde..6a74d780ab7 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -36,22 +36,21 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR train_subset: batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: - img_scale: - - 640 - - 640 + img_scale: $(input_size) max_cached_images: 20 random_pop: false - class_path: otx.core.data.transform_libs.torchvision.RandomResize init_args: - scale: - - 1280 - - 1280 + scale: $(input_size) * 2 ratio_range: - 0.5 - 2.0 @@ -59,24 +58,18 @@ overrides: transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomCrop init_args: - crop_size: - - 640 - - 640 + crop_size: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: - size: - - 640 - - 640 + size: $(input_size) pad_val: 114 - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp init_args: - img_scale: - - 640 - - 640 + img_scale: $(input_size) ratio_range: - 1.0 - 1.0 @@ -97,15 +90,11 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: - size: - - 640 - - 640 + size: $(input_size) pad_val: 114 is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype @@ -121,15 +110,11 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: - size: - - 640 - - 640 + size: $(input_size) pad_val: 114 is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 047446ccee5..60f1cb02391 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -35,6 +35,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 864 + - 864 train_subset: batch_size: 8 transforms: @@ -42,9 +45,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -65,15 +66,11 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) test_subset: batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 23e11782d5d..33d6bf4c261 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -35,6 +35,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 864 + - 864 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -46,9 +49,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -69,15 +70,11 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) test_subset: batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 864 - - 864 + scale: $(input_size) diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 65cfc195097..23a76f0e1d4 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR train_subset: batch_size: 8 @@ -45,22 +48,16 @@ overrides: init_args: random_pop: false max_cached_images: 20 - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: scaling_ratio_range: - 0.1 - 2.0 - border: - - -320 - - -320 + border: $(input_size) * -0.5 - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp init_args: - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) ratio_range: - 1.0 - 1.0 @@ -70,9 +67,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip @@ -98,9 +93,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -120,9 +113,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index 138e9d125e4..f69cd804357 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR tile_config: enable_tiler: true @@ -49,9 +52,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -75,9 +76,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -97,9 +96,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 3a767b5d79f..12600d5e536 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR train_subset: batch_size: 8 @@ -45,22 +48,16 @@ overrides: init_args: random_pop: false max_cached_images: 20 - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: scaling_ratio_range: - 0.1 - 2.0 - border: - - -320 - - -320 + border: $(input_size) * -0.5 - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp init_args: - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) ratio_range: - 1.0 - 1.0 @@ -70,9 +67,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip @@ -98,9 +93,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -120,9 +113,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 8a694e58905..a5758eca47c 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR tile_config: enable_tiler: true @@ -49,9 +52,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -75,9 +76,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -97,9 +96,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 5ff50aa8c62..9950a427274 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 train_subset: batch_size: 8 transforms: @@ -44,20 +47,14 @@ overrides: init_args: random_pop: false max_cached_images: 20 - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: - border: - - -320 - - -320 + border: $(input_size) * -0.5 - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip @@ -79,13 +76,14 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: + input_size: + - 416 + - 416 batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 416 - - 416 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -101,13 +99,14 @@ overrides: std: [58.395, 57.12, 57.375] test_subset: + input_size: + - 416 + - 416 batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 416 - - 416 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index dff467bae8f..768f96c4dbc 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -48,9 +51,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -69,14 +70,15 @@ overrides: std: [58.395, 57.12, 57.375] val_subset: + input_size: + - 416 + - 416 num_workers: 4 batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 416 - - 416 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -91,14 +93,15 @@ overrides: std: [58.395, 57.12, 57.375] test_subset: + input_size: + - 416 + - 416 num_workers: 4 batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 416 - - 416 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index bbbc85ea163..a99f0ce9122 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR train_subset: batch_size: 4 @@ -45,19 +48,13 @@ overrides: init_args: random_pop: false max_cached_images: 20 - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: - border: - - -320 - - -320 + border: $(input_size) * -0.5 - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp init_args: - img_scale: # (H, W) - - 640 - - 640 + img_scale: $(input_size) # (H, W) ratio_range: - 1.0 - 1.0 @@ -67,9 +64,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip @@ -95,9 +90,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -117,9 +110,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) keep_ratio: true - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index f18dfeb2dfd..0431814cb6e 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -37,6 +37,9 @@ overrides: gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 image_color_channel: BGR tile_config: enable_tiler: true @@ -49,9 +52,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -75,9 +76,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -97,9 +96,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index 5215d0f8dce..527911af487 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -32,6 +32,9 @@ data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: + input_size: + - 512 + - 512 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -43,9 +46,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -62,9 +63,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -79,9 +78,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index b73562e9235..34603c8a1d8 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -33,6 +33,9 @@ overrides: max_epochs: 100 gradient_clip_val: 35.0 data: + input_size: + - 512 + - 512 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -44,9 +47,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -58,9 +59,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -72,9 +71,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index eb90585e0a7..499410a0742 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -33,6 +33,9 @@ overrides: max_epochs: 100 gradient_clip_val: 35.0 data: + input_size: + - 512 + - 512 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -44,9 +47,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -58,9 +59,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -72,9 +71,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 58ae89ae61c..39720ba66a3 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -31,15 +31,16 @@ data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: + input_size: + - 1344 + - 1344 train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1344 - - 1344 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 @@ -49,9 +50,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1344 - - 1344 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 @@ -61,9 +60,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 1344 - - 1344 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index 52cf277b05a..009b3e7da3f 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -31,6 +31,9 @@ data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: + input_size: + - 512 + - 512 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -42,9 +45,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -56,9 +57,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -70,9 +69,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml index 91e7a82528a..9c82c41bdea 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml @@ -36,22 +36,21 @@ overrides: max_epochs: 100 gradient_clip_val: 35.0 data: + input_size: + - 640 + - 640 train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: - img_scale: - - 640 - - 640 + img_scale: $(input_size) max_cached_images: 20 random_pop: false - class_path: otx.core.data.transform_libs.torchvision.RandomResize init_args: - scale: - - 1280 - - 1280 + scale: $(input_size) * 2 ratio_range: - 0.5 - 2.0 @@ -60,9 +59,7 @@ overrides: transform_mask: true - class_path: otx.core.data.transform_libs.torchvision.RandomCrop init_args: - crop_size: - - 640 - - 640 + crop_size: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -74,9 +71,7 @@ overrides: transform_mask: true - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp init_args: - img_scale: - - 640 - - 640 + img_scale: $(input_size) ratio_range: - 1.0 - 1.0 @@ -99,9 +94,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_val: 114 @@ -111,9 +104,7 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 640 - - 640 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_val: 114 diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml index fca10f2b3f8..d219367eb24 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml @@ -35,6 +35,9 @@ overrides: max_epochs: 100 gradient_clip_val: 35.0 data: + input_size: + - 512 + - 512 tile_config: enable_tiler: true enable_adaptive_tiling: true @@ -46,9 +49,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -60,9 +61,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false @@ -74,9 +73,7 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: false - scale: - - 512 - - 512 + scale: $(input_size) - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: false diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 02e0d9d5a89..b62e173e74c 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -40,29 +40,26 @@ callback_monitor: val/Dice data: ../_base_/data/semantic_segmentation.yaml overrides: data: + input_size: + - 560 + - 560 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: - scale: - - 560 - - 560 + scale: $(input_size) val_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 560 - - 560 + scale: $(input_size) test_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - scale: - - 560 - - 560 + scale: $(input_size) callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py index d2663d8cb45..078c81fc84c 100644 --- a/tests/unit/engine/utils/test_auto_configurator.py +++ b/tests/unit/engine/utils/test_auto_configurator.py @@ -171,7 +171,7 @@ def test_update_ov_subset_pipeline(self) -> None: { "class_path": "otx.core.data.transform_libs.torchvision.Resize", "init_args": { - "scale": [800, 992], + "scale": (800, 992), "is_numpy_to_tvtensor": True, }, },