Merge pull request #3763 from sungchul2/update-recipe-using-input_size

Update recipes using `$(input_size)`
openvinotoolkit · Jul 25, 2024 · 8e1d5c3
2 parents 821c808 + 53c45e7
commit 8e1d5c3
Show file tree

Hide file tree

Showing 57 changed files with 323 additions and 404 deletions.
diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml
@@ -1,4 +1,7 @@
 task: DETECTION
+input_size:
+  - 800
+  - 992
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
@@ -15,9 +18,7 @@ train_subset:
     - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 800
-          - 992
+        scale: $(input_size)
         transform_bbox: true
     - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
       init_args:
@@ -42,9 +43,7 @@ val_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 800
-          - 992
+        scale: $(input_size)
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
@@ -65,9 +64,7 @@ test_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 800
-          - 992
+        scale: $(input_size)
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:

diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml
@@ -1,4 +1,7 @@
 task: INSTANCE_SEGMENTATION
+input_size:
+  - 1024
+  - 1024
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
@@ -18,9 +21,7 @@ train_subset:
         keep_ratio: true
         transform_bbox: true
         transform_mask: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         pad_to_square: true
@@ -49,9 +50,7 @@ val_subset:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
         keep_ratio: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         pad_to_square: true
@@ -76,9 +75,7 @@ test_subset:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
         keep_ratio: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         pad_to_square: true

diff --git a/src/otx/recipe/_base_/data/rotated_detection.yaml b/src/otx/recipe/_base_/data/rotated_detection.yaml
@@ -1,4 +1,7 @@
 task: ROTATED_DETECTION
+input_size:
+  - 1024
+  - 1024
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
@@ -16,9 +19,7 @@ train_subset:
         keep_ratio: true
         transform_bbox: true
         transform_mask: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         size_divisor: 32
@@ -47,9 +48,7 @@ val_subset:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
         keep_ratio: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         size_divisor: 32
@@ -74,9 +73,7 @@ test_subset:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
         keep_ratio: true
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.Pad
       init_args:
         size_divisor: 32

diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml
@@ -1,4 +1,7 @@
 task: SEMANTIC_SEGMENTATION
+input_size:
+  - 512
+  - 512
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
@@ -15,9 +18,7 @@ train_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
       init_args:
-        scale:
-          - 512
-          - 512
+        scale: $(input_size)
         crop_ratio_range:
           - 0.2
           - 1.0
@@ -49,9 +50,7 @@ val_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 512
-          - 512
+        scale: $(input_size)
         transform_mask: true
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype
@@ -73,9 +72,7 @@ test_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 512
-          - 512
+        scale: $(input_size)
         transform_mask: true
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype

diff --git a/src/otx/recipe/_base_/data/torchvision_base.yaml b/src/otx/recipe/_base_/data/torchvision_base.yaml
@@ -15,6 +15,7 @@ train_subset:
   num_workers: 2
   sampler:
     class_path: torch.utils.data.RandomSampler
+
 val_subset:
   subset_name: val
   transform_lib_type: TORCHVISION
@@ -25,6 +26,7 @@ val_subset:
   num_workers: 2
   sampler:
     class_path: torch.utils.data.RandomSampler
+
 test_subset:
   subset_name: test
   transform_lib_type: TORCHVISION

diff --git a/src/otx/recipe/_base_/data/torchvision_semisl.yaml b/src/otx/recipe/_base_/data/torchvision_semisl.yaml
@@ -1,4 +1,5 @@
 task: MULTI_CLASS_CLS
+input_size: 224
 mem_cache_size: 1GB
 mem_cache_img_max_size:
   - 500
@@ -16,7 +17,7 @@ train_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale: 224
+        scale: $(input_size)
     - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
       init_args:
         prob: 0.5
@@ -31,6 +32,7 @@ train_subset:
         std: [58.395, 57.12, 57.375]
   sampler:
     class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
+
 val_subset:
   subset_name: val
   transform_lib_type: TORCHVISION
@@ -40,7 +42,7 @@ val_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale: 224
+        scale: $(input_size)
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
@@ -52,6 +54,7 @@ val_subset:
         std: [58.395, 57.12, 57.375]
   sampler:
     class_path: torch.utils.data.RandomSampler
+
 test_subset:
   subset_name: test
   transform_lib_type: TORCHVISION
@@ -61,7 +64,7 @@ test_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale: 224
+        scale: $(input_size)
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
@@ -73,6 +76,7 @@ test_subset:
         std: [58.395, 57.12, 57.375]
   sampler:
     class_path: torch.utils.data.RandomSampler
+
 unlabeled_subset:
   data_format: image_dir
   batch_size: 48
@@ -82,7 +86,7 @@ unlabeled_subset:
     weak_transforms:
       - class_path: otx.core.data.transform_libs.torchvision.Resize
         init_args:
-          scale: 224
+          scale: $(input_size)
       - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
         init_args:
           prob: 0.5
@@ -101,10 +105,11 @@ unlabeled_subset:
             - 58.395
             - 57.12
             - 57.375
+
     strong_transforms:
       - class_path: otx.core.data.transform_libs.torchvision.Resize
         init_args:
-          scale: 224
+          scale: $(input_size)
       - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
         init_args:
           prob: 0.5

diff --git a/src/otx/recipe/_base_/data/visual_prompting.yaml b/src/otx/recipe/_base_/data/visual_prompting.yaml
@@ -1,4 +1,7 @@
 task: VISUAL_PROMPTING
+input_size:
+  - 1024
+  - 1024
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
@@ -8,7 +11,6 @@ unannotated_items_ratio: 0.0
 vpm_config:
   use_bbox: true
   use_point: false
-
 train_subset:
   subset_name: train
   transform_lib_type: TORCHVISION
@@ -18,9 +20,7 @@ train_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
         keep_ratio: true
         transform_bbox: true
         transform_point: true
@@ -47,9 +47,7 @@ val_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
         keep_ratio: true
         transform_bbox: true
         transform_point: true
@@ -76,9 +74,7 @@ test_subset:
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.Resize
       init_args:
-        scale:
-          - 1024
-          - 1024
+        scale: $(input_size)
         keep_ratio: true
         transform_bbox: true
         transform_point: true

diff --git a/src/otx/recipe/action_classification/movinet.yaml b/src/otx/recipe/action_classification/movinet.yaml
@@ -25,6 +25,9 @@ callback_monitor: val/accuracy
 
 data:
   task: ACTION_CLASSIFICATION
+  input_size:
+    - 224
+    - 224
   data_format: kinetics
   mem_cache_size: 1GB
   mem_cache_img_max_size:
@@ -48,9 +51,7 @@ data:
       - class_path: otx.core.data.transform_libs.torchvision.DecordDecode
       - class_path: otx.core.data.transform_libs.torchvision.Resize
         init_args:
-          scale:
-            - 224
-            - 224
+          scale: $(input_size)
           keep_ratio: false
       - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
         init_args:
@@ -68,6 +69,7 @@ data:
           std: [255.0, 255.0, 255.0]
     sampler:
       class_path: torch.utils.data.RandomSampler
+
   val_subset:
     subset_name: val
     transform_lib_type: TORCHVISION
@@ -83,9 +85,7 @@ data:
       - class_path: otx.core.data.transform_libs.torchvision.DecordDecode
       - class_path: otx.core.data.transform_libs.torchvision.Resize
         init_args:
-          scale:
-            - 224
-            - 224
+          scale: $(input_size)
           keep_ratio: false
       - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
         init_args:
@@ -101,6 +101,7 @@ data:
         init_args:
           mean: [0.0, 0.0, 0.0]
           std: [255.0, 255.0, 255.0]
+
   test_subset:
     subset_name: test
     transform_lib_type: TORCHVISION
@@ -116,9 +117,7 @@ data:
       - class_path: otx.core.data.transform_libs.torchvision.DecordDecode
       - class_path: otx.core.data.transform_libs.torchvision.Resize
         init_args:
-          scale:
-            - 224
-            - 224
+          scale: $(input_size)
           keep_ratio: false
       - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
         init_args: