Merge main into feature/pimo (#1248)
* Configure readthedocs via `.readthedocs.yaml` file (#1229)

* 🚚 Refactor Benchmarking Script (#1216)

* New printing stuff

* Remove dead code + address codacy issues

* Refactor try/except + log to comet/wandb during runs

* pre-commit error

* third-party configuration

---------

Co-authored-by: Ashwin Vaidya <[email protected]>

* Update CODEOWNERS

* Enable training with only normal images for MVTec (#1241)

* ignore mask check when dataset has only normal samples

* update changelog

* Revert "🚚 Refactor Benchmarking Script" (#1239)

Revert "🚚 Refactor Benchmarking Script (#1216)"

This reverts commit 784767f.

* Update benchmarking notebook (#1242)

* Fix metadata path

* Update benchmarking notebook

---------

Co-authored-by: Ashwin Vaidya <[email protected]>
Co-authored-by: Ashwin Vaidya <[email protected]>
Co-authored-by: Dick Ameln <[email protected]>
4 people authored Aug 9, 2023
1 parent 9323985 commit 1dd9434
Showing 5 changed files with 108 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -34,7 +34,7 @@
/src/anomalib/models/cflow @ashwinvaidya17
/src/anomalib/models/csflow @ashwinvaidya17
/src/anomalib/models/dfkde @djdameln
/src/anomalib/models/dfm @djdameln @nahuja-intel
/src/anomalib/models/dfm @djdameln
/src/anomalib/models/draem @djdameln
/src/anomalib/models/fastflow @samet-akcay
/src/anomalib/models/ganomaly @ashwinvaidya17
32 changes: 32 additions & 0 deletions .readthedocs.yaml
@@ -0,0 +1,32 @@
# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
  # You can also specify other tool versions:
  # nodejs: "20"
  # rust: "1.70"
  # golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/source/conf.py
  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
  # builder: "dirhtml"
  # Fail on all warnings to avoid broken references
  # fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
formats:
  - pdf
  - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
  install:
    - requirements: requirements/docs.txt
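
Nothing in the file above has to be run locally for Read the Docs to pick it up, but a rough local equivalent of the build it declares can help verify the Sphinx setup before pushing. Below is a minimal sketch, assuming the `docs/source/conf.py` and `requirements/docs.txt` paths from the config; the `_build/html` output directory is an arbitrary choice, not part of the config.

```python
# Minimal local approximation of the Read the Docs build declared above.
# docs/source and requirements/docs.txt come from .readthedocs.yaml;
# _build/html is an arbitrary output directory for this sketch.
import subprocess
import sys

# Install the documentation requirements the config points at.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r", "requirements/docs.txt"],
    check=True,
)

# Build HTML docs with Sphinx, using the conf.py referenced by the config.
subprocess.run(
    ["sphinx-build", "-b", "html", "docs/source", "_build/html"],
    check=True,
)
```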
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Changed

- Enable training with only normal images for MVTec in https://github.com/openvinotoolkit/anomalib/pull/1241
- Improve default settings of EfficientAD

### Deprecated
125 changes: 66 additions & 59 deletions notebooks/300_benchmarking/301_benchmarking.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Short walkthrough on Benchmarking in Anomalib"
"# Short walkthrough on Benchmarking in Anomalib\n"
]
},
{
@@ -22,7 +22,7 @@
"id": "IJlBPLRvOYuv"
},
"source": [
"## Install Anomalib"
"## Install Anomalib\n"
]
},
{
@@ -37,7 +37,7 @@
},
"outputs": [],
"source": [
"!git clone https://github.com/openvinotoolkit/anomalib.git"
"!git clone https://github.com/openvinotoolkit/anomalib.git --branch main --single-branch"
]
},
{
@@ -92,7 +92,7 @@
"id": "0NJboi_7XSSN"
},
"source": [
"> Note: Restart Runtime if promted by clicking the button at the end of the install logs"
"> Note: Restart Runtime if promted by clicking the button at the end of the install logs\n"
]
},
{
@@ -101,7 +101,7 @@
"id": "y4sQOIwOUO0u"
},
"source": [
"## Download and setup dataset"
"## Download and setup dataset\n"
]
},
{
@@ -151,7 +151,7 @@
"id": "Mb_kkxi-URk7"
},
"source": [
"## Create configuration file for training using Folder Dataset"
"## Create configuration file for training using Folder Dataset\n"
]
},
{
@@ -188,15 +188,15 @@
" task: segmentation # classification or segmentation\n",
" mask: <path/to/mask/annotations> #optional\n",
" extensions: null\n",
" split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n",
" split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n",
"```\n",
"\n",
"The complete configuration is in the codeblock below."
"The complete configuration is in the codeblock below.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"id": "GNSo19XlPixN"
},
@@ -210,18 +210,22 @@
" normal_dir: good # name of the folder containing normal images.\n",
" abnormal_dir: colour # name of the folder containing abnormal images.\n",
" normal_test_dir: null # name of the folder containing normal test images.\n",
" mask_dir: /content/anomalib/datasets/hazelnut_toy/mask/colour # optional\n",
" task: segmentation # classification or segmentation\n",
" mask: /content/anomalib/datasets/hazelnut_toy/mask/colour # optional\n",
" extensions: null\n",
" split_ratio: 0.2 # ratio of the normal images that will be used to create a test split\n",
" image_size: 256\n",
" train_batch_size: 32\n",
" test_batch_size: 32\n",
" eval_batch_size: 32\n",
" num_workers: 8\n",
" image_size: 256 # dimensions to which images are resized (mandatory)\n",
" center_crop: null # dimensions to which images are center-cropped after resizing (optional)\n",
" normalization: imagenet # data distribution to which the images will be normalized: [none, imagenet]\n",
" transform_config:\n",
" train: null\n",
" val: null\n",
" create_validation_set: false\n",
" eval: null\n",
" test_split_mode: from_dir # options: [from_dir, synthetic]\n",
" test_split_ratio: 0.2 # fraction of train images held out testing (usage depends on test_split_mode)\n",
" val_split_mode: same_as_test # options: [same_as_test, from_test, synthetic]\n",
" val_split_ratio: 0.5 # fraction of train/test images held out for validation (usage depends on val_split_mode)\n",
" tiling:\n",
" apply: false\n",
" tile_size: null\n",
@@ -233,6 +237,7 @@
"model:\n",
" name: padim\n",
" backbone: resnet18\n",
" pre_trained: true\n",
" layers:\n",
" - layer1\n",
" - layer2\n",
@@ -251,65 +256,67 @@
" pixel_default: 3\n",
" adaptive: true\n",
"\n",
"visualization:\n",
" show_images: False # show images on the screen\n",
" save_images: True # save images to the file system\n",
" log_images: True # log images to the available loggers (if any)\n",
" image_save_path: null # path to which images will be saved\n",
" mode: full # options: [\"full\", \"simple\"]\n",
"\n",
"project:\n",
" seed: 42\n",
" path: ./results\n",
"\n",
"logging:\n",
" log_images_to: [\"local\"] # options: [wandb, tensorboard, local].\n",
" logger: [] # options: [tensorboard, wandb, csv] or combinations.\n",
" logger: [] # options: [comet, tensorboard, wandb, csv] or combinations.\n",
" log_graph: false # Logs the model graph to respective logger.\n",
"\n",
"optimization:\n",
" openvino:\n",
" apply: false\n",
" export_mode: null # options: torch, onnx, openvino\n",
"\n",
"# PL Trainer Args. Don't add extra parameter here.\n",
"trainer:\n",
" accelerator: auto # <\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">\n",
" accumulate_grad_batches: 1\n",
" amp_backend: native\n",
" auto_lr_find: false\n",
" auto_scale_batch_size: false\n",
" auto_select_gpus: false\n",
" benchmark: false\n",
" check_val_every_n_epoch: 1 # Don't validate before extracting features.\n",
" enable_checkpointing: true\n",
" default_root_dir: null\n",
" detect_anomaly: false\n",
" deterministic: false\n",
" gradient_clip_val: 0\n",
" gradient_clip_algorithm: norm\n",
" num_nodes: 1\n",
" devices: 1\n",
" enable_checkpointing: true\n",
" enable_model_summary: true\n",
" enable_progress_bar: true\n",
" overfit_batches: 0.0\n",
" track_grad_norm: -1\n",
" check_val_every_n_epoch: 1 # Don't validate before extracting features.\n",
" fast_dev_run: false\n",
" gpus: null # Set automatically\n",
" gradient_clip_val: 0\n",
" ipus: null\n",
" limit_predict_batches: 1.0\n",
" limit_test_batches: 1.0\n",
" limit_train_batches: 1.0\n",
" limit_val_batches: 1.0\n",
" log_every_n_steps: 50\n",
" accumulate_grad_batches: 1\n",
" max_epochs: 1\n",
" max_steps: -1\n",
" max_time: null\n",
" min_epochs: null\n",
" max_steps: -1\n",
" min_steps: null\n",
" move_metrics_to_cpu: false\n",
" multiple_trainloader_mode: max_size_cycle\n",
" num_nodes: 1\n",
" num_processes: null\n",
" num_sanity_val_steps: 0\n",
" overfit_batches: 0.0\n",
" plugins: null\n",
" max_time: null\n",
" limit_train_batches: 1.0\n",
" limit_val_batches: 1.0\n",
" limit_test_batches: 1.0\n",
" limit_predict_batches: 1.0\n",
" val_check_interval: 1.0 # Don't validate before extracting features.\n",
" log_every_n_steps: 50\n",
" accelerator: auto # <\"cpu\", \"gpu\", \"tpu\", \"ipu\", \"hpu\", \"auto\">\n",
" strategy: null\n",
" sync_batchnorm: false\n",
" precision: 32\n",
" enable_model_summary: true\n",
" num_sanity_val_steps: 0\n",
" profiler: null\n",
" benchmark: false\n",
" deterministic: false\n",
" reload_dataloaders_every_n_epochs: 0\n",
" auto_lr_find: false\n",
" replace_sampler_ddp: true\n",
" sync_batchnorm: false\n",
" tpu_cores: null\n",
" track_grad_norm: -1\n",
" val_check_interval: 1.0 # Don't validate before extracting features.\n",
" detect_anomaly: false\n",
" auto_scale_batch_size: false\n",
" plugins: null\n",
" move_metrics_to_cpu: false\n",
" multiple_trainloader_mode: max_size_cycle\n",
"\n",
"\"\"\"\n",
"with open(\"config.yaml\", \"w\", encoding=\"utf8\") as f:\n",
" f.writelines(folder_padim)"
@@ -321,7 +328,7 @@
"id": "jpjtUHyWUXx0"
},
"source": [
"## Train the model to see if it is working"
"## Train the model to see if it is working\n"
]
},
{
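
The training cell itself is collapsed in this view. As a hedged sketch of what it would run, anomalib releases of this era exposed a `tools/train.py` entrypoint taking a `--config` argument; treat the exact path and flag as assumptions inferred from the v0.x repository layout rather than something shown in this diff.

```python
# Hedged sketch: train PaDiM with the config.yaml written by the previous cell.
# tools/train.py and its --config flag are assumed from the anomalib v0.x layout.
import subprocess

# Assumes the working directory is the anomalib repository root and that
# config.yaml (written above) sits in it.
subprocess.run(["python", "tools/train.py", "--config", "config.yaml"], check=True)
```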
@@ -345,7 +352,7 @@
"id": "Wt6BCkcoUch7"
},
"source": [
"## Create Benchmarking config"
"## Create Benchmarking config\n"
]
},
{
Expand All @@ -356,11 +363,11 @@
"\n",
"> Note: Not all models in Anomalib support OpenVINO export.\n",
"\n",
"The `hardware` section of the config file is used to pass the list of hardwares on which to compute the benchmarking results. If the host system has multiple GPUs, then the benchmarking computation is distributed across GPUs to speed up collection of results. By default, the results are gathered in a `csv` file but with the `writer` flag, you can also save the results to `tensorboard` and `wandb` loggers. The final section is the `grid_search` section. It has two parameters, _dataset_ and *model_name*. The _dataset_ field is used to set the values of grid search while the *model_name* section is used to pass the list of models for which the benchmark is computed.\n",
"The `hardware` section of the config file is used to pass the list of hardwares on which to compute the benchmarking results. If the host system has multiple GPUs, then the benchmarking computation is distributed across GPUs to speed up collection of results. By default, the results are gathered in a `csv` file but with the `writer` flag, you can also save the results to `tensorboard` and `wandb` loggers. The final section is the `grid_search` section. It has two parameters, _dataset_ and _model_name_. The _dataset_ field is used to set the values of grid search while the _model_name_ section is used to pass the list of models for which the benchmark is computed.\n",
"\n",
"In this notebook we are working with a toy dataset, so we also need to tell the benchmarking script to use that particular dataset instead of the default `MVTec` as defined in each of the model config file. We can either update each config file or just pass a list of one value for the fields such as _format_, _path_, etc., as shown below.\n",
"\n",
"For more information about benchmarking, you can look at the [Anomalib Documentation](https://openvinotoolkit.github.io/anomalib/guides/benchmarking.html)."
"For more information about benchmarking, you can look at the [Anomalib Documentation](https://openvinotoolkit.github.io/anomalib/guides/benchmarking.html).\n"
]
},
{
Expand All @@ -383,12 +390,12 @@
" dataset:\n",
" name: [hazelnut]\n",
" format: [folder]\n",
" path: [/content/anomalib/datasets/hazelnut_toy]\n",
" root: [/content/anomalib/datasets/hazelnut_toy]\n",
" normal_dir: [good]\n",
" abnormal_dir: [colour]\n",
" normal_test_dir: [null]\n",
" task: [segmentation]\n",
" mask: [/content/anomalib/datasets/hazelnut_toy/mask/colour]\n",
" mask_dir: [/content/anomalib/datasets/hazelnut_toy/mask/colour]\n",
" extensions: [null]\n",
" split_ratio: [0.2]\n",
" image_size: [256, 128]\n",
@@ -455,7 +462,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
"version": "3.10.11"
},
"vscode": {
"interpreter": {
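With the grid-search file from the cell above saved to disk, the run the notebook builds toward would look roughly like the sketch below. The `tools/benchmarking/benchmark.py` path, its `--config` flag, and the `benchmark_config.yaml` filename are assumptions based on anomalib's v0.x layout, not confirmed by this diff.

```python
# Hedged sketch: launch the benchmarking sweep over the grid-search config above.
# Entrypoint path, --config flag, and file name are assumed (anomalib v0.x layout).
import subprocess

# Per the notebook text, the script distributes runs across available GPUs
# and writes results to a CSV file by default.
subprocess.run(
    ["python", "tools/benchmarking/benchmark.py", "--config", "benchmark_config.yaml"],
    check=True,
)
```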
15 changes: 8 additions & 7 deletions src/anomalib/data/mvtec.py
@@ -153,13 +153,14 @@ def make_mvtec_dataset(
] = mask_samples.image_path.values

# assert that the right mask files are associated with the right test images
assert (
samples.loc[samples.label_index == LabelName.ABNORMAL]
.apply(lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1)
.all()
), "Mismatch between anomalous images and ground truth masks. Make sure the mask files in 'ground_truth' \
folder follow the same naming convention as the anomalous images in the dataset (e.g. image: '000.png', \
mask: '000.png' or '000_mask.png')."
if len(samples.loc[samples.label_index == LabelName.ABNORMAL]):
assert (
samples.loc[samples.label_index == LabelName.ABNORMAL]
.apply(lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1)
.all()
), "Mismatch between anomalous images and ground truth masks. Make sure the mask files in 'ground_truth' \
folder follow the same naming convention as the anomalous images in the dataset (e.g. image: \
'000.png', mask: '000.png' or '000_mask.png')."

if split:
samples = samples[samples.split == split].reset_index(drop=True)
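The change above wraps the image/mask consistency assert so it only fires when the dataset actually contains anomalous samples, which is what makes normal-only MVTec folders loadable. Here is a self-contained illustration of the same guard on a toy DataFrame; the column names mirror the diff, while the data and the `ABNORMAL` constant are stand-ins for the real `LabelName` enum and samples frame.

```python
# Toy illustration of the guarded mask check added in mvtec.py above.
# Column names mirror the real samples DataFrame; the values are made up.
from pathlib import Path

import pandas as pd

ABNORMAL = 1  # stand-in for LabelName.ABNORMAL

samples = pd.DataFrame(
    {
        "image_path": ["train/good/000.png", "train/good/001.png"],
        "mask_path": ["", ""],
        "label_index": [0, 0],  # normal-only dataset: no anomalous rows
    }
)

abnormal = samples.loc[samples.label_index == ABNORMAL]
if len(abnormal):
    # Only validate mask naming when anomalous rows exist; a normal-only
    # dataset has no mask_path entries worth checking.
    assert abnormal.apply(
        lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1
    ).all(), "Mismatch between anomalous images and ground truth masks."
print("mask check skipped:", len(abnormal) == 0)
```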
