From 467a656e3c2b6bd3fad8f524b6cad3371ce505e0 Mon Sep 17 00:00:00 2001
From: Victor Turrisi <vt.turrisi@gmail.com>
Date: Fri, 1 Apr 2022 11:42:10 +0200
Subject: [PATCH] Add MosaicML's ChannelLast optimization (#235)

---
 README.md            |  1 +
 requirements.txt     |  1 +
 setup.py             |  1 +
 solo/methods/base.py | 14 ++++++++++++++
 4 files changed, 17 insertions(+)

diff --git a/README.md b/README.md
index 3a445630..1aa5e375 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ While the library is self-contained, it is possible to use the models outside of
 ---
 
 ## News
+* **[Apr 01 2022]**: :mag: Added [MosaicML's](https://github.com/mosaicml/composer) ChannelLast operation which considerably decreases training times.
 * **[Feb 04 2022]**: :partying_face: Paper got accepted to JMLR.
 * **[Jan 31 2022]**: :eye: Added ConvNeXt support with timm.
 * **[Dec 20 2021]**: :thermometer: Added ImageNet results, scripts and checkpoints for MoCo V2+.
diff --git a/requirements.txt b/requirements.txt
index f9fea4e6..30f44a23 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ tqdm
 wandb
 scipy
 timm
+mosaicml
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 41378818..34d8e4f6 100644
--- a/setup.py
+++ b/setup.py
@@ -54,6 +54,7 @@ def parse_requirements(path):
         "wandb",
         "scipy",
         "timm",
+        "mosaicml",
     ],
     extras_require=EXTRA_REQUIREMENTS,
     dependency_links=["https://developer.download.nvidia.com/compute/redist"],
diff --git a/solo/methods/base.py b/solo/methods/base.py
index 5ab3bd44..07f59f6e 100644
--- a/solo/methods/base.py
+++ b/solo/methods/base.py
@@ -22,6 +22,7 @@
 from functools import partial
 from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
 
+import composer.functional as cf
 import pytorch_lightning as pl
 import torch
 import torch.nn as nn
@@ -115,6 +116,7 @@ def __init__(
         lr_decay_steps: Sequence = None,
         knn_eval: bool = False,
         knn_k: int = 20,
+        no_mosaicml_channel_last=False,
         **kwargs,
     ):
         """Base model that implements all basic operations for all self-supervised methods.
@@ -157,6 +159,9 @@ def __init__(
                 step. Defaults to None.
             knn_eval (bool): enables online knn evaluation while training.
             knn_k (int): the number of neighbors to use for knn.
+            no_mosaicml_channel_last (bool): disables MosaicML's ChannelLast operation, which
+                speeds up training considerably (https://github.com/mosaicml/composer).
+                Defaults to False.
 
         .. note::
             When using distributed data parallel, the batch size and the number of workers are
@@ -258,6 +263,11 @@ def __init__(
                 "issues when resuming a checkpoint."
             )
 
+        # https://docs.mosaicml.com/en/v0.5.0/method_cards/channels_last.html
+        # can provide up to ~20% speed up
+        if not no_mosaicml_channel_last:
+            cf.apply_channels_last(self)
+
     @staticmethod
     def add_model_specific_args(parent_parser: ArgumentParser) -> ArgumentParser:
         """Adds shared basic arguments that are shared for all methods.
@@ -331,6 +341,10 @@ def add_model_specific_args(parent_parser: ArgumentParser) -> ArgumentParser:
         parser.add_argument("--knn_eval", action="store_true")
         parser.add_argument("--knn_k", default=20, type=int)
 
+        # mosaicml optimization
+        # disables mosaicml channel last optimization
+        parser.add_argument("--no_mosaicml_channel_last", action="store_true")
+
         return parent_parser
 
     def set_loaders(self, train_loader: DataLoader = None, val_loader: DataLoader = None) -> None: