chaiNNer-org · RunDevelopment · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024
diff --git a/libs/spandrel/spandrel/__helpers/main_registry.py b/libs/spandrel/spandrel/__helpers/main_registry.py
@@ -2,6 +2,7 @@
 
 from ..architectures import (
     ATD,
+    CAIN,
     CRAFT,
     DAT,
     DCTLSA,
@@ -73,5 +74,6 @@
     ArchSupport.from_architecture(DRUNet.DRUNetArch()),
     ArchSupport.from_architecture(DnCNN.DnCNNArch()),
     ArchSupport.from_architecture(IPT.IPTArch()),
+    ArchSupport.from_architecture(CAIN.CAINArch()),
     ArchSupport.from_architecture(ESRGAN.ESRGANArch()),
 )
diff --git a/libs/spandrel/spandrel/architectures/CAIN/__init__.py b/libs/spandrel/spandrel/architectures/CAIN/__init__.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+import math
+from typing import Union
+
+from typing_extensions import override
+
+from spandrel.util import KeyCondition
+
+from ...__helpers.model_descriptor import Architecture, ImageModelDescriptor, StateDict
+from .arch.cain import CAIN
+from .arch.cain_encdec import CAIN_EncDec
+from .arch.cain_noca import CAIN_NoCA
+
+# Any of the three model classes that CAINArch.load can instantiate.
+_CainModel = Union[CAIN, CAIN_NoCA, CAIN_EncDec]
+
+
+class CAINArch(Architecture[_CainModel]):
+    """Architecture entry that detects and loads CAIN, CAIN_NoCA and CAIN_EncDec state dicts."""
+
+    def __init__(self) -> None:
+        """Register the architecture under the id "CAIN" with its key-based detection."""
+        # Keys of the `encoder.interpolate` module required by every variant.
+        # On their own they form the CAIN_NoCA condition.
+        interpolate_keys = (
+            "encoder.interpolate.headConv.weight",
+            "encoder.interpolate.headConv.bias",
+            "encoder.interpolate.body.0.body.0.body.0.conv.weight",
+            "encoder.interpolate.body.0.body.0.body.2.conv.weight",
+            "encoder.interpolate.body.0.body.1.body.0.conv.weight",
+            "encoder.interpolate.body.0.body.3.body.2.conv.weight",
+            "encoder.interpolate.body.3.body.8.body.0.conv.weight",
+            "encoder.interpolate.tailConv.weight",
+            "encoder.interpolate.tailConv.bias",
+        )
+        # `conv_du` keys additionally required for CAIN and CAIN_EncDec.
+        conv_du_keys = (
+            "encoder.interpolate.body.0.body.0.body.3.conv_du.0.weight",
+            "encoder.interpolate.body.0.body.1.body.3.conv_du.2.weight",
+            "encoder.interpolate.body.4.body.5.body.3.conv_du.2.weight",
+        )
+        # Keys of the extra encoder body and the decoder that only CAIN_EncDec has.
+        encdec_keys = (
+            "encoder.body.0.conv.weight",
+            "encoder.body.6.conv.weight",
+            "decoder.body.1.body.0.conv.weight",
+            "decoder.body.1.body.2.conv.bias",
+            "decoder.body.3.body.0.conv.weight",
+            "decoder.body.3.body.2.conv.bias",
+            "decoder.body.5.body.0.conv.weight",
+            "decoder.body.5.body.2.conv.bias",
+        )
+
+        cain = KeyCondition.has_all(*interpolate_keys, *conv_du_keys)
+        cain_noca = KeyCondition.has_all(*interpolate_keys)
+        cain_encdec = KeyCondition.has_all(
+            *encdec_keys, *interpolate_keys, *conv_du_keys
+        )
+
+        super().__init__(
+            id="CAIN",
+            detect=KeyCondition.has_any(cain, cain_noca, cain_encdec),
+        )
+
+    @override
+    def load(self, state_dict: StateDict) -> ImageModelDescriptor[_CainModel]:
+        """
+        Build the model variant matching `state_dict` and wrap it in a descriptor.
+
+        The presence of "encoder.body.0.conv.weight" selects CAIN_EncDec.
+        Otherwise the presence of the first `conv_du` key selects CAIN over
+        CAIN_NoCA. Hyperparameters are read from tensor shapes and key names.
+        """
+        tags: list[str] = []
+        is_encdec = "encoder.body.0.conv.weight" in state_dict
+
+        if is_encdec:
+            # model defaults: start_filters = 32, up_mode = "shuffle"
+            # start_filters is the first dimension of this decoder weight.
+            start_filters = state_dict["decoder.body.5.body.0.conv.weight"].shape[0]
+
+            # The upsampling mode is inferred from which `upconv` key exists.
+            if "decoder.body.0.upconv.0.conv.weight" in state_dict:
+                up_mode = "shuffle"
+            elif "decoder.body.0.upconv.weight" in state_dict:
+                up_mode = "transpose"
+            else:
+                up_mode = "direct"
+
+            model = CAIN_EncDec(start_filters=start_filters, up_mode=up_mode)
+            tags += ["EncDec", f"{start_filters}sf"]
+        else:
+            # model default: depth = 3
+            # stated relation: n_feats = 3 * (4**depth)
+            # NOTE(review): depth is recovered as a nested integer square root
+            # of `shape[1] // 3`, which is not the algebraic inverse of the
+            # relation above - confirm against the real shape of headConv in
+            # Interpolation that this yields the intended depth.
+            n_feats = state_dict["encoder.interpolate.headConv.weight"].shape[1]
+            depth = math.isqrt(math.isqrt(n_feats // 3))
+
+            has_conv_du = (
+                "encoder.interpolate.body.0.body.0.body.3.conv_du.0.weight"
+                in state_dict
+            )
+            if has_conv_du:
+                model = CAIN(depth=depth)
+            else:
+                model = CAIN_NoCA(depth=depth)
+                tags.append("NoCA")
+
+            tags.append(f"{depth}depth")
+
+        return ImageModelDescriptor(
+            model,
+            state_dict,
+            architecture=self,
+            purpose="Restoration",
+            tags=tags,
+            supports_half=True,
+            supports_bfloat16=True,
+            scale=1,
+            input_channels=3,
+            output_channels=3,
+        )
diff --git a/libs/spandrel/spandrel/architectures/CAIN/arch/LICENSE b/libs/spandrel/spandrel/architectures/CAIN/arch/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Myungsub Choi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/libs/spandrel/spandrel/architectures/CAIN/arch/cain.py b/libs/spandrel/spandrel/architectures/CAIN/arch/cain.py
@@ -0,0 +1,70 @@
+import torch
+import torch.nn as nn
+
+from spandrel.util import store_hyperparameters
+
+from .common import InOutPaddings, Interpolation, PixelShuffle, sub_mean
+
+
+class Encoder(nn.Module):
+    """Shuffle both inputs with a `1 / 2**depth` PixelShuffle and fuse them with Interpolation."""
+
+    def __init__(self, in_channels=3, depth=3):
+        super().__init__()
+
+        # Shuffle pixels to expand in channel dimension
+        self.shuffler = PixelShuffle(1 / 2**depth)
+
+        # FF_RCAN or FF_Resblocks
+        self.interpolate = Interpolation(
+            5,
+            12,
+            in_channels * (4**depth),
+            act=nn.LeakyReLU(0.2, True),
+        )
+
+    def forward(self, x1, x2):
+        """
+        Encoder: Shuffle-spread --> Feature Fusion --> Return fused features
+        """
+        # The same shuffler is applied to each input before fusion.
+        return self.interpolate(self.shuffler(x1), self.shuffler(x2))
+
+
+class Decoder(nn.Module):
+    """Apply a single `2**depth` PixelShuffle to the fused features."""
+
+    def __init__(self, depth=3):
+        super().__init__()
+
+        self.shuffler = PixelShuffle(2**depth)
+
+    def forward(self, feats):
+        """Return `feats` passed through the shuffler."""
+        return self.shuffler(feats)
+
+
+@store_hyperparameters(extra_parameters={"kind": "CAIN"})
+class CAIN(nn.Module):
+    """Two-input model made of the shuffle-based Encoder and Decoder of this module."""
+
+    hyperparameters = {}
+
+    def __init__(self, depth=3):
+        super().__init__()
+
+        self.encoder = Encoder(in_channels=3, depth=depth)
+        self.decoder = Decoder(depth=depth)
+
+    def forward(self, x1: torch.Tensor, x2: torch.Tensor):
+        """
+        Run both inputs through the encoder/decoder pair.
+
+        Returns a tuple `(out, feats)`: the decoded output and the encoder
+        features it was decoded from.
+        """
+        # `sub_mean` returns the processed tensor and a second value per input;
+        # the two second values are averaged and added back at the end.
+        x1, mean1 = sub_mean(x1)
+        x2, mean2 = sub_mean(x2)
+
+        # Both padding callables are derived from x1 only.
+        pad_in, pad_out = InOutPaddings(x1)
+        x1 = pad_in(x1)
+        x2 = pad_in(x2)
+
+        feats = self.encoder(x1, x2)
+        out = pad_out(self.decoder(feats))
+
+        out += (mean1 + mean2) / 2
+
+        return out, feats
diff --git a/libs/spandrel/spandrel/architectures/CAIN/arch/cain_encdec.py b/libs/spandrel/spandrel/architectures/CAIN/arch/cain_encdec.py
@@ -0,0 +1,101 @@
+import torch.nn as nn
+
+from spandrel.util import store_hyperparameters
+
+from .common import (
+    ConvNorm,
+    InOutPaddings,
+    Interpolation,
+    ResBlock,
+    UpConvNorm,
+    conv7x7,
+    sub_mean,
+)
+
+
+class Encoder(nn.Module):
+    """Extract features from each input with a strided ConvNorm stack, then fuse them."""
+
+    def __init__(self, in_channels=3, nf_start=32, norm=False):
+        super().__init__()
+
+        # One activation instance is shared by the body and the fusion module.
+        act = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+        # Channel widths after each ConvNorm: 1x, 2x, 4x and 6x `nf_start`.
+        widths = (nf_start * 1, nf_start * 2, nf_start * 4, nf_start * 6)
+
+        layers = [ConvNorm(in_channels, widths[0], 7, stride=1, norm=norm)]
+        for c_in, c_out in zip(widths, widths[1:]):
+            layers += [act, ConvNorm(c_in, c_out, 5, stride=2, norm=norm)]
+        self.body = nn.Sequential(*layers)
+
+        self.interpolate = Interpolation(5, 12, widths[-1], reduction=16, act=act)
+
+    def forward(self, x1, x2):
+        """
+        Encoder: Feature Extraction --> Feature Fusion --> Return
+        """
+        # The same body is applied to each input before fusion.
+        return self.interpolate(self.body(x1), self.body(x2))
+
+
+class Decoder(nn.Module):
+    """Three UpConvNorm + ResBlock stages followed by a final 7x7 convolution."""
+
+    def __init__(self, in_channels=192, out_channels=3, norm=False, up_mode="shuffle"):
+        super().__init__()
+
+        act = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+        # Channel widths per stage, e.g. [192, 128, 64, 32] or [186, 124, 62, 31].
+        widths = [
+            in_channels,
+            (in_channels * 2) // 3,
+            in_channels // 3,
+            in_channels // 6,
+        ]
+
+        stages = []
+        for c_in, c_out in zip(widths, widths[1:]):
+            stages.append(UpConvNorm(c_in, c_out, mode=up_mode, norm=norm))
+            stages.append(ResBlock(c_out, c_out, act=act))
+        self.body = nn.Sequential(*stages, conv7x7(widths[-1], out_channels))
+
+    def forward(self, feats):
+        """Return `feats` passed through the decoder body."""
+        return self.body(feats)
+
+
+@store_hyperparameters(extra_parameters={"kind": "CAIN_EncDec"})
+class CAIN_EncDec(nn.Module):
+    """
+    Two-input model made of the convolutional Encoder and Decoder of this module.
+
+    Args:
+        start_filters: base filter count; the encoder emits `start_filters * 6`
+            channels and the decoder is built to consume the same amount.
+        up_mode: upsampling mode forwarded to every UpConvNorm of the decoder.
+    """
+
+    hyperparameters = {}
+
+    def __init__(self, start_filters=32, up_mode="shuffle"):
+        super().__init__()
+
+        # The encoder must be built with the same base width as the decoder.
+        # Previously it silently kept its default of 32, so any other
+        # `start_filters` produced mismatched encoder/decoder channel counts.
+        self.encoder = Encoder(in_channels=3, nf_start=start_filters, norm=False)
+        self.decoder = Decoder(
+            in_channels=start_filters * 6, norm=False, up_mode=up_mode
+        )
+
+    def forward(self, x1, x2):
+        """
+        Run both inputs through the encoder/decoder pair.
+
+        Returns a tuple `(out, feats)`: the decoded output and the encoder
+        features it was decoded from.
+        """
+        # `sub_mean` returns the processed tensor and a second value per input;
+        # the two second values are averaged and added back at the end.
+        x1, m1 = sub_mean(x1)
+        x2, m2 = sub_mean(x2)
+
+        # Both padding callables are derived from x1 only.
+        paddingInput, paddingOutput = InOutPaddings(x1)
+        x1 = paddingInput(x1)
+        x2 = paddingInput(x2)
+
+        feats = self.encoder(x1, x2)
+        out = self.decoder(feats)
+
+        out = paddingOutput(out)
+
+        mi = (m1 + m2) / 2
+        out += mi
+
+        return out, feats