Mikubill · Mikubill · Feb 17, 2023 · Feb 17, 2023
diff --git a/scripts/cldm.py b/scripts/cldm.py
@@ -5,6 +5,7 @@
 import torch as th
 import torch.nn as nn
 from modules import devices, lowvram, shared
+from modules.devices import cond_cast_unet
 
 from ldm.modules.diffusionmodules.util import (
     conv_nd,
@@ -128,8 +129,8 @@ def forward(self, x, timesteps=None, context=None, **kwargs):
             assert timesteps is not None, ValueError(f"insufficient timestep: {timesteps}")
             hs = []
             with torch.no_grad():
-                t_emb = timestep_embedding(
-                    timesteps, self.model_channels, repeat_only=False)
+                t_emb = cond_cast_unet(timestep_embedding(
+                    timesteps, self.model_channels, repeat_only=False))
                 emb = self.time_embed(t_emb)
                 h = x.type(self.dtype)
                 for module in self.input_blocks:
@@ -212,6 +213,7 @@ def __init__(
         disable_middle_self_attn=False,
         use_linear_in_transformer=False,
     ):
+        use_fp16 = devices.dtype_unet == torch.float16
         super().__init__()
         if use_spatial_transformer:
             assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...'
@@ -427,11 +429,11 @@ def align(self, hint, h, w):
         return hint
 
     def forward(self, x, hint, timesteps, context, **kwargs):
-        t_emb = timestep_embedding(
-            timesteps, self.model_channels, repeat_only=False)
+        t_emb = cond_cast_unet(timestep_embedding(
+            timesteps, self.model_channels, repeat_only=False))
         emb = self.time_embed(t_emb)
 
-        guided_hint = self.input_hint_block(hint, emb, context)
+        guided_hint = self.input_hint_block(cond_cast_unet(hint), emb, context)
         outs = []
 
         h1, w1 = x.shape[-2:]