Replace Original Merge Method Major Cleanup

AI-Casanova · Nov 18, 2023 · d9d3c2e
1 parent 026790f
commit d9d3c2e
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 325 deletions.
diff --git a/modules/extras.py b/modules/extras.py
@@ -1,5 +1,4 @@
 import os
-import re
 import html
 import json
 import shutil
@@ -13,8 +12,6 @@
 
 from modules import shared, images, sd_models, sd_vae, sd_models_config, devices
 
-checkpoint_dict_skip_on_merge = ["cond_stage_model.transformer.text_model.embeddings.position_ids"]
-
 
 def run_pnginfo(image):
     if image is None:
@@ -55,202 +52,7 @@ def to_half(tensor, enable):
     return tensor
 
 
-def run_modelmerger(id_task, primary_model_name, secondary_model_name, tertiary_model_name, interp_method, multiplier,
-                    save_as_half, custom_name, checkpoint_format, config_source, bake_in_vae, discard_weights,
-                    save_metadata):  # pylint: disable=unused-argument
-    shared.state.begin('merge')
-    save_as_half = save_as_half == 0
-
-    def fail(message):
-        shared.state.textinfo = message
-        shared.state.end()
-        return [*[gr.update() for _ in range(4)], message]
-
-    def weighted_sum(theta0, theta1, alpha):
-        return ((1 - alpha) * theta0) + (alpha * theta1)
-
-    def get_difference(theta1, theta2):
-        return theta1 - theta2
-
-    def add_difference(theta0, theta1_2_diff, alpha):
-        return theta0 + (alpha * theta1_2_diff)
-
-    def filename_weighted_sum():
-        a = primary_model_info.model_name
-        b = secondary_model_info.model_name
-        Ma = round(1 - multiplier, 2)
-        Mb = round(multiplier, 2)
-        return f"{Ma}({a}) + {Mb}({b})"
-
-    def filename_add_difference():
-        a = primary_model_info.model_name
-        b = secondary_model_info.model_name
-        c = tertiary_model_info.model_name
-        M = round(multiplier, 2)
-        return f"{a} + {M}({b} - {c})"
-
-    def filename_nothing():
-        return primary_model_info.model_name
-
-    theta_funcs = {
-        "Weighted sum": (filename_weighted_sum, None, weighted_sum),
-        "Add difference": (filename_add_difference, get_difference, add_difference),
-        "No interpolation": (filename_nothing, None, None),
-    }
-    filename_generator, theta_func1, theta_func2 = theta_funcs[interp_method]
-    shared.state.job_count = (1 if theta_func1 else 0) + (1 if theta_func2 else 0)
-    if not primary_model_name or primary_model_name == 'None':
-        return fail("Failed: Merging requires a primary model.")
-    primary_model_info = sd_models.checkpoints_list[primary_model_name]
-    if theta_func2 and (not secondary_model_name or secondary_model_name == 'None'):
-        return fail("Failed: Merging requires a secondary model.")
-    secondary_model_info = sd_models.checkpoints_list[secondary_model_name] if theta_func2 else None
-    if theta_func1 and (not tertiary_model_name or tertiary_model_name == 'None'):
-        return fail(f"Failed: Interpolation method ({interp_method}) requires a tertiary model.")
-    tertiary_model_info = sd_models.checkpoints_list[tertiary_model_name] if theta_func1 else None
-    result_is_inpainting_model = False
-    result_is_instruct_pix2pix_model = False
-    if theta_func2:
-        shared.state.textinfo = "Loading B"
-        shared.log.info(f"Model merge loading secondary model: {secondary_model_info.filename}")
-        theta_1 = sd_models.read_state_dict(secondary_model_info.filename)
-    else:
-        theta_1 = None
-    if theta_func1:
-        shared.state.textinfo = "Loading C"
-        shared.log.info(f"Model merge loading tertiary model: {tertiary_model_info.filename}")
-        theta_2 = sd_models.read_state_dict(tertiary_model_info.filename)
-        shared.state.textinfo = 'Merging B and C'
-        shared.state.sampling_steps = len(theta_1.keys())
-        for key in tqdm.tqdm(theta_1.keys()):
-            if key in checkpoint_dict_skip_on_merge:
-                continue
-            if 'model' in key:
-                if key in theta_2:
-                    t2 = theta_2.get(key, torch.zeros_like(theta_1[key]))
-                    theta_1[key] = theta_func1(theta_1[key], t2)
-                else:
-                    theta_1[key] = torch.zeros_like(theta_1[key])
-            shared.state.sampling_step += 1
-        del theta_2
-        shared.state.nextjob()
-    shared.state.textinfo = f"Loading {primary_model_info.filename}..."
-    shared.log.info(f"Model merge loading primary model: {primary_model_info.filename}")
-    theta_0 = sd_models.read_state_dict(primary_model_info.filename)
-    shared.log.info("Model merge: running")
-    shared.state.textinfo = 'Merging A and B'
-    shared.state.sampling_steps = len(theta_0.keys())
-    for key in tqdm.tqdm(theta_0.keys()):
-        if theta_1 and 'model' in key and key in theta_1:
-            if key in checkpoint_dict_skip_on_merge:
-                continue
-            a = theta_0[key]
-            b = theta_1[key]
-            # this enables merging an inpainting model (A) with another one (B);
-            # where normal model would have 4 channels, for latenst space, inpainting model would
-            # have another 4 channels for unmasked picture's latent space, plus one channel for mask, for a total of 9
-            if a.shape != b.shape and a.shape[0:1] + a.shape[2:] == b.shape[0:1] + b.shape[2:]:
-                if a.shape[1] == 4 and b.shape[1] == 9:
-                    raise RuntimeError(
-                        "When merging inpainting model with a normal one, A must be the inpainting model.")
-                if a.shape[1] == 4 and b.shape[1] == 8:
-                    raise RuntimeError(
-                        "When merging instruct-pix2pix model with a normal one, A must be the instruct-pix2pix model.")
-                if a.shape[1] == 8 and b.shape[1] == 4:  # If we have an Instruct-Pix2Pix model...
-                    theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b,
-                                                             multiplier)  # Merge only the vectors the models have in common.  Otherwise we get an error due to dimension mismatch.
-                    result_is_instruct_pix2pix_model = True
-                else:
-                    assert a.shape[1] == 9 and b.shape[
-                        1] == 4, f"Bad dimensions for merged layer {key}: A={a.shape}, B={b.shape}"
-                    theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
-                    result_is_inpainting_model = True
-            else:
-                theta_0[key] = theta_func2(a, b, multiplier)
-            theta_0[key] = to_half(theta_0[key], save_as_half)
-        shared.state.sampling_step += 1
-    del theta_1
-    bake_in_vae_filename = sd_vae.vae_dict.get(bake_in_vae, None)
-    if bake_in_vae_filename is not None:
-        shared.log.info(f"Model merge: baking in VAE: {bake_in_vae_filename}")
-        shared.state.textinfo = 'Baking in VAE'
-        vae_dict = sd_vae.load_vae_dict(bake_in_vae_filename)
-        for key in vae_dict.keys():
-            theta_0_key = 'first_stage_model.' + key
-            if theta_0_key in theta_0:
-                theta_0[theta_0_key] = to_half(vae_dict[key], save_as_half)
-        del vae_dict
-    if save_as_half and not theta_func2:
-        for key in theta_0.keys():
-            theta_0[key] = to_half(theta_0[key], save_as_half)
-    if discard_weights:
-        regex = re.compile(discard_weights)
-        for key in list(theta_0):
-            if re.search(regex, key):
-                theta_0.pop(key, None)
-    ckpt_dir = shared.opts.ckpt_dir or sd_models.model_path
-    filename = filename_generator() if custom_name == '' else custom_name
-    filename += ".inpainting" if result_is_inpainting_model else ""
-    filename += ".instruct-pix2pix" if result_is_instruct_pix2pix_model else ""
-    filename += "." + checkpoint_format
-    output_modelname = os.path.join(ckpt_dir, filename)
-    shared.state.nextjob()
-    shared.state.textinfo = "Saving"
-    metadata = None
-    if save_metadata:
-        metadata = {"format": "pt", "sd_merge_models": {}}
-        merge_recipe = {
-            "type": "webui",  # indicate this model was merged with webui's built-in merger
-            "primary_model_hash": primary_model_info.sha256,
-            "secondary_model_hash": secondary_model_info.sha256 if secondary_model_info else None,
-            "tertiary_model_hash": tertiary_model_info.sha256 if tertiary_model_info else None,
-            "interp_method": interp_method,
-            "multiplier": multiplier,
-            "save_as_half": save_as_half,
-            "custom_name": custom_name,
-            "config_source": config_source,
-            "bake_in_vae": bake_in_vae,
-            "discard_weights": discard_weights,
-            "is_inpainting": result_is_inpainting_model,
-            "is_instruct_pix2pix": result_is_instruct_pix2pix_model
-        }
-        metadata["sd_merge_recipe"] = json.dumps(merge_recipe)
-
-        def add_model_metadata(checkpoint_info):
-            checkpoint_info.calculate_shorthash()
-            metadata["sd_merge_models"][checkpoint_info.sha256] = {
-                "name": checkpoint_info.name,
-                "legacy_hash": checkpoint_info.hash,
-                "sd_merge_recipe": checkpoint_info.metadata.get("sd_merge_recipe", None)
-            }
-            metadata["sd_merge_models"].update(checkpoint_info.metadata.get("sd_merge_models", {}))
-
-        add_model_metadata(primary_model_info)
-        if secondary_model_info:
-            add_model_metadata(secondary_model_info)
-        if tertiary_model_info:
-            add_model_metadata(tertiary_model_info)
-        metadata["sd_merge_models"] = json.dumps(metadata["sd_merge_models"])
-
-    _, extension = os.path.splitext(output_modelname)
-    theta_0 = theta_0.to_dict()
-    if extension.lower() == ".safetensors":
-        safetensors.torch.save_file(theta_0, output_modelname, metadata=metadata)
-    else:
-        torch.save(theta_0, output_modelname)
-    sd_models.list_models()
-    created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None)
-    if created_model:
-        created_model.calculate_shorthash()
-    create_config(output_modelname, config_source, primary_model_info, secondary_model_info, tertiary_model_info)
-    shared.log.info(f"Model merge saved: {output_modelname}.")
-    shared.state.textinfo = "Checkpoint saved"
-    shared.state.end()
-    return [*[gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)],
-            "Checkpoint saved to " + output_modelname]
-
-
-def run_MEHmodelmerger(id_task, **kwargs):  # pylint: disable=unused-argument
+def run_modelmerger(id_task, **kwargs):  # pylint: disable=unused-argument
     shared.state.begin('merge')
 
     def fail(message):
@@ -377,6 +179,7 @@ def add_model_metadata(checkpoint_info):
         metadata["sd_merge_models"] = json.dumps(metadata["sd_merge_models"])
 
     _, extension = os.path.splitext(output_modelname)
+
     try:
         theta_0 = theta_0.to_dict()
     except:
@@ -391,7 +194,8 @@ def add_model_metadata(checkpoint_info):
     created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None)
     if created_model:
         created_model.calculate_shorthash()
-    devices.torch_gc(force=True)
+    if kwargs["device"].type != "cpu":
+        devices.torch_gc(force=True)
     shared.log.info(f"Model merge saved: {output_modelname}.")
     shared.state.textinfo = "Checkpoint saved"
     shared.state.end()

diff --git a/modules/merging/merge.py b/modules/merging/merge.py
@@ -4,6 +4,7 @@
 from typing import Dict, Optional, Tuple
 import safetensors.torch
 import torch
+from tensordict import TensorDict
 from tqdm import tqdm
 import modules.memstats
 import modules.devices as devices
@@ -37,7 +38,6 @@
 )
 
 
-
 def fix_clip(model: Dict) -> Dict:
     if KEY_POSITION_IDS in model.keys():
         model[KEY_POSITION_IDS] = torch.tensor(
@@ -80,9 +80,9 @@ def load_thetas(
 ) -> Dict:
     log_vram("before loading models")
     if prune:
-        thetas = {k: prune_sd_model(read_state_dict(m, "cpu")) for k, m in models.items()}
+        thetas = {k: prune_sd_model(TensorDict.from_dict(read_state_dict(m, "cpu"))) for k, m in models.items()}
     else:
-        thetas = {k: read_state_dict(m, device) for k, m in models.items()}
+        thetas = {k: TensorDict.from_dict(read_state_dict(m, device)) for k, m in models.items()}
 
     for model_key, model in thetas.items():
         for key, block in model.items():
@@ -152,7 +152,7 @@ def un_prune_model(
         del thetas
         devices.torch_gc(force=True)
         log_vram("remove thetas")
-        original_a = read_state_dict(models["model_a"], device)
+        original_a = TensorDict.from_dict(read_state_dict(models["model_a"], device))
         for key in tqdm(original_a.keys(), desc="un-prune model a"):
             if KEY_POSITION_IDS in key:
                 continue
@@ -162,8 +162,7 @@ def un_prune_model(
                     merged.update({key: merged[key].half()})
         del original_a
         devices.torch_gc(force=True)
-        # log_vram("remove original_a")
-        original_b = read_state_dict(models["model_b"], device)
+        original_b = TensorDict.from_dict(read_state_dict(models["model_b"], device))
         for key in tqdm(original_b.keys(), desc="un-prune model b"):
             if KEY_POSITION_IDS in key:
                 continue