Skip to content

Commit

Permalink
Replace Original Merge Method Major Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
AI-Casanova committed Nov 18, 2023
1 parent 026790f commit d9d3c2e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 325 deletions.
204 changes: 4 additions & 200 deletions modules/extras.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import re
import html
import json
import shutil
Expand All @@ -13,8 +12,6 @@

from modules import shared, images, sd_models, sd_vae, sd_models_config, devices

# State-dict keys that the merge loops in run_modelmerger skip (leave untouched)
# instead of interpolating between checkpoints.
checkpoint_dict_skip_on_merge = ["cond_stage_model.transformer.text_model.embeddings.position_ids"]


def run_pnginfo(image):
if image is None:
Expand Down Expand Up @@ -55,202 +52,7 @@ def to_half(tensor, enable):
return tensor


def run_modelmerger(id_task, primary_model_name, secondary_model_name, tertiary_model_name, interp_method, multiplier,
                    save_as_half, custom_name, checkpoint_format, config_source, bake_in_vae, discard_weights,
                    save_metadata):  # pylint: disable=unused-argument
    """Merge up to three checkpoints into a new checkpoint file and save it.

    Model A (primary) is combined with model B (secondary) and, for
    "Add difference", with the difference between B and model C (tertiary).
    Progress is reported through ``shared.state``.

    Args:
        id_task: Unused by this function.
        primary_model_name: Key of model A in ``sd_models.checkpoints_list``.
        secondary_model_name: Key of model B; required unless the method is
            "No interpolation".
        tertiary_model_name: Key of model C; required for "Add difference".
        interp_method: "Weighted sum", "Add difference" or "No interpolation".
        multiplier: Blend factor (alpha) handed to the interpolation function.
        save_as_half: Compared against ``0``; the resulting boolean is passed
            to ``to_half``. NOTE(review): presumably an option index coming
            from the UI - confirm against the caller.
        custom_name: Output file name without extension; when empty a name is
            generated from the model names and the multiplier.
        checkpoint_format: Extension appended to the output file name.
        config_source: Forwarded to ``create_config``.
        bake_in_vae: Key into ``sd_vae.vae_dict``; when it resolves to a file,
            its weights replace the matching ``first_stage_model.*`` entries.
        discard_weights: Optional regular expression; matching keys are
            removed from the result before saving.
        save_metadata: When truthy, merge metadata is built and written
            alongside ``.safetensors`` output.

    Returns:
        A list of four gradio updates followed by a status message. On a
        validation failure the updates are no-ops and the message describes
        the problem.

    Raises:
        RuntimeError: If A has 4 input channels while B has 9 or 8.
        AssertionError: If A and B differ on dimension 1 in any other
            unsupported way.
    """
    shared.state.begin('merge')
    save_as_half = save_as_half == 0

    def fail(message):
        # Close the job opened by shared.state.begin() before reporting.
        shared.state.textinfo = message
        shared.state.end()
        return [*[gr.update() for _ in range(4)], message]

    def weighted_sum(theta0, theta1, alpha):
        return ((1 - alpha) * theta0) + (alpha * theta1)

    def get_difference(theta1, theta2):
        return theta1 - theta2

    def add_difference(theta0, theta1_2_diff, alpha):
        return theta0 + (alpha * theta1_2_diff)

    def filename_weighted_sum():
        a = primary_model_info.model_name
        b = secondary_model_info.model_name
        Ma = round(1 - multiplier, 2)
        Mb = round(multiplier, 2)
        return f"{Ma}({a}) + {Mb}({b})"

    def filename_add_difference():
        a = primary_model_info.model_name
        b = secondary_model_info.model_name
        c = tertiary_model_info.model_name
        M = round(multiplier, 2)
        return f"{a} + {M}({b} - {c})"

    def filename_nothing():
        return primary_model_info.model_name

    # method -> (default file name generator, B/C pre-merge step, A/B merge step)
    theta_funcs = {
        "Weighted sum": (filename_weighted_sum, None, weighted_sum),
        "Add difference": (filename_add_difference, get_difference, add_difference),
        "No interpolation": (filename_nothing, None, None),
    }
    if interp_method not in theta_funcs:
        # Report through fail() so the job state is closed instead of leaking a KeyError.
        return fail(f"Failed: Unknown interpolation method ({interp_method}).")
    filename_generator, theta_func1, theta_func2 = theta_funcs[interp_method]
    shared.state.job_count = (1 if theta_func1 else 0) + (1 if theta_func2 else 0)

    if not primary_model_name or primary_model_name == 'None':
        return fail("Failed: Merging requires a primary model.")
    primary_model_info = sd_models.checkpoints_list[primary_model_name]
    if theta_func2 and (not secondary_model_name or secondary_model_name == 'None'):
        return fail("Failed: Merging requires a secondary model.")
    secondary_model_info = sd_models.checkpoints_list[secondary_model_name] if theta_func2 else None
    if theta_func1 and (not tertiary_model_name or tertiary_model_name == 'None'):
        return fail(f"Failed: Interpolation method ({interp_method}) requires a tertiary model.")
    tertiary_model_info = sd_models.checkpoints_list[tertiary_model_name] if theta_func1 else None

    # Validate the discard pattern before any model is loaded, so a typo does
    # not surface only after the whole merge has been computed.
    discard_regex = None
    if discard_weights:
        try:
            discard_regex = re.compile(discard_weights)
        except re.error as err:
            return fail(f"Failed: Invalid discard weights pattern: {err}")

    result_is_inpainting_model = False
    result_is_instruct_pix2pix_model = False

    if theta_func2:
        shared.state.textinfo = "Loading B"
        shared.log.info(f"Model merge loading secondary model: {secondary_model_info.filename}")
        theta_1 = sd_models.read_state_dict(secondary_model_info.filename)
    else:
        theta_1 = None

    if theta_func1:
        shared.state.textinfo = "Loading C"
        shared.log.info(f"Model merge loading tertiary model: {tertiary_model_info.filename}")
        theta_2 = sd_models.read_state_dict(tertiary_model_info.filename)
        shared.state.textinfo = 'Merging B and C'
        shared.state.sampling_steps = len(theta_1.keys())
        # Replace B in place by (B - C); weights that C lacks become zero.
        for key in tqdm.tqdm(theta_1.keys()):
            if key in checkpoint_dict_skip_on_merge:
                continue
            if 'model' in key:
                if key in theta_2:
                    theta_1[key] = theta_func1(theta_1[key], theta_2[key])
                else:
                    theta_1[key] = torch.zeros_like(theta_1[key])
            shared.state.sampling_step += 1
        del theta_2
        shared.state.nextjob()

    shared.state.textinfo = f"Loading {primary_model_info.filename}..."
    shared.log.info(f"Model merge loading primary model: {primary_model_info.filename}")
    theta_0 = sd_models.read_state_dict(primary_model_info.filename)
    shared.log.info("Model merge: running")
    shared.state.textinfo = 'Merging A and B'
    shared.state.sampling_steps = len(theta_0.keys())
    for key in tqdm.tqdm(theta_0.keys()):
        if theta_1 and 'model' in key and key in theta_1:
            if key in checkpoint_dict_skip_on_merge:
                continue
            a = theta_0[key]
            b = theta_1[key]
            # This enables merging an inpainting model (A) with another one (B):
            # where a normal model would have 4 channels for latent space, an inpainting model
            # has another 4 channels for the unmasked picture's latent space plus one channel
            # for the mask, for a total of 9.
            if a.shape != b.shape and a.shape[0:1] + a.shape[2:] == b.shape[0:1] + b.shape[2:]:
                if a.shape[1] == 4 and b.shape[1] == 9:
                    raise RuntimeError(
                        "When merging inpainting model with a normal one, A must be the inpainting model.")
                if a.shape[1] == 4 and b.shape[1] == 8:
                    raise RuntimeError(
                        "When merging instruct-pix2pix model with a normal one, A must be the instruct-pix2pix model.")
                if a.shape[1] == 8 and b.shape[1] == 4:  # If we have an Instruct-Pix2Pix model...
                    # Merge only the vectors the models have in common.
                    # Otherwise we get an error due to dimension mismatch.
                    theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
                    result_is_instruct_pix2pix_model = True
                else:
                    assert a.shape[1] == 9 and b.shape[1] == 4, \
                        f"Bad dimensions for merged layer {key}: A={a.shape}, B={b.shape}"
                    theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
                    result_is_inpainting_model = True
            else:
                theta_0[key] = theta_func2(a, b, multiplier)
            theta_0[key] = to_half(theta_0[key], save_as_half)
        shared.state.sampling_step += 1
    del theta_1

    bake_in_vae_filename = sd_vae.vae_dict.get(bake_in_vae, None)
    if bake_in_vae_filename is not None:
        shared.log.info(f"Model merge: baking in VAE: {bake_in_vae_filename}")
        shared.state.textinfo = 'Baking in VAE'
        vae_dict = sd_vae.load_vae_dict(bake_in_vae_filename)
        for key in vae_dict.keys():
            theta_0_key = 'first_stage_model.' + key
            if theta_0_key in theta_0:
                theta_0[theta_0_key] = to_half(vae_dict[key], save_as_half)
        del vae_dict

    # Without an A/B merge pass nothing has been converted yet, so do it here.
    if save_as_half and not theta_func2:
        for key in theta_0.keys():
            theta_0[key] = to_half(theta_0[key], save_as_half)

    if discard_regex is not None:
        # Iterate over a snapshot of the keys because entries are removed.
        for key in list(theta_0):
            if discard_regex.search(key):
                theta_0.pop(key, None)

    ckpt_dir = shared.opts.ckpt_dir or sd_models.model_path
    # Fall back to the generated name for any empty value (including None).
    filename = custom_name if custom_name else filename_generator()
    filename += ".inpainting" if result_is_inpainting_model else ""
    filename += ".instruct-pix2pix" if result_is_instruct_pix2pix_model else ""
    filename += "." + checkpoint_format
    output_modelname = os.path.join(ckpt_dir, filename)

    shared.state.nextjob()
    shared.state.textinfo = "Saving"
    metadata = None
    if save_metadata:
        metadata = {"format": "pt", "sd_merge_models": {}}
        merge_recipe = {
            "type": "webui",  # indicate this model was merged with webui's built-in merger
            "primary_model_hash": primary_model_info.sha256,
            "secondary_model_hash": secondary_model_info.sha256 if secondary_model_info else None,
            "tertiary_model_hash": tertiary_model_info.sha256 if tertiary_model_info else None,
            "interp_method": interp_method,
            "multiplier": multiplier,
            "save_as_half": save_as_half,
            "custom_name": custom_name,
            "config_source": config_source,
            "bake_in_vae": bake_in_vae,
            "discard_weights": discard_weights,
            "is_inpainting": result_is_inpainting_model,
            "is_instruct_pix2pix": result_is_instruct_pix2pix_model
        }
        metadata["sd_merge_recipe"] = json.dumps(merge_recipe)

        def add_model_metadata(checkpoint_info):
            # Record this source model and carry over any merge history it already has.
            checkpoint_info.calculate_shorthash()
            metadata["sd_merge_models"][checkpoint_info.sha256] = {
                "name": checkpoint_info.name,
                "legacy_hash": checkpoint_info.hash,
                "sd_merge_recipe": checkpoint_info.metadata.get("sd_merge_recipe", None)
            }
            metadata["sd_merge_models"].update(checkpoint_info.metadata.get("sd_merge_models", {}))

        add_model_metadata(primary_model_info)
        if secondary_model_info:
            add_model_metadata(secondary_model_info)
        if tertiary_model_info:
            add_model_metadata(tertiary_model_info)
        # The nested mapping is serialised to a JSON string before saving.
        metadata["sd_merge_models"] = json.dumps(metadata["sd_merge_models"])

    _, extension = os.path.splitext(output_modelname)
    # NOTE(review): assumes read_state_dict returns an object exposing to_dict() - confirm.
    theta_0 = theta_0.to_dict()
    if extension.lower() == ".safetensors":
        safetensors.torch.save_file(theta_0, output_modelname, metadata=metadata)
    else:
        # Metadata is not written on this path.
        torch.save(theta_0, output_modelname)

    sd_models.list_models()
    created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None)
    if created_model:
        created_model.calculate_shorthash()
    create_config(output_modelname, config_source, primary_model_info, secondary_model_info, tertiary_model_info)
    shared.log.info(f"Model merge saved: {output_modelname}.")
    shared.state.textinfo = "Checkpoint saved"
    shared.state.end()
    return [*[gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)],
            "Checkpoint saved to " + output_modelname]


def run_MEHmodelmerger(id_task, **kwargs): # pylint: disable=unused-argument
def run_modelmerger(id_task, **kwargs): # pylint: disable=unused-argument
shared.state.begin('merge')

def fail(message):
Expand Down Expand Up @@ -377,6 +179,7 @@ def add_model_metadata(checkpoint_info):
metadata["sd_merge_models"] = json.dumps(metadata["sd_merge_models"])

_, extension = os.path.splitext(output_modelname)

try:
theta_0 = theta_0.to_dict()
except:
Expand All @@ -391,7 +194,8 @@ def add_model_metadata(checkpoint_info):
created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None)
if created_model:
created_model.calculate_shorthash()
devices.torch_gc(force=True)
if kwargs["device"].type != "cpu":
devices.torch_gc(force=True)
shared.log.info(f"Model merge saved: {output_modelname}.")
shared.state.textinfo = "Checkpoint saved"
shared.state.end()
Expand Down
11 changes: 5 additions & 6 deletions modules/merging/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Dict, Optional, Tuple
import safetensors.torch
import torch
from tensordict import TensorDict
from tqdm import tqdm
import modules.memstats
import modules.devices as devices
Expand Down Expand Up @@ -37,7 +38,6 @@
)



def fix_clip(model: Dict) -> Dict:
if KEY_POSITION_IDS in model.keys():
model[KEY_POSITION_IDS] = torch.tensor(
Expand Down Expand Up @@ -80,9 +80,9 @@ def load_thetas(
) -> Dict:
log_vram("before loading models")
if prune:
thetas = {k: prune_sd_model(read_state_dict(m, "cpu")) for k, m in models.items()}
thetas = {k: prune_sd_model(TensorDict.from_dict(read_state_dict(m, "cpu"))) for k, m in models.items()}
else:
thetas = {k: read_state_dict(m, device) for k, m in models.items()}
thetas = {k: TensorDict.from_dict(read_state_dict(m, device)) for k, m in models.items()}

for model_key, model in thetas.items():
for key, block in model.items():
Expand Down Expand Up @@ -152,7 +152,7 @@ def un_prune_model(
del thetas
devices.torch_gc(force=True)
log_vram("remove thetas")
original_a = read_state_dict(models["model_a"], device)
original_a = TensorDict.from_dict(read_state_dict(models["model_a"], device))
for key in tqdm(original_a.keys(), desc="un-prune model a"):
if KEY_POSITION_IDS in key:
continue
Expand All @@ -162,8 +162,7 @@ def un_prune_model(
merged.update({key: merged[key].half()})
del original_a
devices.torch_gc(force=True)
# log_vram("remove original_a")
original_b = read_state_dict(models["model_b"], device)
original_b = TensorDict.from_dict(read_state_dict(models["model_b"], device))
for key in tqdm(original_b.keys(), desc="un-prune model b"):
if KEY_POSITION_IDS in key:
continue
Expand Down
Loading

0 comments on commit d9d3c2e

Please sign in to comment.