diff --git a/hordelib/comfy_horde.py b/hordelib/comfy_horde.py index e762f1ac..6dbc9f40 100644 --- a/hordelib/comfy_horde.py +++ b/hordelib/comfy_horde.py @@ -213,14 +213,7 @@ def do_comfy_import( # comfy.model_management.unet_offload_device = _unet_offload_device_hijack - total_vram = get_torch_total_vram_mb() - total_ram = psutil.virtual_memory().total / (1024 * 1024) - free_ram = psutil.virtual_memory().available / (1024 * 1024) - - free_vram = get_torch_free_vram_mb() - - logger.debug(f"Total VRAM {total_vram:0.0f} MB, Total System RAM {total_ram:0.0f} MB") - logger.debug(f"Free VRAM {free_vram:0.0f} MB, Free System RAM {free_ram:0.0f} MB") + log_free_ram() output_collector.replay() @@ -308,6 +301,11 @@ def get_torch_free_vram_mb(): return round(_comfy_get_free_memory() / (1024 * 1024)) +def log_free_ram(): + logger.debug(f"Free VRAM: {get_torch_free_vram_mb():0.0f} MB") + logger.debug(f"Free RAM: {psutil.virtual_memory().available / (1024 * 1024):0.0f} MB") + + class Comfy_Horde: """Handles horde-specific behavior against ComfyUI.""" @@ -446,7 +444,7 @@ def _this_dir(self, filename: str, subdir="") -> str: def _load_custom_nodes(self) -> None: """Force ComfyUI to load its normal custom nodes and the horde custom nodes.""" - _comfy_nodes.init_custom_nodes() + _comfy_nodes.init_extra_nodes(init_custom_nodes=True) def _get_executor(self): """Return the ComfyUI PromptExecutor object.""" @@ -744,7 +742,7 @@ def _run_pipeline( # if time.time() - self._gc_timer > Comfy_Horde.GC_TIME: # self._gc_timer = time.time() # garbage_collect() - + log_free_ram() return self.images # Run a pipeline that returns an image in pixel space diff --git a/hordelib/consts.py b/hordelib/consts.py index 3c335967..212f165e 100644 --- a/hordelib/consts.py +++ b/hordelib/consts.py @@ -6,7 +6,7 @@ from hordelib.config_path import get_hordelib_path -COMFYUI_VERSION = "16eabdf70dbdb64dc4822908f0fe455c56d11ec3" +COMFYUI_VERSION = "2dc84d14447782683862616eaf8c19c0c1feacf3" """The exact version of ComfyUI version to load.""" REMOTE_PROXY = "" diff --git a/hordelib/horde.py b/hordelib/horde.py index 76ef9d90..718cbfd2 100644 --- a/hordelib/horde.py +++ b/hordelib/horde.py @@ -11,6 +11,7 @@ from collections.abc import Callable from copy import deepcopy from enum import Enum, auto +from types import FunctionType from horde_sdk.ai_horde_api.apimodels import ImageGenerateJobPopResponse from horde_sdk.ai_horde_api.apimodels.base import ( @@ -77,6 +78,17 @@ def __init__( self.faults = faults +def _calc_upscale_sampler_steps(payload): + """Calculates the amount of hires_fix upscaler steps based on the denoising used and the steps used for the + primary image""" + upscale_steps = round(payload["ddim_steps"] * (0.9 - payload["hires_fix_denoising_strength"])) + if upscale_steps < 3: + upscale_steps = 3 + + logger.debug(f"Upscale steps calculated as {upscale_steps}") + return upscale_steps + + class HordeLib: _instance: HordeLib | None = None _initialised = False @@ -227,7 +239,7 @@ class HordeLib: "upscale_sampler.denoise": "hires_fix_denoising_strength", "upscale_sampler.seed": "seed", "upscale_sampler.cfg": "cfg_scale", - "upscale_sampler.steps": "ddim_steps", + "upscale_sampler.steps": _calc_upscale_sampler_steps, "upscale_sampler.sampler_name": "sampler_name", "controlnet_apply.strength": "control_strength", "controlnet_model_loader.control_net_name": "control_type", @@ -243,6 +255,8 @@ class HordeLib: "sampler_stage_c.denoise": "denoising_strength", "sampler_stage_b.seed": "seed", "sampler_stage_c.seed": "seed", + "sampler_stage_b.steps": "ddim_steps*0.33", + "sampler_stage_c.steps": "ddim_steps*0.67", "model_loader_stage_c.ckpt_name": "stable_cascade_stage_c", "model_loader_stage_c.model_name": "stable_cascade_stage_c", "model_loader_stage_c.horde_model_name": "model_name", @@ -251,8 +265,10 @@ class HordeLib: "model_loader_stage_b.horde_model_name": "model_name", # Stable Cascade 2pass "2pass_sampler_stage_c.sampler_name": "sampler_name", + "2pass_sampler_stage_c.steps": "ddim_steps*0.67", "2pass_sampler_stage_c.denoise": "hires_fix_denoising_strength", "2pass_sampler_stage_b.sampler_name": "sampler_name", + "2pass_sampler_stage_b.steps": "ddim_steps*0.33", # QR Codes "sampler_bg.sampler_name": "sampler_name", "sampler_bg.cfg": "cfg_scale", @@ -519,8 +535,15 @@ def _apply_aihorde_compatibility_hacks(self, payload: dict) -> tuple[dict, list[ # Turn off hires fix if we're not generating a hires image, or if the params are just confused try: - if "hires_fix" in payload and (payload["width"] <= 512 or payload["height"] <= 512): - payload["hires_fix"] = False + if "hires_fix" in payload: + if SharedModelManager.manager.compvis.model_reference[model].get( + "baseline", + ) == "stable diffusion 1" and (payload["width"] <= 512 or payload["height"] <= 512): + payload["hires_fix"] = False + elif SharedModelManager.manager.compvis.model_reference[model].get( + "baseline", + ) == "stable_diffusion_xl" and (payload["width"] <= 1024 or payload["height"] <= 1024): + payload["hires_fix"] = False except (TypeError, KeyError): payload["hires_fix"] = False @@ -792,8 +815,18 @@ def _final_pipeline_adjustments(self, payload, pipeline_data) -> tuple[dict, lis # Translate the payload parameters into pipeline parameters pipeline_params = {} for newkey, key in HordeLib.PAYLOAD_TO_PIPELINE_PARAMETER_MAPPING.items(): - if key in payload: - pipeline_params[newkey] = payload.get(key) + multiplier = None + # We allow a multiplier in the param, so that I can adjust easily the + # values for steps on things like stable cascade + if isinstance(key, FunctionType): + pipeline_params[newkey] = key(payload) + elif "*" in key: + key, multiplier = key.split("*", 1) + elif key in payload: + if multiplier: + pipeline_params[newkey] = round(payload.get(key) * float(multiplier)) + else: + pipeline_params[newkey] = payload.get(key) else: logger.error(f"Parameter {key} not found") # We inject these parameters to ensure the HordeCheckpointLoader knows what file to load, if necessary @@ -827,8 +860,12 @@ def _final_pipeline_adjustments(self, payload, pipeline_data) -> tuple[dict, lis original_height = pipeline_params.get("empty_latent_image.height") if original_width is None or original_height is None: - logger.error("empty_latent_image.width or empty_latent_image.height not found. Using 512x512.") - original_width, original_height = (512, 512) + if model_details and model_details.get("baseline") == "stable diffusion 1": + logger.error("empty_latent_image.width or empty_latent_image.height not found. Using 512x512.") + original_width, original_height = (512, 512) + else: + logger.error("empty_latent_image.width or empty_latent_image.height not found. Using 1024x1024.") + original_width, original_height = (1024, 1024) new_width, new_height = (None, None) @@ -1041,6 +1078,8 @@ def _final_pipeline_adjustments(self, payload, pipeline_data) -> tuple[dict, lis self.generator.reconnect_input(pipeline_data, "layer_diffuse_apply.model", "model_loader") self.generator.reconnect_input(pipeline_data, "output_image.images", "layer_diffuse_decode_rgba") self.generator.reconnect_input(pipeline_data, "layer_diffuse_decode_rgba.images", "vae_decode") + if payload.get("hires_fix") is True: + self.generator.reconnect_input(pipeline_data, "upscale_sampler.model", "layer_diffuse_apply") if model_details.get("baseline") == "stable diffusion 1": pipeline_params["layer_diffuse_apply.config"] = "SD15, Attention Injection, attn_sharing" pipeline_params["layer_diffuse_decode_rgba.sd_version"] = "SD15" @@ -1489,6 +1528,11 @@ def basic_inference_rawpng(self, payload: dict) -> list[io.BytesIO]: def image_upscale(self, payload) -> ResultingImageReturn: logger.debug("image_upscale called") + + from hordelib.comfy_horde import log_free_ram + + log_free_ram() + # AIHorde hacks to payload payload, compatibility_faults = self._apply_aihorde_compatibility_hacks(payload) # Remember if we were passed width and height, we wouldn't normally be passed width and height @@ -1522,10 +1566,16 @@ def image_upscale(self, payload) -> ResultingImageReturn: if not isinstance(image, Image.Image): raise RuntimeError(f"Expected a PIL.Image.Image but got {type(image)}") + log_free_ram() return ResultingImageReturn(image=image, rawpng=rawpng, faults=compatibility_faults + final_adjustment_faults) def image_facefix(self, payload) -> ResultingImageReturn: logger.debug("image_facefix called") + + from hordelib.comfy_horde import log_free_ram + + log_free_ram() + # AIHorde hacks to payload payload, compatibility_faults = self._apply_aihorde_compatibility_hacks(payload) # Check payload types/values and normalise it's format @@ -1547,4 +1597,6 @@ def image_facefix(self, payload) -> ResultingImageReturn: if not isinstance(image, Image.Image): raise RuntimeError(f"Expected a PIL.Image.Image but got {type(image)}") + log_free_ram() + return ResultingImageReturn(image=image, rawpng=rawpng, faults=compatibility_faults + final_adjustment_faults) diff --git a/hordelib/nodes/node_controlnet_model_loader.py b/hordelib/nodes/node_controlnet_model_loader.py index 4352161f..cc168660 100644 --- a/hordelib/nodes/node_controlnet_model_loader.py +++ b/hordelib/nodes/node_controlnet_model_loader.py @@ -18,16 +18,21 @@ def INPUT_TYPES(s): CATEGORY = "loaders" def load_controlnet(self, model, control_net_name, model_manager): + from hordelib.comfy_horde import log_free_ram + logger.debug(f"Loading controlnet {control_net_name} through our custom node") + log_free_ram() if not model_manager or not model_manager.manager or not model_manager.manager.controlnet: logger.error("controlnet model_manager appears to be missing!") raise RuntimeError # XXX better guarantees need to be made - return model_manager.manager.controlnet.merge_controlnet( + merge_result = model_manager.manager.controlnet.merge_controlnet( control_net_name, model, ) + log_free_ram() + return merge_result NODE_CLASS_MAPPINGS = {"HordeDiffControlNetLoader": HordeDiffControlNetLoader} diff --git a/hordelib/nodes/node_lora_loader.py b/hordelib/nodes/node_lora_loader.py index 2c0fa1c1..6d817d93 100644 --- a/hordelib/nodes/node_lora_loader.py +++ b/hordelib/nodes/node_lora_loader.py @@ -27,11 +27,18 @@ def INPUT_TYPES(s): CATEGORY = "loaders" def load_lora(self, model, clip, lora_name, strength_model, strength_clip): + from hordelib.comfy_horde import log_free_ram + + log_free_ram() + _test_exception = os.getenv("FAILURE_TEST", False) if _test_exception: raise Exception("This tests exceptions being thrown from within the pipeline") + logger.debug(f"Loading lora {lora_name} through our custom node") + if strength_model == 0 and strength_clip == 0: + logger.debug("Strengths are 0, skipping lora loading") return (model, clip) if lora_name is None or lora_name == "" or lora_name == "None": @@ -67,6 +74,7 @@ def load_lora(self, model, clip, lora_name, strength_model, strength_clip): self.loaded_lora = (lora_path, lora) model_lora, clip_lora = comfy.sd.load_lora_for_models(model, clip, lora, strength_model, strength_clip) + log_free_ram() return (model_lora, clip_lora) diff --git a/hordelib/nodes/node_model_loader.py b/hordelib/nodes/node_model_loader.py index 6f37b22b..4ca17900 100644 --- a/hordelib/nodes/node_model_loader.py +++ b/hordelib/nodes/node_model_loader.py @@ -10,6 +10,7 @@ from loguru import logger from hordelib.shared_model_manager import SharedModelManager +from hordelib.comfy_horde import log_free_ram # Don't let the name fool you, this class is trying to load all the files that will be necessary @@ -44,6 +45,7 @@ def load_checkpoint( output_clip=True, preloading=False, ): + log_free_ram() if file_type is not None: logger.debug(f"Loading model {horde_model_name}:{file_type}") else: @@ -77,7 +79,7 @@ def load_checkpoint( make_regular_vae(same_loaded_model[0][2]) logger.debug("Model was previously loaded, returning it.") - + log_free_ram() return same_loaded_model[0] if not ckpt_name: @@ -133,6 +135,7 @@ def load_checkpoint( result[0].model.apply(make_regular) make_regular_vae(result[2]) + log_free_ram() return result diff --git a/hordelib/nodes/node_upscale_model_loader.py b/hordelib/nodes/node_upscale_model_loader.py index ff32ee80..1a345feb 100644 --- a/hordelib/nodes/node_upscale_model_loader.py +++ b/hordelib/nodes/node_upscale_model_loader.py @@ -18,11 +18,15 @@ def INPUT_TYPES(s): CATEGORY = "loaders" def load_model(self, model_name): + from hordelib.comfy_horde import log_free_ram + + log_free_ram() model_path = folder_paths.get_full_path("upscale_models", model_name) sd = comfy.utils.load_torch_file(model_path, safe_load=True) if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: sd = comfy.utils.state_dict_prefix_replace(sd, {"module.": ""}) out = model_loading.load_state_dict(sd).eval() + log_free_ram() return (out,) diff --git a/hordelib/pipeline_designs/pipeline_stable_diffusion.json b/hordelib/pipeline_designs/pipeline_stable_diffusion.json index d68daa96..6052bc37 100644 --- a/hordelib/pipeline_designs/pipeline_stable_diffusion.json +++ b/hordelib/pipeline_designs/pipeline_stable_diffusion.json @@ -1,6 +1,6 @@ { - "last_node_id": 15, - "last_link_id": 23, + "last_node_id": 17, + "last_link_id": 36, "nodes": [ { "id": 7, @@ -14,7 +14,7 @@ "1": 90.3333740234375 }, "flags": {}, - "order": 6, + "order": 8, "mode": 0, "inputs": [ { @@ -54,7 +54,7 @@ "1": 83.00006103515625 }, "flags": {}, - "order": 5, + "order": 7, "mode": 0, "inputs": [ { @@ -93,7 +93,7 @@ "1": 58 }, "flags": {}, - "order": 3, + "order": 5, "mode": 0, "inputs": [ { @@ -133,13 +133,13 @@ "1": 400 }, "flags": {}, - "order": 10, + "order": 12, "mode": 0, "inputs": [ { "name": "images", "type": "IMAGE", - "link": 21 + "link": 36 } ], "title": "output_image", @@ -148,6 +148,84 @@ "ComfyUI" ] }, + { + "id": 15, + "type": "RepeatImageBatch", + "pos": [ + 257, + 1026 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 22 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 23 + ], + "shape": 3, + "slot_index": 0 + } + ], + "title": "repeat_image_batch", + "properties": { + "Node name for S&R": "RepeatImageBatch" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 11, + "type": "LoadImage", + "pos": [ + -91, + 768 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 22 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "title": "image_loader", + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "example.png", + "image" + ] + }, { "id": 12, "type": "VAEEncode", @@ -160,7 +238,7 @@ "1": 46 }, "flags": {}, - "order": 7, + "order": 6, "mode": 0, "inputs": [ { @@ -188,114 +266,180 @@ } }, { - "id": 5, - "type": "EmptyLatentImage", + "id": 3, + "type": "KSampler", "pos": [ - 575, - 565 + 1019, + 107 ], "size": { "0": 315, - "1": 106 + "1": 262 }, "flags": {}, - "order": 1, + "order": 9, "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 35 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 34 + } + ], "outputs": [ { "name": "LATENT", "type": "LATENT", "links": [ - 18 + 20, + 24 ], "slot_index": 0 } ], - "title": "empty_latent_image", + "title": "sampler", "properties": { - "Node name for S&R": "EmptyLatentImage" + "Node name for S&R": "KSampler" }, "widgets_values": [ - 512, - 512, - 1 + 283224582220517, + "randomize", + 20, + 8, + "euler", + "normal", + 0.8 ] }, { - "id": 11, - "type": "LoadImage", + "id": 16, + "type": "LayeredDiffusionApply", "pos": [ - -91, - 768 - ], - "size": [ - 315, - 314 + 597, + 254 ], + "size": { + "0": 315, + "1": 82 + }, "flags": {}, - "order": 2, + "order": 4, "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 26 + } + ], "outputs": [ { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 22 - ], + "name": "MODEL", + "type": "MODEL", + "links": [], + "shape": 3, "slot_index": 0 - }, - { - "name": "MASK", - "type": "MASK", - "links": null } ], - "title": "image_loader", + "title": "layer_diffuse_apply", "properties": { - "Node name for S&R": "LoadImage" + "Node name for S&R": "LayeredDiffusionApply" }, "widgets_values": [ - "example.png", - "image" + "SD15, Attention Injection, attn_sharing", + 1 ] }, { - "id": 15, - "type": "RepeatImageBatch", + "id": 17, + "type": "LayeredDiffusionDecodeRGBA", "pos": [ - 257, - 1026 + 1621, + 234 ], "size": { "0": 315, - "1": 58 + "1": 102 }, "flags": {}, - "order": 4, + "order": 11, "mode": 0, "inputs": [ { - "name": "image", + "name": "samples", + "type": "LATENT", + "link": 24 + }, + { + "name": "images", "type": "IMAGE", - "link": 22 + "link": 27 } ], "outputs": [ { "name": "IMAGE", "type": "IMAGE", + "links": [], + "shape": 3, + "slot_index": 0 + } + ], + "title": "layer_diffuse_decode_rgba", + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeRGBA" + }, + "widgets_values": [ + "SD15", + 16 + ] + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 575, + 565 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", "links": [ - 23 + 34 ], - "shape": 3, "slot_index": 0 } ], - "title": "repeat_image_batch", + "title": "empty_latent_image", "properties": { - "Node name for S&R": "RepeatImageBatch" + "Node name for S&R": "EmptyLatentImage" }, "widgets_values": [ + 512, + 512, 1 ] }, @@ -311,14 +455,15 @@ "1": 98 }, "flags": {}, - "order": 0, + "order": 2, "mode": 0, "outputs": [ { "name": "MODEL", "type": "MODEL", "links": [ - 1 + 26, + 35 ], "slot_index": 0 }, @@ -348,66 +493,6 @@ "Deliberate.ckpt" ] }, - { - "id": 3, - "type": "KSampler", - "pos": [ - 1019, - 107 - ], - "size": { - "0": 315, - "1": 262 - }, - "flags": {}, - "order": 8, - "mode": 0, - "inputs": [ - { - "name": "model", - "type": "MODEL", - "link": 1 - }, - { - "name": "positive", - "type": "CONDITIONING", - "link": 4 - }, - { - "name": "negative", - "type": "CONDITIONING", - "link": 6 - }, - { - "name": "latent_image", - "type": "LATENT", - "link": 18 - } - ], - "outputs": [ - { - "name": "LATENT", - "type": "LATENT", - "links": [ - 20 - ], - "slot_index": 0 - } - ], - "title": "sampler", - "properties": { - "Node name for S&R": "KSampler" - }, - "widgets_values": [ - 62706718437716, - "randomize", - 20, - 8, - "euler", - "normal", - 1 - ] - }, { "id": 14, "type": "VAEDecode", @@ -420,7 +505,7 @@ "1": 46 }, "flags": {}, - "order": 9, + "order": 10, "mode": 0, "inputs": [ { @@ -439,7 +524,8 @@ "name": "IMAGE", "type": "IMAGE", "links": [ - 21 + 27, + 36 ], "slot_index": 0 } @@ -451,14 +537,6 @@ } ], "links": [ - [ - 1, - 4, - 0, - 3, - 0, - "MODEL" - ], [ 4, 6, @@ -507,14 +585,6 @@ 1, "VAE" ], - [ - 18, - 5, - 0, - 3, - 3, - "LATENT" - ], [ 19, 4, @@ -531,14 +601,6 @@ 0, "LATENT" ], - [ - 21, - 14, - 0, - 9, - 0, - "IMAGE" - ], [ 22, 11, @@ -554,10 +616,66 @@ 12, 0, "IMAGE" + ], + [ + 24, + 3, + 0, + 17, + 0, + "LATENT" + ], + [ + 26, + 4, + 0, + 16, + 0, + "MODEL" + ], + [ + 27, + 14, + 0, + 17, + 1, + "IMAGE" + ], + [ + 34, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 35, + 4, + 0, + 3, + 0, + "MODEL" + ], + [ + 36, + 14, + 0, + 9, + 0, + "IMAGE" ] ], "groups": [], "config": {}, - "extra": {}, + "extra": { + "ds": { + "scale": 0.8264462809917354, + "offset": [ + -271.8083588837497, + 68.64949882587337 + ] + } + }, "version": 0.4 } diff --git a/hordelib/pipeline_designs/pipeline_stable_diffusion_hires_fix.json b/hordelib/pipeline_designs/pipeline_stable_diffusion_hires_fix.json index df6843b0..de9a550a 100644 --- a/hordelib/pipeline_designs/pipeline_stable_diffusion_hires_fix.json +++ b/hordelib/pipeline_designs/pipeline_stable_diffusion_hires_fix.json @@ -1,6 +1,6 @@ { - "last_node_id": 22, - "last_link_id": 36, + "last_node_id": 24, + "last_link_id": 45, "nodes": [ { "id": 12, @@ -14,13 +14,13 @@ "1": 468.13226318359375 }, "flags": {}, - "order": 12, + "order": 14, "mode": 0, "inputs": [ { "name": "images", "type": "IMAGE", - "link": 34 + "link": 43 } ], "title": "output_image", @@ -41,7 +41,7 @@ "1": 58 }, "flags": {}, - "order": 3, + "order": 5, "mode": 0, "inputs": [ { @@ -82,7 +82,7 @@ "1": 180.6060791015625 }, "flags": {}, - "order": 6, + "order": 8, "mode": 0, "inputs": [ { @@ -122,7 +122,7 @@ "1": 164.31304931640625 }, "flags": {}, - "order": 5, + "order": 7, "mode": 0, "inputs": [ { @@ -162,7 +162,7 @@ "1": 130 }, "flags": {}, - "order": 9, + "order": 10, "mode": 0, "inputs": [ { @@ -237,13 +237,13 @@ "1": 262 }, "flags": {}, - "order": 8, + "order": 9, "mode": 0, "inputs": [ { "name": "model", "type": "MODEL", - "link": 18 + "link": 44 }, { "name": "positive", @@ -276,7 +276,7 @@ "Node name for S&R": "KSampler" }, "widgets_values": [ - 458841867575267, + 709052412707182, "randomize", 12, 8, @@ -297,7 +297,7 @@ "1": 46 }, "flags": {}, - "order": 7, + "order": 6, "mode": 0, "inputs": [ { @@ -325,53 +325,81 @@ } }, { - "id": 16, - "type": "CheckpointLoaderSimple", + "id": 18, + "type": "LoadImage", "pos": [ - -420, - 334 + 261, + 934 ], "size": { "0": 315, - "1": 98 + "1": 314 }, "flags": {}, "order": 1, "mode": 0, "outputs": [ { - "name": "MODEL", - "type": "MODEL", + "name": "IMAGE", + "type": "IMAGE", "links": [ - 18, - 23 + 35 ], "slot_index": 0 }, { - "name": "CLIP", - "type": "CLIP", - "links": [ - 24 - ], - "slot_index": 1 - }, + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "title": "image_loader", + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "example.png", + "image" + ] + }, + { + "id": 22, + "type": "RepeatImageBatch", + "pos": [ + 624, + 934 + ], + "size": { + "0": 315, + "1": 58 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ { - "name": "VAE", - "type": "VAE", + "name": "image", + "type": "IMAGE", + "link": 35 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", "links": [ - 28, - 32 + 36 ], - "slot_index": 2 + "shape": 3, + "slot_index": 0 } ], - "title": "model_loader", + "title": "repeat_image_batch", "properties": { - "Node name for S&R": "CheckpointLoaderSimple" + "Node name for S&R": "RepeatImageBatch" }, "widgets_values": [ - "Deliberate.ckpt" + 1 ] }, { @@ -386,13 +414,13 @@ "1": 262 }, "flags": {}, - "order": 10, + "order": 11, "mode": 0, "inputs": [ { "name": "model", "type": "MODEL", - "link": 23, + "link": 45, "slot_index": 0 }, { @@ -419,7 +447,8 @@ "name": "LATENT", "type": "LATENT", "links": [ - 33 + 33, + 42 ], "slot_index": 0 } @@ -429,7 +458,7 @@ "Node name for S&R": "KSampler" }, "widgets_values": [ - 65484213431324, + 932988885298999, "randomize", 14, 8, @@ -438,6 +467,50 @@ 0.5 ] }, + { + "id": 24, + "type": "LayeredDiffusionDecodeRGBA", + "pos": [ + 2135, + -102 + ], + "size": { + "0": 315, + "1": 102 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 42 + }, + { + "name": "images", + "type": "IMAGE", + "link": 40 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "shape": 3, + "slot_index": 0 + } + ], + "title": "layer_diffuse_decode_rgba", + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeRGBA" + }, + "widgets_values": [ + "SD15", + 16 + ] + }, { "id": 21, "type": "VAEDecode", @@ -450,7 +523,7 @@ "1": 46 }, "flags": {}, - "order": 11, + "order": 12, "mode": 0, "inputs": [ { @@ -469,7 +542,8 @@ "name": "IMAGE", "type": "IMAGE", "links": [ - 34 + 40, + 43 ], "slot_index": 0 } @@ -480,80 +554,92 @@ } }, { - "id": 18, - "type": "LoadImage", + "id": 16, + "type": "CheckpointLoaderSimple", "pos": [ - 261, - 934 - ], - "size": [ - 315, - 314 + -420, + 334 ], + "size": { + "0": 315, + "1": 98 + }, "flags": {}, "order": 2, "mode": 0, "outputs": [ { - "name": "IMAGE", - "type": "IMAGE", + "name": "MODEL", + "type": "MODEL", "links": [ - 35 + 37, + 44, + 45 ], "slot_index": 0 }, { - "name": "MASK", - "type": "MASK", - "links": null + "name": "CLIP", + "type": "CLIP", + "links": [ + 24 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 28, + 32 + ], + "slot_index": 2 } ], - "title": "image_loader", + "title": "model_loader", "properties": { - "Node name for S&R": "LoadImage" + "Node name for S&R": "CheckpointLoaderSimple" }, "widgets_values": [ - "example.png", - "image" + "Deliberate.ckpt" ] }, { - "id": 22, - "type": "RepeatImageBatch", + "id": 23, + "type": "LayeredDiffusionApply", "pos": [ - 624, - 934 + 443, + -13 ], "size": { "0": 315, - "1": 58 + "1": 82 }, "flags": {}, "order": 4, "mode": 0, "inputs": [ { - "name": "image", - "type": "IMAGE", - "link": 35 + "name": "model", + "type": "MODEL", + "link": 37 } ], "outputs": [ { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 36 - ], + "name": "MODEL", + "type": "MODEL", + "links": [], "shape": 3, "slot_index": 0 } ], - "title": "repeat_image_batch", + "title": "layer_diffuse_apply", "properties": { - "Node name for S&R": "RepeatImageBatch" + "Node name for S&R": "LayeredDiffusionApply" }, "widgets_values": [ + "SD15, Attention Injection, attn_sharing", 1 ] } @@ -607,22 +693,6 @@ 3, "LATENT" ], - [ - 18, - 16, - 0, - 3, - 0, - "MODEL" - ], - [ - 23, - 16, - 0, - 11, - 0, - "MODEL" - ], [ 24, 16, @@ -679,14 +749,6 @@ 0, "LATENT" ], - [ - 34, - 21, - 0, - 12, - 0, - "IMAGE" - ], [ 35, 18, @@ -702,6 +764,54 @@ 19, 0, "IMAGE" + ], + [ + 37, + 16, + 0, + 23, + 0, + "MODEL" + ], + [ + 40, + 21, + 0, + 24, + 1, + "IMAGE" + ], + [ + 42, + 11, + 0, + 24, + 0, + "LATENT" + ], + [ + 43, + 21, + 0, + 12, + 0, + "IMAGE" + ], + [ + 44, + 16, + 0, + 3, + 0, + "MODEL" + ], + [ + 45, + 16, + 0, + 11, + 0, + "MODEL" ] ], "groups": [ @@ -751,6 +861,14 @@ } ], "config": {}, - "extra": {}, + "extra": { + "ds": { + "scale": 0.683013455365071, + "offset": [ + -1143.7145712697502, + 509.85654565587373 + ] + } + }, "version": 0.4 } diff --git a/hordelib/pipelines/pipeline_stable_diffusion_hires_fix.json b/hordelib/pipelines/pipeline_stable_diffusion_hires_fix.json index fed95de1..8d0e1c04 100644 --- a/hordelib/pipelines/pipeline_stable_diffusion_hires_fix.json +++ b/hordelib/pipelines/pipeline_stable_diffusion_hires_fix.json @@ -1,7 +1,7 @@ { "3": { "inputs": { - "seed": 458841867575267, + "seed": 709052412707182, "steps": 12, "cfg": 8, "sampler_name": "dpmpp_sde", @@ -84,7 +84,7 @@ }, "11": { "inputs": { - "seed": 65484213431324, + "seed": 932988885298999, "steps": 14, "cfg": 8, "sampler_name": "dpmpp_2m", @@ -201,5 +201,37 @@ "_meta": { "title": "repeat_image_batch" } + }, + "23": { + "inputs": { + "config": "SD15, Attention Injection, attn_sharing", + "weight": 1, + "model": [ + "16", + 0 + ] + }, + "class_type": "LayeredDiffusionApply", + "_meta": { + "title": "layer_diffuse_apply" + } + }, + "24": { + "inputs": { + "sd_version": "SD15", + "sub_batch_size": 16, + "samples": [ + "11", + 0 + ], + "images": [ + "21", + 0 + ] + }, + "class_type": "LayeredDiffusionDecodeRGBA", + "_meta": { + "title": "layer_diffuse_decode_rgba" + } } } diff --git a/hordelib/utils/ioredirect.py b/hordelib/utils/ioredirect.py index a51dec0f..5243245c 100644 --- a/hordelib/utils/ioredirect.py +++ b/hordelib/utils/ioredirect.py @@ -106,6 +106,7 @@ def write(self, message: str): # Remove any double spaces message = self.pattern_double_space.sub(" ", message) + from hordelib.comfy_horde import log_free_ram if ( self.slow_message_count < 5 @@ -115,11 +116,14 @@ def write(self, message: str): self.slow_message_count += 1 if self.slow_message_count == 5: logger.warning("Suppressing further slow job warnings. Please investigate.") + + log_free_ram() else: rate_unit = "iterations per second" if is_iterations_per_second else "*seconds per iterations*" logger.warning(f"Job Slowdown: Job is running at {iteration_rate} {rate_unit}.") if found_current_step == 0: + log_free_ram() logger.info("Job will show progress for the first three steps, and then every 10 steps.") # Log the first 3 steps, then every 10 steps, then the last step diff --git a/images_expected/image_to_image_hires_fix_large.png b/images_expected/image_to_image_hires_fix_large.png index 48369d61..8e69efcd 100644 Binary files a/images_expected/image_to_image_hires_fix_large.png and b/images_expected/image_to_image_hires_fix_large.png differ diff --git a/images_expected/img2img_hires_fix_n_iter_0.png b/images_expected/img2img_hires_fix_n_iter_0.png new file mode 100644 index 00000000..eacefbcf Binary files /dev/null and b/images_expected/img2img_hires_fix_n_iter_0.png differ diff --git a/images_expected/img2img_hires_fix_n_iter_1.png b/images_expected/img2img_hires_fix_n_iter_1.png new file mode 100644 index 00000000..a922f65a Binary files /dev/null and b/images_expected/img2img_hires_fix_n_iter_1.png differ diff --git a/images_expected/layer_diffusion_hires_fix.png b/images_expected/layer_diffusion_hires_fix.png new file mode 100644 index 00000000..aecb9ae5 Binary files /dev/null and b/images_expected/layer_diffusion_hires_fix.png differ diff --git a/images_expected/qr_code.png b/images_expected/qr_code.png index 5ca125ab..23c98035 100644 Binary files a/images_expected/qr_code.png and b/images_expected/qr_code.png differ diff --git a/images_expected/qr_code_out_of_bounds.png b/images_expected/qr_code_out_of_bounds.png index 2d35251d..5e8b9e85 100644 Binary files a/images_expected/qr_code_out_of_bounds.png and b/images_expected/qr_code_out_of_bounds.png differ diff --git a/images_expected/sdxl_text_to_image_hires_fix.png b/images_expected/sdxl_text_to_image_hires_fix.png new file mode 100644 index 00000000..7c095e98 Binary files /dev/null and b/images_expected/sdxl_text_to_image_hires_fix.png differ diff --git a/images_expected/text_to_image_hires_fix_n_iter_1.png b/images_expected/text_to_image_hires_fix_n_iter_1.png index fce6993b..9f78f932 100644 Binary files a/images_expected/text_to_image_hires_fix_n_iter_1.png and b/images_expected/text_to_image_hires_fix_n_iter_1.png differ diff --git a/requirements.txt b/requirements.txt index 6874f189..ab2ca5ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ # Add this in for tox, comment out for build --extra-index-url https://download.pytorch.org/whl/cu121 -horde_sdk>=0.12.0 +horde_sdk>=0.13.0 horde_model_reference>=0.7.0 pydantic numpy==1.26.4 -torch>=2.1.0 +torch>=2.3.1 # xformers>=0.0.19 torchvision # torchaudio @@ -13,7 +13,7 @@ torchsde einops open-clip-torch transformers>=4.25.1 -safetensors>=0.3.0 +safetensors>=0.4.2 pytorch_lightning pynvml aiohttp @@ -48,3 +48,5 @@ fuzzywuzzy strenum kornia qrcode +spandrel +spandrel_extra_arches diff --git a/tests/test_horde_inference.py b/tests/test_horde_inference.py index 8e65dd54..fae205a2 100644 --- a/tests/test_horde_inference.py +++ b/tests/test_horde_inference.py @@ -130,6 +130,42 @@ def test_sdxl_text_to_image( pil_image, ) + def test_sdxl_text_to_image_hires_fix( + self, + hordelib_instance: HordeLib, + sdxl_refined_model_name: str, + ): + data = { + "sampler_name": "k_dpmpp_2m", + "cfg_scale": 7.5, + "denoising_strength": 1.0, + "seed": 123456789, + "height": 1536, + "width": 1280, + "karras": False, + "tiling": False, + "hires_fix": True, + "clip_skip": 1, + "control_type": None, + "image_is_control": False, + "return_control_map": False, + "prompt": "an ancient llamia monster", + "ddim_steps": 25, + "n_iter": 1, + "model": sdxl_refined_model_name, + } + pil_image = hordelib_instance.basic_inference_single_image(data).image + assert pil_image is not None + assert isinstance(pil_image, Image.Image) + + img_filename = "sdxl_text_to_image_hires_fix.png" + pil_image.save(f"images/{img_filename}", quality=100) + + assert check_single_inference_image_similarity( + f"images_expected/{img_filename}", + pil_image, + ) + @pytest.mark.skip(reason="This test is too slow to run on every test run") def test_sdxl_text_to_image_recommended_resolutions( self, diff --git a/tests/test_horde_inference_cascade.py b/tests/test_horde_inference_cascade.py index bc6c1e55..54efb8a1 100644 --- a/tests/test_horde_inference_cascade.py +++ b/tests/test_horde_inference_cascade.py @@ -34,7 +34,7 @@ def test_cascade_text_to_image( "A magic glowing long sword lying flat on a medieval shop rack, in the style of Dungeons and Dragons. " "Splash art, Digital Painting, ornate handle with gems" ), - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, } @@ -74,7 +74,7 @@ def test_cascade_text_to_image_n_iter( "A magic glowing long sword lying flat on a medieval shop rack, in the style of Dungeons and Dragons. " "Splash art, Digital Painting, ornate handle with gems" ), - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 2, "model": stable_cascade_base_model_name, } @@ -125,7 +125,7 @@ def test_cascade_image_to_image( "a medieval fantasy swashbuckler with a big floppy hat walking towards " "a camera while there's an explosion in the background" ), - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 2, "model": stable_cascade_base_model_name, "source_image": Image.open("images/test_db0.jpg"), @@ -170,7 +170,7 @@ def test_cascade_image_remix_single( "image_is_control": False, "return_control_map": False, "prompt": "A herd of goats grazing under the sunset", - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, "source_image": Image.open("images/test_mountains.png"), @@ -208,7 +208,7 @@ def test_cascade_image_remix_double( "image_is_control": False, "return_control_map": False, "prompt": "A herd of goats grazing", - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, "source_image": Image.open("images/test_mountains.png"), @@ -251,7 +251,7 @@ def test_cascade_image_remix_double_weak( "image_is_control": False, "return_control_map": False, "prompt": "A herd of goats grazing", - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, "source_image": Image.open("images/test_mountains.png"), @@ -295,7 +295,7 @@ def test_cascade_image_remix_triple( "image_is_control": False, "return_control_map": False, "prompt": "Baking Sun", - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, "source_image": Image.open("images/test_mountains.png"), @@ -345,7 +345,7 @@ def test_cascade_text_to_image_hires_2pass( "Lucid Creations, Deep Forest, Moss, ethereal, dreamlike, surreal, " "beautiful, illustration, incredible detail, 8k, abstract" ), - "ddim_steps": 20, + "ddim_steps": 30, "n_iter": 1, "model": stable_cascade_base_model_name, } diff --git a/tests/test_horde_inference_img2img.py b/tests/test_horde_inference_img2img.py index 862a683b..3d150266 100644 --- a/tests/test_horde_inference_img2img.py +++ b/tests/test_horde_inference_img2img.py @@ -540,3 +540,5 @@ def test_image_to_image_hires_fix_n_iter( image_result.image.save(f"images/{img_filename}", quality=100) img_pairs_to_check.append((f"images_expected/{img_filename}", image_result.image)) + + assert check_list_inference_images_similarity(img_pairs_to_check) diff --git a/tests/test_horde_inference_layerdiffusion.py b/tests/test_horde_inference_layerdiffusion.py index c4deec27..8e95814a 100644 --- a/tests/test_horde_inference_layerdiffusion.py +++ b/tests/test_horde_inference_layerdiffusion.py @@ -4,6 +4,8 @@ from hordelib.horde import HordeLib +from .testing_shared_functions import check_single_inference_image_similarity + class TestHordeInferenceTransparent: def test_layerdiffuse_sd15( @@ -90,3 +92,40 @@ def test_layerdiffuse_sdxl( image_result.image.save(f"images/{img_filename}", quality=100) img_pairs_to_check.append((f"images_expected/{img_filename}", image_result.image)) + + def test_layerdiffusion_hires_fix( + self, + hordelib_instance: HordeLib, + stable_diffusion_model_name_for_testing: str, + ): + data = { + "sampler_name": "k_dpmpp_2m", + "cfg_scale": 7.5, + "denoising_strength": 1.0, + "seed": 123456789, + "height": 768, + "width": 768, + "karras": False, + "tiling": False, + "hires_fix": True, + "transparent": True, + "clip_skip": 1, + "control_type": None, + "image_is_control": False, + "return_control_map": False, + "prompt": "an ancient llamia monster", + "ddim_steps": 25, + "n_iter": 1, + "model": stable_diffusion_model_name_for_testing, + } + pil_image = hordelib_instance.basic_inference_single_image(data).image + assert pil_image is not None + assert isinstance(pil_image, Image.Image) + + img_filename = "layer_diffusion_hires_fix.png" + pil_image.save(f"images/{img_filename}", quality=100) + + assert check_single_inference_image_similarity( + f"images_expected/{img_filename}", + pil_image, + )