[LoRA] fix: lora loading when using with a device_mapped model. #9449
Changes from 25 commits
First changed file: `pipeline_utils` (the DiffusionPipeline `to()` and offloading methods).
@@ -389,6 +389,11 @@ def to(self, *args, **kwargs):

    device = device or device_arg

    def model_has_device_map(model):
        if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
            return False
        return getattr(model, "hf_device_map", None) is not None

Review comment: @DN6 it would make sense to make this a separate utility instead of having to redefine it three times. WDYT?
Reply: Yup, you can add it as a util function inside pipeline_utils.

    # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
    def module_is_sequentially_offloaded(module):
        if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
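Per the review thread, the duplicated helper is meant to become a single utility that lives in `pipeline_utils` and is imported by all three call sites. A minimal sketch of that refactor, assuming a module-level function (the final name and placement are up to the maintainers):

from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version


def model_has_device_map(model) -> bool:
    # Mirrors the inline helper from the diff: hf_device_map is only meaningful
    # when accelerate >= 0.14.0 is available.
    if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
        return False
    return getattr(model, "hf_device_map", None) is not None

`to()`, `enable_model_cpu_offload()`, and `enable_sequential_cpu_offload()` could then call this shared function instead of redefining it inline.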
@@ -406,6 +411,16 @@ def module_is_offloaded(module):

        return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)

    # device-mapped modules should not go through any device placements.
    device_mapped_components = [
        key for key, component in self.components.items() if model_has_device_map(component)
    ]
    if device_mapped_components:
        raise ValueError(
            "The following pipeline components have been found to use a device map: "
            f"{device_mapped_components}. This is incompatible with explicitly setting the device using `to()`."
        )

    # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
    pipeline_is_sequentially_offloaded = any(
        module_is_sequentially_offloaded(module) for _, module in self.components.items()
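Concretely, the new guard in `to()` makes a device-mapped pipeline fail fast instead of partially moving shards. A hedged illustration of the behavior (the checkpoint name is a placeholder and not part of the diff):

import torch
from diffusers import DiffusionPipeline

# Placeholder checkpoint; any pipeline loaded with a device map hits the same guard.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    device_map="balanced",
)

try:
    pipe.to("cuda")  # explicit device placement conflicts with the existing device map
except ValueError as err:
    print(err)  # lists the device-mapped components, per the new check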
@@ -1002,6 +1017,22 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t

        The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
        default to "cuda".
    """

    def model_has_device_map(model):
        if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
            return False
        return getattr(model, "hf_device_map", None) is not None

    # device-mapped modules should not go through any device placements.
    device_mapped_components = [
        key for key, component in self.components.items() if model_has_device_map(component)
    ]
    if device_mapped_components:
        raise ValueError(
            "The following pipeline components have been found to use a device map: "
            f"{device_mapped_components}. This is incompatible with `enable_model_cpu_offload()`."
        )

    is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
    if is_pipeline_device_mapped:
        raise ValueError(
@@ -1104,6 +1135,22 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un

        The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
        default to "cuda".
    """

    def model_has_device_map(model):
        if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
            return False
        return getattr(model, "hf_device_map", None) is not None

    # device-mapped modules should not go through any device placements.
    device_mapped_components = [
        key for key, component in self.components.items() if model_has_device_map(component)
    ]
    if device_mapped_components:
        raise ValueError(
            "The following pipeline components have been found to use a device map: "
            f"{device_mapped_components}. This is incompatible with `enable_sequential_cpu_offload()`."
        )

    if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
        from accelerate import cpu_offload
    else:
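Both offloading entry points now enforce the same rule as `to()`: a pipeline either uses a device map or uses CPU offloading, not both. A small sketch of the still-supported offloading path, assuming a single-accelerator setup and a placeholder checkpoint:

from diffusers import DiffusionPipeline

# Load without a device map, then let offloading manage placement.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()  # moves whole components to the accelerator on demand
# or, for lower peak memory at the cost of speed:
# pipe.enable_sequential_cpu_offload()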
Second changed file: the common pipeline tests.
@@ -30,19 +30,24 @@
)
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import IPAdapterMixin
from diffusers.models.adapter import MultiAdapter
from diffusers.models.attention_processor import AttnProcessor
from diffusers.models.controlnet_xs import UNetControlNetXSModel
from diffusers.models.unets.unet_3d_condition import UNet3DConditionModel
from diffusers.models.unets.unet_i2vgen_xl import I2VGenXLUNet
from diffusers.models.unets.unet_motion_model import UNetMotionModel
from diffusers.pipelines.controlnet import MultiControlNetModel
from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import logging
from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
from diffusers.utils.testing_utils import (
    CaptureLogger,
    nightly,
    require_torch,
    require_torch_multi_gpu,
    skip_mps,
    slow,
    torch_device,
)
@@ -59,6 +64,10 @@
from ..others.test_utils import TOKEN, USER, is_staging_test


if is_accelerate_available():
    from accelerate.utils import compute_module_sizes


def to_np(tensor):
    if isinstance(tensor, torch.Tensor):
        tensor = tensor.detach().cpu().numpy()
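The tests use `compute_module_sizes` from accelerate to find the largest component in a pipeline; indexing the returned dict with the empty string gives the size of the whole module. A quick sketch of what the helper returns (the toy module is only for illustration):

import torch
from accelerate.utils import compute_module_sizes

model = torch.nn.Linear(4, 4)
sizes = compute_module_sizes(model)
print(sizes[""])        # total size of the module, in bytes
print(sizes["weight"])  # size of an individual parameter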
@@ -1908,6 +1917,99 @@ def test_StableDiffusionMixin_component(self):
            )
        )

    @require_torch_multi_gpu
    @slow
    @nightly
    def test_calling_to_raises_error_device_mapped_components(self):
        if "Combined" in self.pipeline_class.__name__:
            return

Review comment: Because for connected pipelines, we don't support device mapping in the first place.
        # TODO (sayakpaul): skip these for now. revisit later.
        components = self.get_dummy_components()
        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
            return

        pipe = self.pipeline_class(**components)
        max_model_size = max(
            compute_module_sizes(module)[""]
            for _, module in pipe.components.items()
            if isinstance(module, torch.nn.Module)
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            max_memory = {0: max_model_size, 1: max_model_size}
            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)

            with self.assertRaises(ValueError) as err_context:
                loaded_pipe.to(torch_device)

            self.assertTrue(
                "The following pipeline components have been found" in str(err_context.exception)
                and "This is incompatible with explicitly setting the device using `to()`" in str(err_context.exception)
            )
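Capping `max_memory` at the size of the largest component on each of the two GPUs forces `device_map="balanced"` to actually split the pipeline across devices, so the guard has something to trip on. Outside the test suite, the resulting placement can be inspected through the pipeline's `hf_device_map`; a hedged sketch (checkpoint and memory limits are placeholders):

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    device_map="balanced",
    max_memory={0: "8GiB", 1: "8GiB"},
)
print(pipe.hf_device_map)  # e.g. {"unet": 0, "text_encoder": 1, "vae": 1, ...}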
    @require_torch_multi_gpu
    @slow
    @nightly
    def test_calling_mco_raises_error_device_mapped_components(self):
        if "Combined" in self.pipeline_class.__name__:
            return

        # TODO (sayakpaul): skip these for now. revisit later.
        components = self.get_dummy_components()
        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
            return

        pipe = self.pipeline_class(**components)
        max_model_size = max(
            compute_module_sizes(module)[""]
            for _, module in pipe.components.items()
            if isinstance(module, torch.nn.Module)
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            max_memory = {0: max_model_size, 1: max_model_size}
            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)

            with self.assertRaises(ValueError) as err_context:
                loaded_pipe.enable_model_cpu_offload()

            self.assertTrue(
                "The following pipeline components have been found" in str(err_context.exception)
                and "This is incompatible with `enable_model_cpu_offload()`" in str(err_context.exception)
            )
    @require_torch_multi_gpu
    @slow
    @nightly
    def test_calling_sco_raises_error_device_mapped_components(self):
        if "Combined" in self.pipeline_class.__name__:
            return

        # TODO (sayakpaul): skip these for now. revisit later.
        components = self.get_dummy_components()
        if any(isinstance(component, (MultiControlNetModel, MultiAdapter)) for component in components):
            return

        pipe = self.pipeline_class(**components)
        max_model_size = max(
            compute_module_sizes(module)[""]
            for _, module in pipe.components.items()
            if isinstance(module, torch.nn.Module)
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            max_memory = {0: max_model_size, 1: max_model_size}
            loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)

            with self.assertRaises(ValueError) as err_context:
                loaded_pipe.enable_sequential_cpu_offload()

            self.assertTrue(
                "The following pipeline components have been found" in str(err_context.exception)
                and "This is incompatible with `enable_sequential_cpu_offload()`" in str(err_context.exception)
            )
@is_staging_test
class PipelinePushToHubTester(unittest.TestCase):
Review comment: After-effects of `make fix-copies`.