FIX Don't eagerly import bnb for LoftQ (#1683)
We accidentally added code in loftq_utils.py that eagerly imports bnb,
which we want to avoid because it can cause CUDA to be initialized too early.
BenjaminBossan committed Apr 25, 2024
1 parent f0d3c6b commit b1d6c77
Showing 1 changed file with 7 additions and 4 deletions.
src/peft/utils/loftq_utils.py
@@ -31,10 +31,6 @@
 from peft.import_utils import is_bnb_4bit_available, is_bnb_available
 
 
-if is_bnb_available():
-    import bitsandbytes as bnb
-
-
 class NFQuantizer:
     def __init__(self, num_bits=2, device="cuda", method="normal", block_size=64, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -192,6 +188,11 @@ def _low_rank_decomposition(weight, reduced_rank=32):
 
 @torch.no_grad()
 def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, reduced_rank: int, num_iter=1):
+    if is_bnb_available():
+        import bitsandbytes as bnb
+    else:
+        raise ValueError("bitsandbytes is not available, please install it to use LoftQ.")
+
     if num_bits not in [2, 4, 8]:
         raise ValueError("Only support 2, 4, 8 bits quantization")
     if num_iter <= 0:
@@ -239,6 +240,8 @@ def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, r
 
 @torch.no_grad()
 def _loftq_init_new(qweight, weight, num_bits: int, reduced_rank: int):
+    import bitsandbytes as bnb
+
     if num_bits != 4:
         raise ValueError("Only 4 bit quantization supported at the moment.")
     if not is_bnb_4bit_available():
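For readers who want to reuse the pattern outside PEFT, here is a minimal sketch of the guarded lazy-import idiom this commit adopts. The module and function names (lazy_bnb, quantize_4bit_weight) are hypothetical, and is_bnb_available below is a simplified stand-in for peft.import_utils.is_bnb_available; the bnb.functional.quantize_4bit call is a real bitsandbytes API, used here only for illustration.

# lazy_bnb.py -- hypothetical stand-in module, not part of PEFT.
# Sketch of the pattern this commit applies: keep module import cheap by
# deferring the bitsandbytes import to call time, because importing
# bitsandbytes may initialize CUDA as a side effect.
import importlib.util


def is_bnb_available() -> bool:
    # Detect the package without importing it, so no CUDA state is touched.
    return importlib.util.find_spec("bitsandbytes") is not None


def quantize_4bit_weight(weight):
    # The heavy import happens here, on first use, not when this module loads.
    if not is_bnb_available():
        raise ValueError("bitsandbytes is not available, please install it to use LoftQ.")
    import bitsandbytes as bnb

    # quantize_4bit expects a CUDA tensor and returns (packed_tensor, quant_state).
    return bnb.functional.quantize_4bit(weight, quant_type="nf4")

A quick way to check that the fix holds is to import the module in a fresh Python process and confirm that torch.cuda.is_initialized() is still False afterwards.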
