Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bad test about slower init #32002

Merged
merged 1 commit on Jul 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 3 additions & 17 deletions tests/utils/test_modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import sys
import tempfile
import threading
import time
import unittest
import unittest.mock as mock
import uuid
Expand Down Expand Up @@ -895,28 +894,22 @@ def test_from_pretrained_low_cpu_mem_usage_functional(self):
@require_usr_bin_time
@require_accelerate
@mark.accelerate_tests
def test_from_pretrained_low_cpu_mem_usage_slower(self):
def test_from_pretrained_low_cpu_mem_usage_equal(self):
# Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default
# Now though the memory is the same, we simply test that loading with `low_cpu_mem_usage` winds up being *slower*
# (mostly from extra logic needed)
# Now though these should be around the same.
# TODO: Look for good bounds to check that their timings are near the same

mname = "hf-internal-testing/tiny-random-bert"

preamble = "from transformers import AutoModel"
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)'
start_time = time.time()
# Save this output as `max_rss_normal` if testing memory results
max_rss_normal = self.python_one_liner_max_rss(one_liner_str)
end_time = time.time()
elapsed_time_normal = end_time - start_time
# print(f"{max_rss_normal=}")

one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)'
start_time = time.time()
# Save this output as `max_rss_low_mem` if testing memory results
max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str)
end_time = time.time()
elapsed_time_low_mem = end_time - start_time

# Should be within 2MBs of each other (overhead)
self.assertAlmostEqual(
Expand All @@ -926,13 +919,6 @@ def test_from_pretrained_low_cpu_mem_usage_slower(self):
msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.",
)

self.assertGreater(
elapsed_time_low_mem,
elapsed_time_normal,
"using `low_cpu_mem_usage` should be slower due to extra logic, "
f"but got elapsed_time_normal={elapsed_time_normal} and elapsed_time_low_mem={elapsed_time_low_mem}",
)

# if you want to compare things manually, let's first look at the size of the model in bytes
# model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False)
# total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
Expand Down
Loading