diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index deaac1755401ae..ec39c1428b28c8 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -20,7 +20,6 @@ import sys import tempfile import threading -import time import unittest import unittest.mock as mock import uuid @@ -895,28 +894,22 @@ def test_from_pretrained_low_cpu_mem_usage_functional(self): @require_usr_bin_time @require_accelerate @mark.accelerate_tests - def test_from_pretrained_low_cpu_mem_usage_slower(self): + def test_from_pretrained_low_cpu_mem_usage_equal(self): # Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default - # Now though the memory is the same, we simply test that loading with `low_cpu_mem_usage` winds up being *slower* - # (mostly from extra logic needed) + # Now though these should be around the same. + # TODO: Look for good bounds to check that their timings are near the same mname = "hf-internal-testing/tiny-random-bert" preamble = "from transformers import AutoModel" one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)' - start_time = time.time() # Save this output as `max_rss_normal` if testing memory results max_rss_normal = self.python_one_liner_max_rss(one_liner_str) - end_time = time.time() - elapsed_time_normal = end_time - start_time # print(f"{max_rss_normal=}") one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)' - start_time = time.time() # Save this output as `max_rss_low_mem` if testing memory results max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str) - end_time = time.time() - elapsed_time_low_mem = end_time - start_time # Should be within 2MBs of each other (overhead) self.assertAlmostEqual( @@ -926,13 +919,6 @@ def test_from_pretrained_low_cpu_mem_usage_slower(self): msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.", ) - self.assertGreater( - elapsed_time_low_mem, - elapsed_time_normal, - "using `low_cpu_mem_usage` should be slower due to extra logic, " - f"but got elapsed_time_normal={elapsed_time_normal} and elapsed_time_low_mem={elapsed_time_low_mem}", - ) - # if you want to compare things manually, let's first look at the size of the model in bytes # model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False) # total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())