Parallel test with pytest-xdist (#518)
* try pytest-xdist

* run multi_gpu tests separately

* each worker runs 1 file

* use loadscope to balance load better (for test_integration.py)

* change back to loadfile

* per-test schedule

* per-test sharding. avoid name collision
gau-nernst authored Jul 18, 2024
1 parent 4f53882 commit cbaff6c
Showing 6 changed files with 14 additions and 4 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/regression_test.yml
@@ -68,4 +68,5 @@ jobs:
pip install ${{ matrix.torch-spec }}
pip install -r dev-requirements.txt
pip install .
-pytest test --verbose -s
+pytest test --verbose -s -m "not multi_gpu" --dist load --tx popen//env:CUDA_VISIBLE_DEVICES=0 --tx popen//env:CUDA_VISIBLE_DEVICES=1 --tx popen//env:CUDA_VISIBLE_DEVICES=2 --tx popen//env:CUDA_VISIBLE_DEVICES=3
+pytest test --verbose -s -m "multi_gpu"
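
The first invocation starts four xdist workers (one `--tx popen//env:CUDA_VISIBLE_DEVICES=<n>` spec per worker), so each worker process is pinned to a single GPU, and `--dist load` hands tests to workers one at a time; tests marked `multi_gpu` are excluded there and run afterwards in a single process that can see all GPUs. A minimal sketch of what the pinning means inside a worker (this test is illustrative, not part of the commit):

    import os
    import pytest
    import torch

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")
    def test_worker_is_pinned_to_one_gpu():
        # With CUDA_VISIBLE_DEVICES set to a single index by the xdist spec,
        # only that physical GPU is visible and it appears as cuda:0.
        assert torch.cuda.device_count() == 1
        print("visible device:", os.environ.get("CUDA_VISIBLE_DEVICES"))
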
1 change: 1 addition & 0 deletions dev-requirements.txt
@@ -7,6 +7,7 @@ transformers
hypothesis # Avoid test derandomization warning
sentencepiece # for gpt-fast tokenizer
expecttest
+pytest-xdist

# For prototype features and benchmarks
bitsandbytes #needed for testing triton quant / dequant ops for 8-bit optimizers
3 changes: 3 additions & 0 deletions pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    multi_gpu: marks tests as require multi GPUs (deselect with '-m "not multi_gpu"')
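
A marker registered here can be attached to tests with `@pytest.mark.multi_gpu` and filtered on the command line, which is how the two CI invocations above split the suite. A minimal sketch (the test body is illustrative only):

    import pytest

    @pytest.mark.multi_gpu
    def test_needs_several_gpus():
        # Selected by:   pytest -m "multi_gpu"
        # Deselected by: pytest -m "not multi_gpu"
        assert True
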
1 change: 1 addition & 0 deletions test/dtypes/test_nf4.py
@@ -486,6 +486,7 @@ class TestQLoRA(FSDPTest):
    def world_size(self) -> int:
        return 2

+    @pytest.mark.multi_gpu
    @pytest.mark.skipif(
        version.parse(torch.__version__).base_version < "2.4.0",
        reason="torch >= 2.4 required",
9 changes: 6 additions & 3 deletions test/integration/test_integration.py
@@ -985,7 +985,10 @@ def forward(self, x):
        # save quantized state_dict
        api(model)

-        torch.save(model.state_dict(), "test.pth")
+        # unique filename to avoid collision in parallel tests
+        ckpt_name = f"{api.__name__}_{test_device}_{test_dtype}_test.pth"
+
+        torch.save(model.state_dict(), ckpt_name)
        # get quantized reference
        model_qc = torch.compile(model, mode="max-autotune")
        ref_q = model_qc(x).detach()
@@ -998,8 +1001,8 @@ def forward(self, x):
        api(model)

        # load quantized state_dict
-        state_dict = torch.load("test.pth", mmap=True)
-        os.remove("test.pth")
+        state_dict = torch.load(ckpt_name, mmap=True)
+        os.remove(ckpt_name)

        model.load_state_dict(state_dict, assign=True)
        model = model.to(device=test_device, dtype=test_dtype).eval()
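
Keying the checkpoint name on the API, device, and dtype keeps concurrent xdist workers from overwriting each other's files. An alternative that avoids manual naming entirely (not what this commit does) is pytest's built-in `tmp_path` fixture, which gives every test its own temporary directory; a rough sketch:

    import torch
    from torch import nn

    def test_state_dict_roundtrip(tmp_path):
        # tmp_path is unique per test, so parallel workers never collide.
        model = nn.Linear(4, 4)
        ckpt = tmp_path / "test.pth"
        torch.save(model.state_dict(), ckpt)
        state_dict = torch.load(ckpt, mmap=True)
        model.load_state_dict(state_dict, assign=True)
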
1 change: 1 addition & 0 deletions test/prototype/test_low_bit_optim.py
@@ -163,6 +163,7 @@ class TestFSDP2(FSDPTest):
    def world_size(self) -> int:
        return 2

+    @pytest.mark.multi_gpu
    @pytest.mark.skipif(not TORCH_VERSION_AFTER_2_4, reason="torch >= 2.4 required")
    @skip_if_lt_x_gpu(2)
    def test_fsdp2(self):
