bump macos to m1 #1725

Merged
47 commits merged on Sep 24, 2024
0bd56c6
bump macos to m1
t-vi Sep 13, 2024
08ba84d
try skip
t-vi Sep 13, 2024
334bd8b
add sys
t-vi Sep 13, 2024
671ba25
experimentally run tests separately
t-vi Sep 13, 2024
4ef2617
try to find segfaulting test
t-vi Sep 13, 2024
367b41c
sprinkle skip
t-vi Sep 13, 2024
87b3043
more sprinkle
t-vi Sep 13, 2024
299f95c
skip some imports
t-vi Sep 13, 2024
452e343
skip all
t-vi Sep 13, 2024
abe05ed
drop external for loop again
t-vi Sep 13, 2024
4ba4848
add back two import
t-vi Sep 13, 2024
aee1ad3
more commenting out modules
t-vi Sep 13, 2024
3aea569
Merge branch 'main' into tom/mac-runners
rasbt Sep 13, 2024
80e8548
test sth
rasbt Sep 16, 2024
6851008
skip out-of-memory issues on macos CI
rasbt Sep 16, 2024
c9afc8b
update
rasbt Sep 16, 2024
998a0fd
update
rasbt Sep 16, 2024
1c68608
update
rasbt Sep 16, 2024
9dc677f
update
rasbt Sep 16, 2024
562cb19
add back api tests
rasbt Sep 16, 2024
1e226e5
truncate test_api.py
rasbt Sep 23, 2024
f036e88
Add back test_api function one at a time to find segfault culprit
rasbt Sep 23, 2024
a11ae86
add more tests back
rasbt Sep 23, 2024
09cd6d7
Update test_api.py
rasbt Sep 23, 2024
dbfcd96
Update test_api.py
rasbt Sep 23, 2024
74eca33
Update test_api.py
rasbt Sep 23, 2024
2b5bbc8
Update test_api.py
rasbt Sep 23, 2024
bd6073a
Update test_api.py
rasbt Sep 23, 2024
5ae3dbf
Update test_api.py
rasbt Sep 23, 2024
baebff2
Update test_api.py
rasbt Sep 23, 2024
47ef4fb
Update test_api.py
rasbt Sep 23, 2024
e8a70a1
Update cpu-tests.yml
rasbt Sep 23, 2024
3448e70
Update test_api.py
rasbt Sep 23, 2024
9190764
Update test_api.py
rasbt Sep 23, 2024
467cc72
Update test_api.py
rasbt Sep 23, 2024
a110fc3
Update test_api.py
rasbt Sep 23, 2024
5945c71
test only on cpu
rasbt Sep 23, 2024
fc9a20f
update
rasbt Sep 23, 2024
8b97873
update
rasbt Sep 23, 2024
5895a20
add tests back
rasbt Sep 23, 2024
c92e564
disable mps in CI
rasbt Sep 23, 2024
8861e46
mock litgpt
rasbt Sep 24, 2024
e92889f
add test matrix back
rasbt Sep 24, 2024
8c1bfea
udpdates
rasbt Sep 24, 2024
4dc3862
updates
rasbt Sep 24, 2024
270a0f5
upgrade to macos 15
rasbt Sep 24, 2024
624607d
revert
rasbt Sep 24, 2024
2 changes: 1 addition & 1 deletion .github/workflows/cpu-tests.yml
@@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
include:
- {os: "macOS-12", python-version: "3.10"}
- {os: "macOS-14", python-version: "3.10"}
- {os: "ubuntu-22.04", python-version: "3.11"}
- {os: "ubuntu-22.04", python-version: "3.10"}
- {os: "ubuntu-22.04", python-version: "3.9"}
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
.ipynb_checkpoints/
__pycache__
.idea
.DS_Store
108 changes: 66 additions & 42 deletions tests/test_api.py
@@ -3,11 +3,12 @@
from collections import OrderedDict
import os
from pathlib import Path
import sys

import pytest
import re
import torch
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch
from tests.conftest import RunIf

from lightning.fabric.accelerators import CUDAAccelerator
@@ -20,6 +21,13 @@
from litgpt.scripts.download import download_from_hub


if sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true":
USE_MPS = False
elif torch.backends.mps.is_available():
USE_MPS = True
else:
USE_MPS = False


@pytest.fixture
def mock_llm():
@@ -83,11 +91,12 @@ def test_llm_load_random_init(tmp_path):
download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path)

torch.manual_seed(123)
llm = LLM.load(
model="pythia-160m",
init="random",
tokenizer_dir=Path(tmp_path/"EleutherAI/pythia-14m")
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="pythia-160m",
init="random",
tokenizer_dir=Path(tmp_path/"EleutherAI/pythia-14m")
)

input_text = "some text text"
output_text = llm.generate(input_text, max_new_tokens=15)
@@ -110,10 +119,11 @@

def test_llm_load_hub_init(tmp_path):
torch.manual_seed(123)
llm = LLM.load(
model="EleutherAI/pythia-14m",
init="pretrained"
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
init="pretrained"
)

text_1 = llm.generate("text", max_new_tokens=10, top_k=1)
assert len(text_1) > 0
@@ -159,9 +169,10 @@ def test_more_than_1_device_for_sequential_gpu(tmp_path):
model_name = "EleutherAI/pythia-14m"
else:
model_name = "EleutherAI/pythia-160m"
llm = LLM.load(
model=model_name,
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model=model_name,
)

with pytest.raises(NotImplementedError, match=f"Support for multiple devices is currently only implemented for generate_strategy='sequential'|'tensor_parallel'."):
llm.distribute(devices=2)
@@ -181,9 +192,10 @@

@RunIf(min_cuda_gpus=2)
def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)

if os.getenv("CI") != "true":
# this crashes the CI, maybe because of process forking; works fine locally though
@@ -193,20 +205,24 @@ def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path):

@RunIf(min_cuda_gpus=1)
def test_sequential_tp_incompatibility_with_random_weights(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
tokenizer_dir="EleutherAI/pythia-14m",
init="random"
)

with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
tokenizer_dir="EleutherAI/pythia-14m",
init="random"
)
for strategy in ("sequential", "tensor_parallel"):
with pytest.raises(NotImplementedError, match=re.escape("The LLM was initialized with init='random' but .distribute() currently only supports pretrained weights.")):
llm.distribute(devices=1, generate_strategy=strategy)


def test_sequential_tp_cpu(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
distribute=None,
)
for strategy in ("sequential", "tensor_parallel"):
with pytest.raises(NotImplementedError, match=f"generate_strategy='{strategy}' is only supported for accelerator='cuda'|'gpu'."):
llm.distribute(
@@ -235,19 +251,21 @@ def test_initialization_for_trainer(tmp_path):

@RunIf(min_cuda_gpus=1)
def test_quantization_is_applied(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
llm.distribute(devices=1, quantize="bnb.nf4", precision="bf16-true")
strtype = str(type(llm.model.lm_head))
assert "NF4Linear" in strtype, strtype


@RunIf(min_cuda_gpus=1)
def test_fixed_kv_cache(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
llm.distribute(devices=1, fixed_kv_cache_size=100)

# Request too many tokens
@@ -258,15 +276,17 @@
def test_invalid_accelerator(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
distribute=None
)
with pytest.raises(ValueError, match="Invalid accelerator"):
llm.distribute(accelerator="invalid")


def test_returned_benchmark_dir(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)

text, bench_d = llm.benchmark(prompt="hello world")
assert isinstance(bench_d["Inference speed in tokens/sec"], list)
@@ -305,6 +325,7 @@ def test_benchmark_dict_to_markdown_table_single_values():

assert benchmark_dict_to_markdown_table(bench_d) == expected_output


def test_benchmark_dict_to_markdown_table_multiple_values():
bench_d_list = {
'Inference speed in tokens/sec': [17.034547562152305, 32.8974175404589, 33.04784205046782, 32.445697744648584,
@@ -335,17 +356,19 @@ def test_benchmark_dict_to_markdown_table_multiple_values():


def test_state_dict(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
assert isinstance(llm.state_dict(), OrderedDict)
assert llm.state_dict()['lm_head.weight'].shape == torch.Size([50304, 128])


def test_save_method(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)

target_dir = "saved_model"
llm.save(target_dir)
@@ -366,9 +389,10 @@ def test_save_method(tmp_path):


def test_forward_method(tmp_path):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
with patch("torch.backends.mps.is_available", return_value=USE_MPS):
llm = LLM.load(
model="EleutherAI/pythia-14m",
)
inputs = torch.ones(6, 128, dtype=torch.int64).to(next(llm.model.parameters()).device)

assert llm(inputs).shape == torch.Size([6, 128, 50304])
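
Note: the tests/test_api.py changes above gate MPS behind a module-level USE_MPS flag and wrap each LLM.load call in patch("torch.backends.mps.is_available", return_value=USE_MPS), so that accelerator selection falls back to CPU on the GitHub macOS runners, which the commit messages describe as running out of memory. The following is a minimal standalone sketch of that pattern; the placeholder test body is illustrative and not part of the PR.

import os
import sys
from unittest.mock import patch

import torch

# Same gating as the diff: never report MPS as available on a macOS GitHub
# Actions runner; otherwise defer to whatever the local hardware supports.
if sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true":
    USE_MPS = False
else:
    USE_MPS = torch.backends.mps.is_available()


def test_accelerator_selection_is_gated():
    # The patch makes any availability check inside the block (presumably the
    # one performed when LLM.load picks an accelerator) see USE_MPS instead of
    # the real hardware.
    with patch("torch.backends.mps.is_available", return_value=USE_MPS):
        assert torch.backends.mps.is_available() is USE_MPS
        # e.g. llm = LLM.load(model="EleutherAI/pythia-14m") would go here
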
11 changes: 11 additions & 0 deletions tests/test_chat.py
@@ -7,6 +7,7 @@
from itertools import repeat
from pathlib import Path
from unittest.mock import ANY, MagicMock, Mock, call, patch
import sys
from typing import Iterable

import pytest
@@ -20,6 +21,12 @@
from litgpt.utils import save_config, auto_download_checkpoint


skip_in_ci_on_macos = pytest.mark.skipif(
sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true",
reason="Skipped on macOS in CI environment because CI machine does not have enough memory to run this test."
)


@pytest.mark.parametrize(
("generated", "stop_tokens", "expected"),
[
@@ -80,6 +87,7 @@ def test_decode():
assert text == decoded, (text, decoded)


@skip_in_ci_on_macos
@patch("litgpt.chat.base.input")
@pytest.mark.parametrize("stop_iteration", [KeyboardInterrupt, ""])
def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeypatch, tensor_like):
@@ -129,6 +137,7 @@ def test_cli():
assert "Chat with a model" in output


@skip_in_ci_on_macos
@patch("litgpt.chat.base.input")
@patch("litgpt.chat.base.merge_lora")
def test_merge_lora_if_needed(mocked_merge_lora, mocked_input, fake_checkpoint_dir, monkeypatch, tensor_like):
@@ -152,6 +161,7 @@ def test_merge_lora_if_needed(mocked_merge_lora, mocked_input, fake_checkpoint_d
mocked_merge_lora.assert_called_once()


@skip_in_ci_on_macos
def test_litgpt_chat_endtoend():
from litgpt.chat.base import main

@@ -172,6 +182,7 @@ def test_litgpt_chat_endtoend():
assert simulated_input.call_count == 2


@skip_in_ci_on_macos
def test_litgpt_generate_endtoend():
from litgpt.generate.base import main

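
Note: tests/test_chat.py, tests/test_generate.py, and tests/test_generate_adapter.py each define the same skip_in_ci_on_macos marker and attach it to the memory-heavy end-to-end tests. A small sketch of how such a conditional skip is defined and applied; the placeholder test below is illustrative and not from the PR.

import os
import sys

import pytest

# The test is still collected everywhere, but skipped when running on a macOS
# GitHub Actions runner.
skip_in_ci_on_macos = pytest.mark.skipif(
    sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true",
    reason="macOS CI runners do not have enough memory for this test.",
)


@skip_in_ci_on_macos
def test_placeholder_endtoend():
    assert True  # the real tests load checkpoints and generate text here
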
4 changes: 2 additions & 2 deletions tests/test_convert_lit_checkpoint.py
@@ -379,7 +379,7 @@ def test_against_original_gemma(model_name, device, dtype):
theirs_state_dict = {}
copy_weights_llama(ours_config, theirs_state_dict, ours_state_dict, untie_weights=True)
theirs_model = GemmaForCausalLM(theirs_config).to(device)
theirs_model.load_state_dict(theirs_state_dict, strict=False)
theirs_model.load_state_dict(theirs_state_dict, strict=False,)

# test end to end
x = torch.tensor([[9856, 23, 491, 1536, 304]], dtype=torch.int32, device=device)
@@ -459,7 +459,7 @@ def test_against_original_gemma_2(model_name, device, dtype):
assert x.size(1) == T
ours_y = ours_model(x)
theirs_y = theirs_model(x)["logits"].to(dtype) # HF converts logits to float
torch.testing.assert_close(ours_y, theirs_y)
torch.testing.assert_close(ours_y, theirs_y, rtol=3e-5, atol=3e-5)


def test_check_conversion_supported_adapter():
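
Note: the Gemma parity checks in tests/test_convert_lit_checkpoint.py and tests/test_model.py now pass explicit tolerances to torch.testing.assert_close, presumably to absorb slightly different floating-point rounding on the Apple Silicon runners compared with the old macOS-12 images. A toy illustration of what the looser bounds accept; the tensors below are made up for demonstration.

import torch

torch.manual_seed(0)
ours = torch.randn(4, 16)
# Simulate a small numerical discrepancy between two implementations
# (strictly less than 3e-5 per element).
theirs = ours + 2e-5 * torch.rand_like(ours)

# With the default float32 tolerances (roughly rtol=1.3e-6, atol=1e-5) this
# comparison could fail:
# torch.testing.assert_close(ours, theirs)

# The relaxed bounds used in the PR accept the discrepancy.
torch.testing.assert_close(ours, theirs, rtol=3e-5, atol=3e-5)
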
8 changes: 8 additions & 0 deletions tests/test_generate.py
@@ -5,6 +5,7 @@
import sys
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO
import os
from pathlib import Path
from unittest import mock
from unittest.mock import ANY, Mock, call
Expand All @@ -18,6 +19,12 @@
from litgpt.generate.base import sample


skip_in_ci_on_macos = pytest.mark.skipif(
sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true",
reason="Skipped on macOS in CI environment because CI machine does not have enough memory to run this test."
)


@pytest.mark.parametrize(
"max_seq_length", (pytest.param(10, marks=pytest.mark.xfail(raises=NotImplementedError, strict=True)), 20 + 5)
)
@@ -51,6 +58,7 @@ def multinomial(*args, **kwargs):
torch.testing.assert_close(out, expected)


@skip_in_ci_on_macos
def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
config_path = fake_checkpoint_dir / "model_config.yaml"
config = {"block_size": 128, "vocab_size": 50, "n_layer": 2, "n_head": 4, "n_embd": 8, "rotary_percentage": 1}
8 changes: 8 additions & 0 deletions tests/test_generate_adapter.py
@@ -5,6 +5,7 @@
import sys
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO
import os
from pathlib import Path
from unittest.mock import ANY, Mock, call

@@ -13,6 +14,13 @@
import yaml


skip_in_ci_on_macos = pytest.mark.skipif(
sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS") == "true",
reason="Skipped on macOS in CI environment because CI machine does not have enough memory to run this test."
)


@skip_in_ci_on_macos
@pytest.mark.parametrize("version", ("v1", "v2"))
def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like):
if version == "v1":
2 changes: 1 addition & 1 deletion tests/test_model.py
@@ -659,7 +659,7 @@ def test_against_original_gemma_2(model_name, device, dtype):
assert x.size(1) == T
ours_y = ours_model(x)
theirs_y = theirs_model(x)["logits"].to(dtype) # HF converts logits to float
torch.testing.assert_close(ours_y, theirs_y)
torch.testing.assert_close(ours_y, theirs_y, rtol=3e-5, atol=3e-5)


@RunIf(dynamo=True)
Expand Down