lit_config.json -> model_config.yaml #1096

Merged · 6 commits · Mar 12, 2024
2 changes: 1 addition & 1 deletion eval/lm_eval_harness.py
@@ -157,7 +157,7 @@ def run_eval_harness(
check_valid_checkpoint_dir(checkpoint_dir)
tokenizer = Tokenizer(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

2 changes: 1 addition & 1 deletion litgpt/chat/base.py
@@ -132,7 +132,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

22 changes: 11 additions & 11 deletions litgpt/config.py
@@ -1,6 +1,6 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.

- import json
+ import yaml
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
@@ -107,29 +107,29 @@ def from_name(cls, name: str, **kwargs: Any) -> Self:
return cls(**conf_dict)

@classmethod
- def from_json(cls, path: Union[str, Path], **kwargs: Any) -> Self:
+ def from_file(cls, path: Union[str, Path], **kwargs: Any) -> Self:
with open(path, encoding="utf-8") as fp:
- json_kwargs = json.load(fp)
- json_kwargs.update(kwargs)
- return cls(**json_kwargs)
+ file_kwargs = yaml.safe_load(fp)
+ file_kwargs.update(kwargs)
+ return cls(**file_kwargs)

@classmethod
def from_checkpoint(cls, path: Path, **kwargs: Any) -> Self:
"""Automatically load `lit_config.json` and if it doesn't exist - a matching config from `litgpt/config.py`."""
if (config_path := path / "lit_config.json").is_file():
return cls.from_json(config_path, **kwargs)
"""Automatically load `model_config.yaml` and if it doesn't exist - a matching config from `litgpt/config.py`."""
if (config_path := path / "model_config.yaml").is_file():
return cls.from_file(config_path, **kwargs)
if (model_name := path.name) in name_to_config:
return cls.from_name(model_name, **kwargs)
- raise FileNotFoundError(f"For {str(path)!r} neither 'lit_config.json' nor matching config exists.")
+ raise FileNotFoundError(f"For {str(path)!r} neither 'model_config.yaml' nor matching config exists.")

@property
def mlp_class(self) -> Type:
- # `self.mlp_class_name` cannot be the type to keep the config json serializable
+ # `self.mlp_class_name` cannot be the type to keep the config serializable
return getattr(litgpt.model, self.mlp_class_name)

@property
def norm_class(self) -> Type:
- # `self.norm_class_name` cannot be the type to keep the config json serializable
+ # `self.norm_class_name` cannot be the type to keep the config serializable
if self.norm_class_name == "RMSNorm":
from functools import partial

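To make the rename concrete, here is a minimal sketch of how the renamed loader is used after this change (the checkpoint path and the override value below are illustrative, not from this PR):

```python
from pathlib import Path

from litgpt import Config

# Hypothetical checkpoint directory; after this PR it contains
# model_config.yaml instead of lit_config.json.
checkpoint_dir = Path("checkpoints/EleutherAI/pythia-14m")

# from_file parses the YAML file; keyword arguments override fields read from it.
config = Config.from_file(checkpoint_dir / "model_config.yaml", block_size=2048)

# from_checkpoint prefers model_config.yaml and otherwise falls back to a
# matching predefined config in litgpt/config.py.
config = Config.from_checkpoint(checkpoint_dir)
print(config.name, config.block_size)
```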
2 changes: 1 addition & 1 deletion litgpt/generate/adapter.py
@@ -61,7 +61,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

2 changes: 1 addition & 1 deletion litgpt/generate/adapter_v2.py
@@ -61,7 +61,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

2 changes: 1 addition & 1 deletion litgpt/generate/base.py
@@ -134,7 +134,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

2 changes: 1 addition & 1 deletion litgpt/generate/full.py
@@ -60,7 +60,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = finetuned_path

4 changes: 2 additions & 2 deletions litgpt/generate/lora.py
@@ -70,8 +70,8 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(
- checkpoint_dir / "lit_config.json",
+ config = Config.from_file(
+ checkpoint_dir / "model_config.yaml",
lora_r=lora_r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
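As before with from_json, the LoRA entry point layers its hyperparameters on top of the file as keyword overrides. A rough sketch, assuming the LoRA-aware Config exposed by litgpt.lora (the directory and values are illustrative):

```python
from pathlib import Path

# Assumption: litgpt.lora provides a Config subclass that adds the lora_* fields
# accepted by litgpt/generate/lora.py.
from litgpt.lora import Config

checkpoint_dir = Path("checkpoints/EleutherAI/pythia-14m")  # hypothetical

# Values stored in model_config.yaml are used unless overridden here.
config = Config.from_file(
    checkpoint_dir / "model_config.yaml",
    lora_r=8,
    lora_alpha=16,
    lora_dropout=0.05,
)
```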
2 changes: 1 addition & 1 deletion litgpt/generate/sequentially.py
@@ -159,7 +159,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

checkpoint_path = checkpoint_dir / "lit_model.pth"

2 changes: 1 addition & 1 deletion litgpt/generate/tp.py
@@ -138,7 +138,7 @@ def main(

check_valid_checkpoint_dir(checkpoint_dir)

- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

model_file = "lit_model.pth"
checkpoint_path = checkpoint_dir / model_file
13 changes: 7 additions & 6 deletions litgpt/prompts.py
@@ -1,12 +1,13 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
import importlib
- import json
import re
from abc import abstractmethod
from json import dumps
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Type, Tuple, Union

+ import yaml

from litgpt.config import Config

if TYPE_CHECKING:
@@ -338,13 +339,13 @@ def save_prompt_style(style: Union[str, PromptStyle], checkpoint_dir: Path) -> N
cls = type(style)
# Allow saving the full module path for user-defined prompt classes
config = {"class_path": f"{cls.__module__}.{cls.__name__}"}
with open(checkpoint_dir / "prompt_style.json", "w") as file:
json.dump(config, file)
with open(checkpoint_dir / "prompt_style.yaml", "w") as file:
yaml.dump(config, file)


def load_prompt_style(checkpoint_dir: Path) -> PromptStyle:
with open(checkpoint_dir / "prompt_style.json", "r") as file:
config = json.load(file)
with open(checkpoint_dir / "prompt_style.yaml", "r") as file:
config = yaml.safe_load(file)
# Support loading the full module path for user-defined prompt classes
full_module_path, cls_name = config["class_path"].rsplit(".", 1)
module = importlib.import_module(full_module_path)
@@ -353,4 +354,4 @@ def load_prompt_style(checkpoint_dir: Path) -> PromptStyle:


def has_prompt_style(checkpoint_dir: Path) -> bool:
- return (checkpoint_dir / "prompt_style.json").is_file()
+ return (checkpoint_dir / "prompt_style.yaml").is_file()
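The prompt-style metadata gets the same JSON-to-YAML treatment; a small roundtrip sketch of the new prompt_style.yaml (the output directory and the built-in style name are assumptions, not taken from this PR):

```python
from pathlib import Path

from litgpt.prompts import has_prompt_style, load_prompt_style, save_prompt_style

checkpoint_dir = Path("out/example-checkpoint")  # hypothetical directory
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# Writes prompt_style.yaml containing a single class_path entry,
# e.g. "litgpt.prompts.Alpaca" (assuming "alpaca" is a registered style name).
save_prompt_style("alpaca", checkpoint_dir)

if has_prompt_style(checkpoint_dir):
    style = load_prompt_style(checkpoint_dir)  # re-imports the class from class_path
    print(type(style).__name__)
```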
2 changes: 1 addition & 1 deletion litgpt/scripts/convert_lit_checkpoint.py
@@ -241,7 +241,7 @@ def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> None:
@torch.inference_mode()
def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None:
"""Convert a LitGPT trained checkpoint into a Hugging Face Transformers checkpoint."""
- config = Config.from_json(checkpoint_dir / "lit_config.json")
+ config = Config.from_file(checkpoint_dir / "model_config.yaml")

output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / "model.pth"
2 changes: 1 addition & 1 deletion litgpt/scripts/merge_lora.py
@@ -43,7 +43,7 @@ def merge_lora(
precision = precision if precision is not None else lora_precision

fabric = L.Fabric(devices=1, precision=precision)
- config = Config.from_json(checkpoint_dir / "lit_config.json", **lora_params)
+ config = Config.from_file(checkpoint_dir / "model_config.yaml", **lora_params)

with fabric.init_module(empty_init=True):
model = GPT(config)
10 changes: 5 additions & 5 deletions litgpt/utils.py
@@ -1,7 +1,6 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.

"""Utility functions for training and inference."""
- import json
import math
import pickle
import shutil
@@ -15,6 +14,7 @@
import torch
import torch.nn as nn
import torch.utils._device
+ import yaml
from lightning.fabric.loggers import CSVLogger, TensorBoardLogger
from lightning.fabric.strategies import FSDPStrategy
from lightning.fabric.utilities.load import _lazy_load as lazy_load
@@ -50,7 +50,7 @@ def check_valid_checkpoint_dir(checkpoint_dir: Path, lora: bool = False) -> None
model_filename = "lit_model.pth.lora" if lora else "lit_model.pth"
files = {
model_filename: (checkpoint_dir / model_filename).is_file(),
"lit_config.json": (checkpoint_dir / "lit_config.json").is_file(),
"model_config.yaml": (checkpoint_dir / "model_config.yaml").is_file(),
"tokenizer.json OR tokenizer.model": (checkpoint_dir / "tokenizer.json").is_file()
or (checkpoint_dir / "tokenizer.model").is_file(),
"tokenizer_config.json": (checkpoint_dir / "tokenizer_config.json").is_file(),
@@ -379,7 +379,7 @@ def __iter__(self) -> Self:
def copy_config_files(source_dir: Path, out_dir: Path) -> None:
"""Copies the specified configuration and tokenizer files into the output directory."""

config_files = ["generation_config.json", "lit_config.json"]
config_files = ["generation_config.json", "model_config.yaml"]
tokenizer_files = ["tokenizer.json", "tokenizer.model", "tokenizer_config.json"]

for file_name in config_files + tokenizer_files:
@@ -410,8 +410,8 @@ def save_hyperparameters(function: callable, checkpoint_dir: Path) -> None:

def save_config(config: "Config", checkpoint_dir: Path) -> None:
config_dict = asdict(config)
with open(checkpoint_dir / "lit_config.json", "w") as json_config:
json.dump(config_dict, json_config)
with open(checkpoint_dir / "model_config.yaml", "w") as fp:
yaml.dump(config_dict, fp)


def parse_devices(devices: Union[str, int]) -> int:
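Putting the utils changes together, a rough sketch of the save/reload path that checkpoint directories now rely on (the output directory is illustrative):

```python
from pathlib import Path

from litgpt import Config
from litgpt.utils import save_config

out_dir = Path("out/custom-model")  # hypothetical output directory
out_dir.mkdir(parents=True, exist_ok=True)

# save_config serializes the dataclass fields to model_config.yaml
# (previously lit_config.json).
config = Config.from_name("pythia-14m")
save_config(config, out_dir)

# Reading it back goes through the YAML loader introduced in this PR.
restored = Config.from_file(out_dir / "model_config.yaml")
assert restored.name == config.name and restored.n_layer == config.n_layer
```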
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -25,7 +25,7 @@ def fake_checkpoint_dir(tmp_path):
checkpoint_dir = tmp_path / "checkpoints" / "tmp"
checkpoint_dir.mkdir(parents=True)
(checkpoint_dir / "lit_model.pth").touch()
(checkpoint_dir / "lit_config.json").touch()
(checkpoint_dir / "model_config.yaml").touch()
(checkpoint_dir / "tokenizer.json").touch()
(checkpoint_dir / "tokenizer_config.json").touch()
return checkpoint_dir
4 changes: 2 additions & 2 deletions tests/test_adapter.py
@@ -90,11 +90,11 @@ def test_adapter_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path)
for checkpoint_dir in checkpoint_dirs:
assert {p.name for p in (out_dir / checkpoint_dir).iterdir()} == {
"lit_model.pth",
"lit_config.json",
"model_config.yaml",
"tokenizer_config.json",
"tokenizer.json",
"hyperparameters.yaml",
"prompt_style.json",
"prompt_style.yaml",
}
assert (out_dir / "version_0" / "metrics.csv").is_file()

4 changes: 2 additions & 2 deletions tests/test_adapter_v2.py
@@ -113,11 +113,11 @@ def test_adapter_v2_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_pa
for checkpoint_dir in checkpoint_dirs:
assert {p.name for p in (out_dir / checkpoint_dir).iterdir()} == {
"lit_model.pth",
"lit_config.json",
"model_config.yaml",
"tokenizer_config.json",
"tokenizer.json",
"hyperparameters.yaml",
"prompt_style.json",
"prompt_style.yaml",
}
assert (out_dir / "version_0" / "metrics.csv").is_file()

6 changes: 3 additions & 3 deletions tests/test_chat.py
@@ -1,6 +1,5 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.

- import json
import subprocess
import sys
from contextlib import redirect_stderr, redirect_stdout
@@ -11,6 +10,7 @@

import pytest
import torch
+ import yaml


@pytest.mark.parametrize(
@@ -86,9 +86,9 @@ def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeypatch, te
# these values will be iteratively provided for each `input()` call
mocked_input.side_effect = ["Hello", stop_iteration]

- config_path = fake_checkpoint_dir / "lit_config.json"
+ config_path = fake_checkpoint_dir / "model_config.yaml"
config = {"block_size": 128, "vocab_size": 50, "n_layer": 2, "n_head": 4, "n_embd": 8, "rotary_percentage": 1}
- config_path.write_text(json.dumps(config))
+ config_path.write_text(yaml.dump(config))

load_mock = Mock()
load_mock.return_value = load_mock
11 changes: 6 additions & 5 deletions tests/test_config.py
@@ -5,6 +5,7 @@
from pathlib import Path

import pytest
+ import yaml

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
@@ -65,7 +66,7 @@ def test_from_checkpoint(tmp_path):
from litgpt import Config

# 1. Neither `lit_config.py` nor matching config exists.
- with pytest.raises(FileNotFoundError, match="neither 'lit_config.json' nor matching config exists"):
+ with pytest.raises(FileNotFoundError, match="neither 'model_config.yaml' nor matching config exists"):
Config.from_checkpoint(tmp_path / "non_existing_checkpoint")

# 2. If `lit_config.py` doesn't exists, but there is a matching config in `litgpt/config.py`.
@@ -76,17 +77,17 @@

# 3. If only `lit_config.py` exists.
config_data = {"name": "pythia-14m", "block_size": 24, "n_layer": 2}
with open(tmp_path / "lit_config.json", "w") as file:
json.dump(config_data, file)
with open(tmp_path / "model_config.yaml", "w") as file:
yaml.dump(config_data, file)
config = Config.from_checkpoint(tmp_path)
assert config.name == "pythia-14m"
assert config.block_size == 24
assert config.n_layer == 2

# 4. Both `lit_config.py` and a matching config exist, but `lit_config.py` supersedes matching config
(tmp_path / "pythia-14m").mkdir()
with open(tmp_path / "pythia-14m/lit_config.json", "w") as file:
json.dump(config_data, file)
with open(tmp_path / "pythia-14m/model_config.yaml", "w") as file:
yaml.dump(config_data, file)
config = Config.from_checkpoint(tmp_path / "pythia-14m")
assert config.name == "pythia-14m"
assert config.block_size == 24
4 changes: 2 additions & 2 deletions tests/test_convert_hf_checkpoint.py
@@ -113,10 +113,10 @@ def test_convert_hf_checkpoint(tmp_path):
convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m")
load.assert_called_with(bin_file)

- assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"}
+ assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "model_config.yaml", "lit_model.pth"}

# ensure that the config dict can be loaded
from litgpt import Config

- config = Config.from_json(tmp_path / "lit_config.json")
+ config = Config.from_file(tmp_path / "model_config.yaml")
assert isinstance(config, Config)
8 changes: 5 additions & 3 deletions tests/test_convert_lit_checkpoint.py
@@ -9,6 +9,8 @@

import pytest
import torch
+ import yaml

from conftest import RunIf

wd = Path(__file__).parent.parent.absolute()
@@ -21,14 +23,14 @@ def test_convert_lit_checkpoint(tmp_path):
ours_config = Config.from_name("Llama-2-7b-hf", block_size=8, n_layer=2, n_embd=32, n_head=2, padding_multiple=128)
ours_model = GPT(ours_config)
checkpoint_path = tmp_path / "lit_model.pth"
- config_path = tmp_path / "lit_config.json"
+ config_path = tmp_path / "model_config.yaml"
torch.save(ours_model.state_dict(), checkpoint_path)
with open(config_path, "w") as fp:
- json.dump(asdict(ours_config), fp)
+ yaml.dump(asdict(ours_config), fp)
output_dir = tmp_path / "out_dir"

convert_lit_checkpoint(checkpoint_path.parent, output_dir)
- assert set(os.listdir(tmp_path)) == {"lit_model.pth", "lit_config.json", "out_dir"}
+ assert set(os.listdir(tmp_path)) == {"lit_model.pth", "model_config.yaml", "out_dir"}
assert os.path.isfile(output_dir / "model.pth")

# check checkpoint is unwrapped
2 changes: 1 addition & 1 deletion tests/test_convert_pretrained_checkpoint.py
@@ -19,7 +19,7 @@ def test_convert_pretrained_checkpoint(tmp_path, fake_checkpoint_dir):
convert_pretrained_checkpoint(checkpoint_dir=fake_checkpoint_dir, output_dir=(tmp_path / "converted"))

assert set(os.listdir(tmp_path / "converted")) == {
"lit_model.pth", "lit_config.json", "tokenizer_config.json", "tokenizer.json"
"lit_model.pth", "model_config.yaml", "tokenizer_config.json", "tokenizer.json"
}
converted_checkpoint = torch.load(tmp_path / "converted" / "lit_model.pth")
assert list(converted_checkpoint.keys()) == ["some.module.weight", "some.other.module.weight"]
4 changes: 2 additions & 2 deletions tests/test_full.py
@@ -50,11 +50,11 @@ def test_full_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path):
for checkpoint_dir in checkpoint_dirs:
assert set(os.listdir(out_dir / checkpoint_dir)) == {
"lit_model.pth",
"lit_config.json",
"model_config.yaml",
"tokenizer_config.json",
"tokenizer.json",
"hyperparameters.yaml",
"prompt_style.json",
"prompt_style.yaml",
}
assert (out_dir / "logs" / "csv" / "version_0" / "metrics.csv").is_file()
