diff --git a/nemo/collections/llm/quantization/quantizer.py b/nemo/collections/llm/quantization/quantizer.py
index 15367cb25aba..2f3e0e1e986e 100644
--- a/nemo/collections/llm/quantization/quantizer.py
+++ b/nemo/collections/llm/quantization/quantizer.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import shutil
 from dataclasses import dataclass
 from typing import Optional, Union
 
@@ -22,6 +23,7 @@
 from tqdm import tqdm
 
 from nemo.collections import llm
+from nemo.lightning.ckpt_utils import CONTEXT_PATH
 from nemo.utils import logging
 
 from .utils import get_unwrapped_mcore_model
@@ -259,7 +261,7 @@ def loop(model):
 
         return loop
 
-    def export(self, model: llm.GPTModel) -> None:
+    def export(self, model: llm.GPTModel, model_dir: str) -> None:
         assert self.export_config is not None, "Export config is not set"
         # TODO: Add sample generate
         # TODO: Support megatron_amp_O2
@@ -277,15 +279,16 @@ def export(self, model: llm.GPTModel) -> None:
             use_nfs_workspace=use_nfs_workspace,
         )
 
-        dist.barrier()  # Wait until all ranks complete export_model_config step
-        logging.info(f"Export succeeded, model has been exported to {export_dir}. Saving tokenizer if possible...")
+        # Save the model context in order to restore its tokenizer later. The destination
+        # path is "nemo_context", as this name is used in nemo.export to set up the tokenizer.
+        shutil.copytree(
+            os.path.join(model_dir, CONTEXT_PATH),
+            os.path.join(export_dir, "nemo_context"),
+            dirs_exist_ok=True,
+        )
+        logging.info("Model context saved.")
 
-        if dist.get_rank() == 0:
-            try:
-                tokenizer_dst = os.path.join(export_dir, 'tokenizer')
-                model.tokenizer.tokenizer.save_pretrained(tokenizer_dst)
-            except Exception as err:
-                logging.warning("Could not save the tokenizer: " + str(err))
+        logging.info(f"Export succeeded, model has been exported to {export_dir}.")
 
 
 def get_calib_data_iter(
diff --git a/nemo/export/tensorrt_llm.py b/nemo/export/tensorrt_llm.py
index fb43224d59a9..08b0b822cad4 100644
--- a/nemo/export/tensorrt_llm.py
+++ b/nemo/export/tensorrt_llm.py
@@ -37,12 +37,12 @@
 from nemo.export.trt_llm.converter.utils import init_model_parallel_from_nemo
 from nemo.export.trt_llm.nemo_ckpt_loader.nemo_file import (
     build_tokenizer,
-    get_tokenzier,
+    get_tokenizer,
     is_nemo_file,
     load_nemo_model,
 )
 from nemo.export.trt_llm.qnemo import qnemo_to_tensorrt_llm
-from nemo.export.trt_llm.qnemo.tokenizer_utils import get_nmt_tokenizer
+from nemo.export.trt_llm.qnemo.tokenizer_utils import TOKENIZER_CONFIG_FILE, get_nmt_tokenizer
 from nemo.export.trt_llm.qnemo.utils import is_qnemo_checkpoint
 from nemo.export.trt_llm.tensorrt_llm_build import build_and_save_engine
 from nemo.export.trt_llm.tensorrt_llm_run import (
@@ -294,7 +294,14 @@ def export(
             else:
                 unpack_tarball(nemo_checkpoint_path, tmp_dir.name)
                 nemo_checkpoint_path = tmp_dir.name
-            self.tokenizer = get_nmt_tokenizer(nemo_checkpoint_path)
+
+            if os.path.exists(os.path.join(nemo_checkpoint_path, TOKENIZER_CONFIG_FILE)):
+                # Instantiate tokenizer for a legacy "NeMo 1" quantized checkpoint from a tokenizer config.
+                # Note that using the config is deprecated and it will be removed in future releases.
+                LOGGER.warning("Detected legacy tokenizer_config.yaml, using it to build tokenizer.")
+                self.tokenizer = get_nmt_tokenizer(nemo_checkpoint_path)
+            else:
+                self.tokenizer = get_tokenizer(nemo_checkpoint_path)
 
             qnemo_to_tensorrt_llm(
                 nemo_checkpoint_path=nemo_checkpoint_path,
@@ -1092,7 +1099,7 @@ def _load(self):
             if len(folders) > 0:
                 try:
                     self._load_config_file()
-                    self.tokenizer = get_tokenzier(Path(os.path.join(self.model_dir)))
+                    self.tokenizer = get_tokenizer(self.model_dir)
                     self.model = load(
                         tokenizer=self.tokenizer,
                         engine_dir=self.model_dir,
diff --git a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py
index 407a7ce600c9..23d227d32acf 100644
--- a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py
+++ b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py
@@ -283,16 +283,17 @@ def copy_tokenizer_files(config, out_dir):
             outfile.write(infile.read())
 
 
-def get_tokenzier(tokenizer_dir_or_path: Path) -> PreTrainedTokenizer:
-    """Loads the tokenizer from the decoded NEMO weights dir."""
+def get_tokenizer(tokenizer_dir_or_path: Union[str, Path]) -> PreTrainedTokenizer:
+    """Loads the tokenizer from the decoded NeMo weights dir."""
+    tokenizer_dir_or_path = Path(tokenizer_dir_or_path)
     if (tokenizer_dir_or_path / "nemo_context").exists():
         from nemo.lightning import io
 
         tokenizer_spec = io.load_context((tokenizer_dir_or_path / "nemo_context"), subpath="model.tokenizer")
         return build_tokenizer(tokenizer_spec)
     else:
-        if os.path.isdir(os.path.join(tokenizer_dir_or_path, "huggingface_tokenizer")):
-            return AutoTokenizer.from_pretrained(os.path.join(tokenizer_dir_or_path, "huggingface_tokenizer"))
+        if (tokenizer_dir_or_path / "huggingface_tokenizer").is_dir():
+            return AutoTokenizer.from_pretrained(tokenizer_dir_or_path / "huggingface_tokenizer")
 
         model_path = (
             tokenizer_dir_or_path / "tokenizer.model" if tokenizer_dir_or_path.is_dir() else tokenizer_dir_or_path
diff --git a/nemo/export/trt_llm/qnemo/tokenizer_utils.py b/nemo/export/trt_llm/qnemo/tokenizer_utils.py
index 36efa9259f9d..beca40bcd3d7 100644
--- a/nemo/export/trt_llm/qnemo/tokenizer_utils.py
+++ b/nemo/export/trt_llm/qnemo/tokenizer_utils.py
@@ -29,11 +29,6 @@
 
 def get_nmt_tokenizer(nemo_checkpoint_path: str):
     """Build tokenizer from Nemo tokenizer config."""
-    tokenizer_dir = os.path.join(nemo_checkpoint_path, TOKENIZER_DIR)
-    if os.path.exists(tokenizer_dir):
-        print(f"Initializing tokenizer from {TOKENIZER_DIR} directory")
-        return AutoTokenizer.from_pretrained(tokenizer_dir)
-
     print(f"Initializing tokenizer from {TOKENIZER_CONFIG_FILE}")
     tokenizer_cfg = OmegaConf.load(os.path.join(nemo_checkpoint_path, TOKENIZER_CONFIG_FILE))
 
diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py
index b417c088b22e..5d7d019c6099 100644
--- a/nemo/utils/model_utils.py
+++ b/nemo/utils/model_utils.py
@@ -724,6 +724,10 @@ def save_artifacts(model, output_dir: str, use_abspath: bool = False) -> None:
     app_state = AppState()
     model_file = app_state.model_restore_path
    model_cfg = copy.deepcopy(model.cfg)
+
+    if model_cfg.tokenizer.library == "huggingface":
+        model.tokenizer.save_pretrained(os.path.join(output_dir, "huggingface_tokenizer"))
+
     if not hasattr(model, "artifacts"):
         if hasattr(model_cfg, "tokenizer"):
             OmegaConf.save(model_cfg.tokenizer, os.path.join(output_dir, "tokenizer_config.yaml"))
diff --git a/scripts/llm/ptq.py b/scripts/llm/ptq.py
index 0fd2c5682e8a..c04d32290e5f 100644
--- a/scripts/llm/ptq.py
+++ b/scripts/llm/ptq.py
@@ -92,7 +92,7 @@ def main():
     quantizer = quantization.Quantizer(quantization_config, export_config)
     model = quantization.load_with_modelopt_layer_spec(args.nemo_checkpoint, args.calib_tp, args.calib_pp)
     model = quantizer.quantize(model)
-    quantizer.export(model)
+    quantizer.export(model, args.nemo_checkpoint)
 
 
 if __name__ == '__main__':
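
Below is a brief, hedged sketch of how the updated PTQ flow can be driven end to end after this change. Only Quantizer, load_with_modelopt_layer_spec, quantize, and the new two-argument export signature come from the diff above; the config constructors, their field names, and all paths are illustrative assumptions, not part of the patch.

    # Hedged usage sketch (not part of the patch). Field names on the config objects
    # are assumptions; check nemo.collections.llm.quantization for the real dataclasses.
    from nemo.collections.llm import quantization

    quantization_config = quantization.QuantizationConfig(algorithm="fp8")      # assumed field name
    export_config = quantization.ExportConfig(path="/results/llama3-8b-fp8")    # assumed field name

    quantizer = quantization.Quantizer(quantization_config, export_config)
    # Arguments: checkpoint path, calibration TP size, calibration PP size (as in scripts/llm/ptq.py).
    model = quantization.load_with_modelopt_layer_spec("/ckpts/llama3-8b-nemo2", 1, 1)
    model = quantizer.quantize(model)

    # The source checkpoint directory is now passed to export() so its NeMo 2.0 context
    # can be copied next to the exported weights as "nemo_context"; nemo.export's
    # get_tokenizer() later rebuilds the tokenizer from that directory.
    quantizer.export(model, "/ckpts/llama3-8b-nemo2")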