diff --git a/paddlenlp/transformers/auto/configuration.py b/paddlenlp/transformers/auto/configuration.py index 182a82514558..510dc1e02a32 100644 --- a/paddlenlp/transformers/auto/configuration.py +++ b/paddlenlp/transformers/auto/configuration.py @@ -159,7 +159,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *model_args, **kwar config = AutoConfig.from_pretrained("bert-base-uncased") config.save_pretrained('./bert-base-uncased') """ - subfolder = kwargs.pop("subfolder", "") + subfolder = kwargs.get("subfolder", "") if subfolder is None: subfolder = "" from_aistudio = kwargs.pop("from_aistudio", False) diff --git a/paddlenlp/transformers/auto/tokenizer.py b/paddlenlp/transformers/auto/tokenizer.py index a20e763d26c6..0d0b7b93e281 100644 --- a/paddlenlp/transformers/auto/tokenizer.py +++ b/paddlenlp/transformers/auto/tokenizer.py @@ -281,7 +281,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): all_tokenizer_names.append(name) # From local dir path if os.path.isdir(pretrained_model_name_or_path): - config_file = os.path.join(pretrained_model_name_or_path, cls.tokenizer_config_file) + config_file = os.path.join(pretrained_model_name_or_path, subfolder, cls.tokenizer_config_file) if os.path.exists(config_file): tokenizer_class = cls._get_tokenizer_class_from_config( pretrained_model_name_or_path, config_file, use_fast diff --git a/paddlenlp/transformers/conversion_utils.py b/paddlenlp/transformers/conversion_utils.py index 054239e91e1b..c883c1f87a14 100644 --- a/paddlenlp/transformers/conversion_utils.py +++ b/paddlenlp/transformers/conversion_utils.py @@ -1012,7 +1012,6 @@ def convert(cls, weight_file: str, config: PretrainedConfig, cache_dir: str) -> """ # FIXME(wj-Mcat): add compatibility with downstream models name_mappings = cls._get_name_mappings(config) - if weight_file.endswith(".index.json"): if ".safetensors." in weight_file: files = [file for file in os.listdir(os.path.dirname(weight_file)) if file.startswith("model-")] diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index a630b917061b..48abe2e610b6 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -354,7 +354,9 @@ def load_state_dict( return state_dict -def resolve_weight_file_from_hf_hub(repo_id: str, cache_dir: str, support_conversion: bool, subfolder=None): +def resolve_weight_file_from_hf_hub( + repo_id: str, cache_dir: str, support_conversion: bool, subfolder=None, use_safetensors=False +): """find the suitable weight file name Args: @@ -363,30 +365,34 @@ def resolve_weight_file_from_hf_hub(repo_id: str, cache_dir: str, support_conver support_conversion (bool): whether support converting pytorch weight file to paddle weight file subfolder (str, optional) An optional value corresponding to a folder inside the repo. 
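+        use_safetensors (bool, optional): whether to prefer `model.safetensors` / `model.safetensors.index.json`
+            over the raw paddle/pytorch weight files when probing the repo. Defaults to False.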
""" - is_local = os.path.isdir(repo_id) - if not is_local: - if hf_file_exists(repo_id, PADDLE_WEIGHTS_NAME, subfolder=subfolder): + is_sharded = False + if use_safetensors: + # SAFE WEIGHTS + if hf_file_exists(repo_id, SAFE_WEIGHTS_INDEX_NAME, subfolder=subfolder): + file_name = SAFE_WEIGHTS_INDEX_NAME + is_sharded = True + elif hf_file_exists(repo_id, SAFE_WEIGHTS_NAME, subfolder=subfolder): + file_name = SAFE_WEIGHTS_NAME + else: + # RAW WEIGHTS + if hf_file_exists(repo_id, PADDLE_WEIGHTS_INDEX_NAME, subfolder=subfolder): + file_name = PADDLE_WEIGHTS_INDEX_NAME + is_sharded = True + elif hf_file_exists(repo_id, PYTORCH_WEIGHTS_INDEX_NAME, subfolder=subfolder): + file_name = PYTORCH_WEIGHTS_INDEX_NAME + is_sharded = True + elif hf_file_exists(repo_id, PADDLE_WEIGHTS_NAME, subfolder=subfolder): file_name = PADDLE_WEIGHTS_NAME - assert ( - support_conversion is False - ), "Please call set convert_from_torch for paddle weights on huggingface hub, eg. Model.from_pretrained(model_name, from_hf_hub=True, convert_from_torch=False)" elif hf_file_exists(repo_id, PYTORCH_WEIGHTS_NAME, subfolder=subfolder): - if not support_conversion: - raise EntryNotFoundError( - f"can not download `{PADDLE_WEIGHTS_NAME} from https://huggingface.co/{repo_id}` " - "and current model doesn't support conversion from pytorch weight file to paddle weight file" - ) file_name = PYTORCH_WEIGHTS_NAME + else: raise EntryNotFoundError( message=f"can not find the paddle/pytorch weight file from: https://huggingface.co/{repo_id}", response=None, ) - else: - # for local file, we use support_conversion to select paddle or torch weight. - file_name = PYTORCH_WEIGHTS_NAME if support_conversion else PADDLE_WEIGHTS_NAME - file_name_list = [SAFE_WEIGHTS_NAME] + [file_name] + [PYTORCH_WEIGHTS_INDEX_NAME] + [SAFE_WEIGHTS_INDEX_NAME] + file_name_list = [file_name] resolved_file = None for fn in file_name_list: resolved_file = cached_file_for_hf_hub( @@ -402,7 +408,7 @@ def resolve_weight_file_from_hf_hub(repo_id: str, cache_dir: str, support_conver f"'https://huggingface.co/{repo_id}' for available files." ) - return resolved_file + return resolved_file, is_sharded def register_base_model(cls): @@ -1441,16 +1447,6 @@ def _resolve_model_file_path( is_sharded = False sharded_metadata = None - # -1. when it's from HF - if from_hf_hub or convert_from_torch: - resolved_archive_file = resolve_weight_file_from_hf_hub( - pretrained_model_name_or_path, - cache_dir=cache_dir, - support_conversion=convert_from_torch, - subfolder=subfolder, - ) - return resolved_archive_file, sharded_metadata, is_sharded - if pretrained_model_name_or_path is not None: # the following code use a lot of os.path.join, hence setting subfolder to empty str if None if subfolder is None: @@ -1554,105 +1550,118 @@ def get_file_path(pretrained_model_name_or_path, subfolder, SAFE_WEIGHTS_NAME, v filename = pretrained_model_name_or_path resolved_archive_file = get_path_from_url_with_filelock(pretrained_model_name_or_path) else: - # set correct filename - if use_safetensors is not False: - filename = _add_variant(SAFE_WEIGHTS_NAME, variant) - else: - filename = _add_variant(PADDLE_WEIGHTS_NAME, variant) - - try: - # Load from URL or cache if already cached - cached_file_kwargs = dict( + # -1. 
when it's from HF + if from_hf_hub: + resolved_archive_file, is_sharded = resolve_weight_file_from_hf_hub( + pretrained_model_name_or_path, cache_dir=cache_dir, + support_conversion=convert_from_torch, subfolder=subfolder, - from_aistudio=from_aistudio, - _raise_exceptions_for_missing_entries=False, + use_safetensors=use_safetensors, ) - resolved_archive_file = None - if pretrained_model_name_or_path in cls.pretrained_init_configuration: - # fetch the weight url from the `pretrained_resource_files_map` - resource_file_url = cls.pretrained_resource_files_map["model_state"][ - pretrained_model_name_or_path - ] - resolved_archive_file = cached_file( - resource_file_url, _add_variant(PADDLE_WEIGHTS_NAME, variant), **cached_file_kwargs - ) - - if resolved_archive_file is None: - resolved_archive_file = cached_file( - pretrained_model_name_or_path, filename, **cached_file_kwargs - ) + else: + # set correct filename + if use_safetensors is not False: + filename = _add_variant(SAFE_WEIGHTS_NAME, variant) else: - # xxx.pdparams in pretrained_resource_files_map renamed model_state.pdparams filename = _add_variant(PADDLE_WEIGHTS_NAME, variant) - # Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None - # result when internet is up, the repo and revision exist, but the file does not. - if resolved_archive_file is None and filename == _add_variant(SAFE_WEIGHTS_NAME, variant): - # Maybe the checkpoint is sharded, we try to grab the index name in this case. - resolved_archive_file = cached_file( - pretrained_model_name_or_path, - _add_variant(SAFE_WEIGHTS_INDEX_NAME, variant), - **cached_file_kwargs, + try: + # Load from URL or cache if already cached + cached_file_kwargs = dict( + cache_dir=cache_dir, + subfolder=subfolder, + from_aistudio=from_aistudio, + _raise_exceptions_for_missing_entries=False, ) - if resolved_archive_file is not None: - is_sharded = True - elif use_safetensors: - raise EnvironmentError( - f" {_add_variant(SAFE_WEIGHTS_NAME, variant)} or {_add_variant(SAFE_WEIGHTS_INDEX_NAME, variant)} and thus cannot be loaded with `safetensors`. Please make sure that the model has been saved with `safe_serialization=True` or do not set `use_safetensors=True`." + resolved_archive_file = None + if pretrained_model_name_or_path in cls.pretrained_init_configuration: + # fetch the weight url from the `pretrained_resource_files_map` + resource_file_url = cls.pretrained_resource_files_map["model_state"][ + pretrained_model_name_or_path + ] + resolved_archive_file = cached_file( + resource_file_url, _add_variant(PADDLE_WEIGHTS_NAME, variant), **cached_file_kwargs + ) + + if resolved_archive_file is None: + resolved_archive_file = cached_file( + pretrained_model_name_or_path, filename, **cached_file_kwargs ) + else: - # This repo has no safetensors file of any kind, we switch to PyTorch. + # xxx.pdparams in pretrained_resource_files_map renamed model_state.pdparams filename = _add_variant(PADDLE_WEIGHTS_NAME, variant) + + # Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None + # result when internet is up, the repo and revision exist, but the file does not. + if resolved_archive_file is None and filename == _add_variant(SAFE_WEIGHTS_NAME, variant): + # Maybe the checkpoint is sharded, we try to grab the index name in this case. 
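+                        # A hit on the safetensors index file means the checkpoint is split into
+                        # shards: only the index is resolved here; the shard files themselves are
+                        # materialized later through get_checkpoint_shard_files.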
                    resolved_archive_file = cached_file(
-                        pretrained_model_name_or_path, filename, **cached_file_kwargs
+                            pretrained_model_name_or_path,
+                            _add_variant(SAFE_WEIGHTS_INDEX_NAME, variant),
+                            **cached_file_kwargs,
                     )
-                if resolved_archive_file is None and filename == _add_variant(PADDLE_WEIGHTS_NAME, variant):
-                    # Maybe the checkpoint is sharded, we try to grab the index name in this case.
-                    resolved_archive_file = cached_file(
-                        pretrained_model_name_or_path,
-                        _add_variant(PADDLE_WEIGHTS_INDEX_NAME, variant),
-                        **cached_file_kwargs,
-                    )
-                    # raise ValueError(resolved_archive_file)
-                    if resolved_archive_file is not None:
-                        is_sharded = True
-                if resolved_archive_file is None:
-                    # Otherwise, maybe there is a TF or Flax model file. We try those to give a helpful error
-                    # message.
+                        if resolved_archive_file is not None:
+                            is_sharded = True
+                        elif use_safetensors:
+                            raise EnvironmentError(
+                                f"{pretrained_model_name_or_path} does not appear to have a file named {_add_variant(SAFE_WEIGHTS_NAME, variant)} or {_add_variant(SAFE_WEIGHTS_INDEX_NAME, variant)} and thus cannot be loaded with `safetensors`. Please make sure that the model has been saved with `safe_serialization=True` or do not set `use_safetensors=True`."
+                            )
+                        else:
+                            # This repo has no safetensors file of any kind, we switch to PyTorch.
+                            filename = _add_variant(PADDLE_WEIGHTS_NAME, variant)
+                            resolved_archive_file = cached_file(
+                                pretrained_model_name_or_path, filename, **cached_file_kwargs
+                            )
+                    if resolved_archive_file is None and filename == _add_variant(PADDLE_WEIGHTS_NAME, variant):
+                        # Maybe the checkpoint is sharded, we try to grab the index name in this case.
+                        resolved_archive_file = cached_file(
+                            pretrained_model_name_or_path,
+                            _add_variant(PADDLE_WEIGHTS_INDEX_NAME, variant),
+                            **cached_file_kwargs,
+                        )
+                        # raise ValueError(resolved_archive_file)
+                        if resolved_archive_file is not None:
+                            is_sharded = True
+                    if resolved_archive_file is None:
+                        # Otherwise, maybe there is a TF or Flax model file. We try those to give a helpful error
+                        # message.
+                        raise EnvironmentError(
+                            f"{pretrained_model_name_or_path} does not appear to have a file named"
+                            f" {_add_variant(PADDLE_WEIGHTS_NAME, variant)}."
+                        )
+                except Exception as e:
+                    logger.info(e)
+                    # For any other exception, we throw a generic error.
                     raise EnvironmentError(
-                        f"{pretrained_model_name_or_path} does not appear to have a file named"
-                        f" {_add_variant(PADDLE_WEIGHTS_NAME, variant)}."
+                        f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it"
+                        " from 'https://paddlenlp.bj.bcebos.com', make sure you don't have a local directory with the same name."
                     )
-            except Exception as e:
-                logger.info(e)
-                # For any other exception, we throw a generic error.
-                raise EnvironmentError(
-                    f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it"
-                    " from 'https://paddlenlp.bj.bcebos.com'"
-                )

             if is_local:
                 logger.info(f"Loading weights file {archive_file}")
                 resolved_archive_file = archive_file
             else:
-                logger.info(f"Loading weights file {filename} from cache at {resolved_archive_file}")
+                logger.info(f"Loading weights file from cache at {resolved_archive_file}")
         else:
             resolved_archive_file = None

         # We'll need to download and cache each checkpoint shard if the checkpoint is sharded.
+        resolved_sharded_files = None
         if is_sharded:
             # resolved_archive_file becomes a list of files that point to the different checkpoint shards in this case.
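+            # Note: with from_hf_hub=True, each shard listed in the index is fetched via
+            # hf_hub_download (see the matching branch added to get_checkpoint_shard_files
+            # in paddlenlp/transformers/utils.py below).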
- resolved_archive_file, sharded_metadata = get_checkpoint_shard_files( + resolved_sharded_files, sharded_metadata = get_checkpoint_shard_files( pretrained_model_name_or_path, resolved_archive_file, from_aistudio=from_aistudio, + from_hf_hub=from_hf_hub, cache_dir=cache_dir, subfolder=subfolder, ) - return resolved_archive_file, sharded_metadata, is_sharded + return resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded @classmethod def _load_pretrained_model( @@ -1660,7 +1669,7 @@ def _load_pretrained_model( model: PretrainedModel, state_dict: Dict[str, Tensor], loaded_keys: List[str], - resolved_archive_file, + resolved_archive_file: Union[str, List], pretrained_model_name_or_path, config=None, ignore_mismatched_sizes=False, @@ -2113,7 +2122,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs): use_keep_in_fp32_modules = False # resolve model_weight file - resolved_archive_file, sharded_metadata, is_sharded = cls._resolve_model_file_path( + resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path( pretrained_model_name_or_path, cache_dir=cache_dir, subfolder=subfolder, @@ -2125,41 +2134,40 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs): variant=variant, ) - # load pt weights early so that we know which dtype to init the model under + if convert_from_torch and state_dict is None: + if ( + resolved_archive_file.endswith(PYTORCH_WEIGHTS_NAME) + or resolved_archive_file.endswith(PYTORCH_WEIGHTS_INDEX_NAME) + or resolved_archive_file.endswith(SAFE_WEIGHTS_NAME) + or resolved_archive_file.endswith(SAFE_WEIGHTS_INDEX_NAME) + ): + # try to get the name-mapping info + logger.info( + f"Starting to convert pytorch weight file<{resolved_archive_file}> to " + f"paddle weight file<{os.path.join(cache_dir, pretrained_model_name_or_path, subfolder, PADDLE_WEIGHTS_NAME)}> ..." + ) + state_dict = cls.convert( + resolved_archive_file, + config, + cache_dir=os.path.join(cache_dir, pretrained_model_name_or_path, subfolder), + ) + else: + raise ValueError(f"Unexpected file: {resolved_archive_file} for weight conversion.") + # load pt weights early so that we know which dtype to init the model under + if not is_sharded and state_dict is None: - # Time to load the checkpoint - if convert_from_torch: - if ( - resolved_archive_file.endswith(PYTORCH_WEIGHTS_NAME) - or resolved_archive_file.endswith(PYTORCH_WEIGHTS_INDEX_NAME) - or resolved_archive_file.endswith(SAFE_WEIGHTS_NAME) - or resolved_archive_file.endswith(SAFE_WEIGHTS_INDEX_NAME) - ): - # try to get the name-mapping info - logger.info( - f"Starting to convert pytorch weight file<{resolved_archive_file}> to " - f"paddle weight file<{os.path.join(cache_dir, pretrained_model_name_or_path, subfolder, PADDLE_WEIGHTS_NAME)}> ..." - ) - state_dict = cls.convert( - resolved_archive_file, - config, - cache_dir=os.path.join(cache_dir, pretrained_model_name_or_path, subfolder), - ) - else: - raise ValueError(f"Unexpected file: {resolved_archive_file} for weight conversion.") + # 4. 
loading non-sharded ckpt from the state dict
+                if config.tensor_parallel_degree > 1 and resolved_archive_file.endswith("model_state.pdparams"):
+                    state_dict = cls.convert_tensor_parallel(resolved_archive_file, config)
+                elif config.tensor_parallel_degree > 1 and resolved_archive_file.endswith("model.safetensors"):
+                    with safe_open(resolved_archive_file, framework="np", device="cpu") as f:
+                        loaded_keys = f.keys()
+                    tp_actions = cls.get_tensor_parallel_convert_actions(config, loaded_keys)
+                    state_dict = load_state_dict(resolved_archive_file, tp_actions)
                 else:
-                    # 4. loading non-sharded ckpt from the state dict
-                    if config.tensor_parallel_degree > 1 and resolved_archive_file.endswith("model_state.pdparams"):
-                        state_dict = cls.convert_tensor_parallel(resolved_archive_file, config)
-                    elif config.tensor_parallel_degree > 1 and resolved_archive_file.endswith("model.safetensors"):
-                        with safe_open(resolved_archive_file, framework="np", device="cpu") as f:
-                            loaded_keys = f.keys()
-                        tp_actions = cls.get_tensor_parallel_convert_actions(config, loaded_keys)
-                        state_dict = load_state_dict(resolved_archive_file, tp_actions)
-                    else:
-                        state_dict = load_state_dict(resolved_archive_file)
+                    state_dict = load_state_dict(resolved_archive_file)

-                logger.info("Loaded weights file from disk, setting weights to model.")
+            logger.info("Loaded weights file from disk, setting weights to model.")

         # Check if `_keep_in_fp32_modules` is not None
         use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and dtype == "float16"
@@ -2202,7 +2210,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
             model=model,
             state_dict=state_dict,
             loaded_keys=loaded_state_dict_keys,
-            resolved_archive_file=resolved_archive_file,
+            resolved_archive_file=resolved_sharded_files if is_sharded else resolved_archive_file,
             pretrained_model_name_or_path=pretrained_model_name_or_path,
             config=config,
             ignore_mismatched_sizes=ignore_mismatched_sizes,
diff --git a/paddlenlp/transformers/tokenizer_utils_base.py b/paddlenlp/transformers/tokenizer_utils_base.py
index 9596da645f6d..3df310373e22 100644
--- a/paddlenlp/transformers/tokenizer_utils_base.py
+++ b/paddlenlp/transformers/tokenizer_utils_base.py
@@ -1559,7 +1559,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
         for k, v in resolved_vocab_files.items():
             if v is not None and os.path.isfile(v):
                 tokenizer_config_file_dir_list.add(os.path.dirname(v))
-        assert len(tokenizer_config_file_dir_list) == 1, "All tokenizer files should be in the same directory."
+        tokenizer_config_file_dir_list = list(tokenizer_config_file_dir_list)
+        # TODO: check this
+        assert len(tokenizer_config_file_dir_list) > 0, "Unable to resolve any tokenizer files; expected at least one directory."
         # Prepare tokenizer initialization kwargs
         # Did we saved some inputs and kwargs to reload ?
has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None diff --git a/paddlenlp/transformers/utils.py b/paddlenlp/transformers/utils.py index 49a8a9d532c1..9b1afe235afd 100644 --- a/paddlenlp/transformers/utils.py +++ b/paddlenlp/transformers/utils.py @@ -612,6 +612,7 @@ def get_checkpoint_shard_files( cache_dir=None, subfolder="", from_aistudio=False, + from_hf_hub=False, ): """ For a given model: @@ -666,6 +667,13 @@ def get_checkpoint_shard_files( subfolder=subfolder, cache_dir=cache_dir, ) + elif from_hf_hub: + cached_filename = hf_hub_download( + repo_id=pretrained_model_name_or_path, + filename=shard_filename, + subfolder=subfolder, + cache_dir=cache_dir, + ) else: cached_filename = paddlenlp_hub_download( pretrained_model_name_or_path, diff --git a/tests/transformers/load_subfolder/test_config.py b/tests/transformers/load_subfolder/test_config.py new file mode 100644 index 000000000000..bc5f150cd182 --- /dev/null +++ b/tests/transformers/load_subfolder/test_config.py @@ -0,0 +1,87 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from paddlenlp.transformers import AutoConfig, BertConfig, CLIPConfig, T5Config +from paddlenlp.utils.log import logger + + +class ConfigLoadTester(unittest.TestCase): + def test_bert_config_load(self): + logger.info("Download Bert Config from PaddleNLP BOS") + bert_config = BertConfig.from_pretrained("bert-base-uncased", from_hf_hub=False) + bert_config = AutoConfig.from_pretrained("bert-base-uncased", from_hf_hub=False) + + logger.info("Download config from local") + bert_config.save_pretrained("./paddlenlp-test-config/bert-base-uncased") + bert_config = BertConfig.from_pretrained("./paddlenlp-test-config/bert-base-uncased") + bert_config = AutoConfig.from_pretrained("./paddlenlp-test-config/bert-base-uncased") + logger.info("Download config from local with subfolder") + bert_config = BertConfig.from_pretrained("./paddlenlp-test-config", subfolder="bert-base-uncased") + bert_config = AutoConfig.from_pretrained("./paddlenlp-test-config", subfolder="bert-base-uncased") + + logger.info("Download Bert Config from PaddleNLP BOS with subfolder") + bert_config = BertConfig.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="bert-base-uncased", from_hf_hub=False + ) + bert_config = AutoConfig.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="bert-base-uncased", from_hf_hub=False + ) + + logger.info("Download Bert Config from aistudio") + bert_config = BertConfig.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True) + bert_config = AutoConfig.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True) + + def test_clip_config_load(self): + logger.info("Download CLIP Config from PaddleNLP BOS") + clip_config = CLIPConfig.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False) + clip_config = AutoConfig.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False) + + logger.info("Download CLIP Config from 
local") + clip_config.save_pretrained("./paddlenlp-test-config/clip-vit-base-patch32") + clip_config = CLIPConfig.from_pretrained("./paddlenlp-test-config/clip-vit-base-patch32") + clip_config = AutoConfig.from_pretrained("./paddlenlp-test-config/clip-vit-base-patch32") + logger.info("Download CLIP Config from local with subfolder") + clip_config = CLIPConfig.from_pretrained("./paddlenlp-test-config", subfolder="clip-vit-base-patch32") + clip_config = AutoConfig.from_pretrained("./paddlenlp-test-config", subfolder="clip-vit-base-patch32") + + logger.info("Download CLIP Config from PaddleNLP BOS with subfolder") + clip_config = CLIPConfig.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False + ) + clip_config = AutoConfig.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False + ) + + logger.info("Download CLIP Config from aistudio") + clip_config = CLIPConfig.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True) + clip_config = AutoConfig.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True) + + def test_t5_config_load(self): + logger.info("Download T5 Config from PaddleNLP BOS") + t5_config = T5Config.from_pretrained("t5-small", from_hf_hub=False) + t5_config = AutoConfig.from_pretrained("t5-small", from_hf_hub=False) + + logger.info("Download T5 Config from PaddleNLP BOS with subfolder") + t5_config = T5Config.from_pretrained("baicai/paddlenlp-test-model", subfolder="t5-small", from_hf_hub=False) + t5_config = AutoConfig.from_pretrained("baicai/paddlenlp-test-model", subfolder="t5-small", from_hf_hub=False) + logger.info("Download T5 Config from local") + t5_config.save_pretrained("./paddlenlp-test-config/t5-small") + t5_config = T5Config.from_pretrained("./paddlenlp-test-config/t5-small") + t5_config = AutoConfig.from_pretrained("./paddlenlp-test-config/t5-small") + + logger.info("Download T5 Config from aistudio") + t5_config = T5Config.from_pretrained("aistudio/t5-small", from_aistudio=True) + t5_config = AutoConfig.from_pretrained("aistudio/t5-small", from_aistudio=True) diff --git a/tests/transformers/load_subfolder/test_image_processor.py b/tests/transformers/load_subfolder/test_image_processor.py new file mode 100644 index 000000000000..a909015e804d --- /dev/null +++ b/tests/transformers/load_subfolder/test_image_processor.py @@ -0,0 +1,57 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import unittest
+
+from paddlenlp.transformers import AutoImageProcessor, CLIPImageProcessor
+from paddlenlp.utils.log import logger
+
+
+class ImageProcessorLoadTester(unittest.TestCase):
+    def test_clip_load(self):
+        logger.info("Download model from PaddleNLP BOS")
+        clip_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+        clip_processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+
+        logger.info("Download model from local")
+        clip_processor.save_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_processor = CLIPImageProcessor.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_processor = AutoImageProcessor.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        logger.info("Download model from local with subfolder")
+        clip_processor = CLIPImageProcessor.from_pretrained(
+            "./paddlenlp-test-model/", subfolder="clip-vit-base-patch32"
+        )
+        clip_processor = AutoImageProcessor.from_pretrained(
+            "./paddlenlp-test-model/", subfolder="clip-vit-base-patch32"
+        )
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        clip_processor = CLIPImageProcessor.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+        clip_processor = AutoImageProcessor.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+
+        logger.info("Download model from aistudio")
+        clip_processor = CLIPImageProcessor.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+        clip_processor = AutoImageProcessor.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+
+        logger.info("Download model from aistudio with subfolder")
+        clip_processor = CLIPImageProcessor.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
+        clip_processor = AutoImageProcessor.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
diff --git a/tests/transformers/load_subfolder/test_model.py b/tests/transformers/load_subfolder/test_model.py
new file mode 100644
index 000000000000..285d46e8d402
--- /dev/null
+++ b/tests/transformers/load_subfolder/test_model.py
@@ -0,0 +1,845 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
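+
+# NOTE: a rough sketch of the weight-file resolution these tests exercise,
+# mirroring resolve_weight_file_from_hf_hub in model_utils.py:
+#   use_safetensors=True  -> SAFE_WEIGHTS_INDEX_NAME, then SAFE_WEIGHTS_NAME
+#   use_safetensors=False -> PADDLE_WEIGHTS_INDEX_NAME, then PYTORCH_WEIGHTS_INDEX_NAME,
+#                            then PADDLE_WEIGHTS_NAME, then PYTORCH_WEIGHTS_NAME
+# An index-file hit marks the checkpoint as sharded, so from_pretrained receives the
+# resolved shard list instead of a single archive file, e.g. (repo id is illustrative):
+#
+#     model = AutoModel.from_pretrained(
+#         "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True
+#     )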
+ +import os +import tempfile +import unittest + +import pytest + +from paddlenlp.transformers import AutoModel, BertModel, CLIPTextModel, T5Model +from paddlenlp.utils.log import logger + + +class ModelLoadTester(unittest.TestCase): + @pytest.mark.skip + def test_config_diff(self, config_1, config_2): + config_1 = config_1.to_dict() + config_2 = config_2.to_dict() + config_1.pop("architectures", None) + config_2.pop("architectures", None) + assert config_1 == config_2, "config not equal" + + @pytest.mark.skip + def test_cache_dir( + self, model_cls, repo_id="", subfolder=None, use_safetensors=False, from_aistudio=False, from_hf_hub=False + ): + with tempfile.TemporaryDirectory() as cache_dir: + model_cls.from_pretrained( + repo_id, + subfolder=subfolder, + cache_dir=cache_dir, + use_safetensors=use_safetensors, + from_aistudio=from_aistudio, + from_hf_hub=from_hf_hub, + ) + file_list = [] + for root, dirs, files in os.walk(cache_dir): + for file in files: + file_list.append(file) + assert len(file_list) > 0, "cache_dir is empty" + assert "config.json" in file_list, "config.json not in cache_dir" + if use_safetensors: + assert any(".safetensors" in f for f in file_list), "*.safetensors not in cache_dir" + else: + if from_hf_hub: + assert any(".bin" in f for f in file_list), "*.bin not in cache_dir" + else: + assert any(".pdparams" in f for f in file_list), "*.pdparams not in cache_dir" + + def test_bert_load(self): + # BOS + logger.info("Download model from PaddleNLP BOS") + bert_model_bos = BertModel.from_pretrained("baicai/tiny-bert-2", use_safetensors=False, from_hf_hub=False) + bert_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-bert-2", use_safetensors=False, from_hf_hub=False) + self.test_config_diff(bert_model_bos.config, bert_model_bos_auto.config) + + logger.info("Download model from PaddleNLP BOS with subfolder") + bert_model_bos_sub = BertModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(bert_model_bos.config, bert_model_bos_sub.config) + + bert_model_bos_sub_auto = AutoModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(bert_model_bos_sub.config, bert_model_bos_sub_auto.config) + + # aistudio + logger.info("Download model from aistudio") + bert_model_aistudio = BertModel.from_pretrained( + "aistudio/tiny-bert", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(bert_model_bos.config, bert_model_aistudio.config) + bert_model_aistudio_auto = AutoModel.from_pretrained( + "aistudio/tiny-bert", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio.config, bert_model_aistudio_auto.config) + + # hf + logger.info("Download model from hf") + bert_model_hf = BertModel.from_pretrained("Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=False) + bert_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=False) + self.test_config_diff(bert_model_hf.config, bert_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + bert_model_hf_sub = BertModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_sub.config) + bert_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, 
use_safetensors=False + ) + self.test_config_diff(bert_model_hf_sub.config, bert_model_hf_sub_auto.config) + bert_model_hf = BertModel.from_pretrained("Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=False) + self.test_config_diff(bert_model_hf.config, bert_model_hf.config) + bert_model_hf_auto = AutoModel.from_pretrained( + "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + bert_model_hf_sub = BertModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_sub.config) + bert_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(bert_model_hf_sub.config, bert_model_hf_sub_auto.config) + + logger.info("Download model from aistudio with subfolder") + bert_model_aistudio_sub = BertModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio.config, bert_model_aistudio_sub.config) + bert_model_aistudio_sub_auto = AutoModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio_sub.config, bert_model_aistudio_sub_auto.config) + + # local + logger.info("Download model from local") + bert_model_bos.save_pretrained("./paddlenlp-test-model/tiny-bert", safe_serialization=True) + bert_model_local = BertModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-bert", use_safetensors=False + ) + self.test_config_diff(bert_model_bos.config, bert_model_local.config) + bert_model_local_auto = AutoModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-bert", use_safetensors=False + ) + self.test_config_diff(bert_model_local.config, bert_model_local_auto.config) + + logger.info("Test cache_dir") + # BOS + self.test_cache_dir(BertModel, "baicai/tiny-bert-2", use_safetensors=False, from_hf_hub=False) + self.test_cache_dir(AutoModel, "baicai/tiny-bert-2", use_safetensors=False, from_hf_hub=False) + self.test_cache_dir( + BertModel, "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_hf_hub=False + ) + self.test_cache_dir( + AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=False, from_hf_hub=False + ) + + # aistudio + self.test_cache_dir(BertModel, "aistudio/tiny-bert", use_safetensors=False, from_aistudio=True) + self.test_cache_dir(AutoModel, "aistudio/tiny-bert", use_safetensors=False, from_aistudio=True) + self.test_cache_dir( + BertModel, + "aistudio/paddlenlp-test-model", + subfolder="tiny-bert", + use_safetensors=False, + from_aistudio=True, + ) + self.test_cache_dir( + AutoModel, + "aistudio/paddlenlp-test-model", + subfolder="tiny-bert", + use_safetensors=False, + from_aistudio=True, + ) + + # hf + self.test_cache_dir(BertModel, "Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir(AutoModel, "Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir( + BertModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=False + ) + self.test_cache_dir( + AutoModel, "Baicai003/paddlenlp-test-model", 
subfolder="tiny-bert", from_hf_hub=True, use_safetensors=False + ) + self.test_cache_dir(BertModel, "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir(AutoModel, "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir( + BertModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-bert-one", + from_hf_hub=True, + use_safetensors=False, + ) + self.test_cache_dir( + AutoModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-bert-one", + from_hf_hub=True, + use_safetensors=False, + ) + + def test_bert_load_safe(self): + # BOS + logger.info("Download model from PaddleNLP BOS") + bert_model_bos = BertModel.from_pretrained("baicai/tiny-bert-2", use_safetensors=True, from_hf_hub=False) + bert_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-bert-2", use_safetensors=True, from_hf_hub=False) + self.test_config_diff(bert_model_bos.config, bert_model_bos_auto.config) + + logger.info("Download model from PaddleNLP BOS with subfolder") + bert_model_bos_sub = BertModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_hf_hub=False + ) + self.test_config_diff(bert_model_bos.config, bert_model_bos_sub.config) + + bert_model_bos_sub_auto = AutoModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_hf_hub=False + ) + self.test_config_diff(bert_model_bos_sub.config, bert_model_bos_sub_auto.config) + + # aistudio + logger.info("Download model from aistudio") + bert_model_aistudio = BertModel.from_pretrained("aistudio/tiny-bert", use_safetensors=True, from_aistudio=True) + self.test_config_diff(bert_model_bos.config, bert_model_aistudio.config) + bert_model_aistudio_auto = AutoModel.from_pretrained( + "aistudio/tiny-bert", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio.config, bert_model_aistudio_auto.config) + + # hf + logger.info("Download model from hf") + bert_model_hf = BertModel.from_pretrained("Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=True) + bert_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=True) + self.test_config_diff(bert_model_hf.config, bert_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + bert_model_hf_sub = BertModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_sub.config) + bert_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(bert_model_hf_sub.config, bert_model_hf_sub_auto.config) + bert_model_hf = BertModel.from_pretrained("Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=True) + self.test_config_diff(bert_model_hf.config, bert_model_hf.config) + bert_model_hf_auto = AutoModel.from_pretrained( + "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + bert_model_hf_sub = BertModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-bert-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(bert_model_hf.config, bert_model_hf_sub.config) + bert_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", 
subfolder="tiny-bert-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(bert_model_hf_sub.config, bert_model_hf_sub_auto.config) + + logger.info("Download model from aistudio with subfolder") + bert_model_aistudio_sub = BertModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio.config, bert_model_aistudio_sub.config) + bert_model_aistudio_sub_auto = AutoModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(bert_model_aistudio_sub.config, bert_model_aistudio_sub_auto.config) + + # local + logger.info("Download model from local") + bert_model_bos.save_pretrained("./paddlenlp-test-model/tiny-bert", safe_serialization=True) + bert_model_local = BertModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-bert", use_safetensors=True + ) + self.test_config_diff(bert_model_bos.config, bert_model_local.config) + bert_model_local_auto = AutoModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-bert", use_safetensors=True + ) + self.test_config_diff(bert_model_local.config, bert_model_local_auto.config) + + logger.info("Test cache_dir") + # BOS + self.test_cache_dir(BertModel, "baicai/tiny-bert-2", use_safetensors=True, from_hf_hub=False) + self.test_cache_dir(AutoModel, "baicai/tiny-bert-2", use_safetensors=True, from_hf_hub=False) + self.test_cache_dir( + BertModel, "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_hf_hub=False + ) + self.test_cache_dir( + AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_hf_hub=False + ) + + # aistudio + self.test_cache_dir(BertModel, "aistudio/tiny-bert", use_safetensors=True, from_aistudio=True) + self.test_cache_dir(AutoModel, "aistudio/tiny-bert", use_safetensors=True, from_aistudio=True) + self.test_cache_dir( + BertModel, "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_aistudio=True + ) + self.test_cache_dir( + AutoModel, "aistudio/paddlenlp-test-model", subfolder="tiny-bert", use_safetensors=True, from_aistudio=True + ) + + # hf + self.test_cache_dir(BertModel, "Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir(AutoModel, "Baicai003/tiny-bert", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir( + BertModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=True + ) + self.test_cache_dir( + AutoModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-bert", from_hf_hub=True, use_safetensors=True + ) + self.test_cache_dir(BertModel, "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir(AutoModel, "Baicai003/tiny-bert-one", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir( + BertModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-bert-one", + from_hf_hub=True, + use_safetensors=True, + ) + self.test_cache_dir( + AutoModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-bert-one", + from_hf_hub=True, + use_safetensors=True, + ) + + def test_clip_load(self): + # BOS + logger.info("Download model from PaddleNLP BOS") + clip_model_bos = CLIPTextModel.from_pretrained("baicai/tiny-clip", use_safetensors=False, from_hf_hub=False) + clip_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-clip", use_safetensors=False, from_hf_hub=False) + 
self.test_config_diff(clip_model_bos.config, clip_model_bos_auto.config) + + logger.info("Download model from PaddleNLP BOS with subfolder") + clip_model_bos_sub = CLIPTextModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(clip_model_bos.config, clip_model_bos_sub.config) + + clip_model_bos_sub_auto = AutoModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(clip_model_bos_sub.config, clip_model_bos_sub_auto.config) + + # aistudio + logger.info("Download model from aistudio") + clip_model_aistudio = CLIPTextModel.from_pretrained( + "aistudio/tiny-clip", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(clip_model_bos.config, clip_model_aistudio.config) + clip_model_aistudio_auto = AutoModel.from_pretrained( + "aistudio/tiny-clip", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio.config, clip_model_aistudio_auto.config) + + logger.info("Download model from aistudio with subfolder") + clip_model_aistudio_sub = CLIPTextModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio.config, clip_model_aistudio_sub.config) + clip_model_aistudio_sub_auto = AutoModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio_sub.config, clip_model_aistudio_sub_auto.config) + + # hf + logger.info("Download model from hf") + clip_model_hf = CLIPTextModel.from_pretrained("Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=False) + clip_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=False) + self.test_config_diff(clip_model_hf.config, clip_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + clip_model_hf_sub = CLIPTextModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_sub.config) + clip_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf_sub.config, clip_model_hf_sub_auto.config) + clip_model_hf = CLIPTextModel.from_pretrained( + "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf.config) + clip_model_hf_auto = AutoModel.from_pretrained( + "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + clip_model_hf_sub = CLIPTextModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_sub.config) + clip_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip-one", from_hf_hub=True, use_safetensors=False + ) + self.test_config_diff(clip_model_hf_sub.config, clip_model_hf_sub_auto.config) + + # local + logger.info("Download model from local") + 
clip_model_bos.save_pretrained("./paddlenlp-test-model/tiny-clip", safe_serialization=True) + clip_model_local = CLIPTextModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-clip", use_safetensors=False + ) + self.test_config_diff(clip_model_bos.config, clip_model_local.config) + clip_model_local_auto = AutoModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-clip", use_safetensors=False + ) + self.test_config_diff(clip_model_local.config, clip_model_local_auto.config) + + logger.info("Test cache_dir") + # BOS + self.test_cache_dir(CLIPTextModel, "baicai/tiny-clip", use_safetensors=False, from_hf_hub=False) + self.test_cache_dir(AutoModel, "baicai/tiny-clip", use_safetensors=False, from_hf_hub=False) + self.test_cache_dir( + CLIPTextModel, + "baicai/paddlenlp-test-model", + subfolder="tiny-clip", + use_safetensors=False, + from_hf_hub=False, + ) + self.test_cache_dir( + AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=False, from_hf_hub=False + ) + + # aistudio + self.test_cache_dir(CLIPTextModel, "aistudio/tiny-clip", use_safetensors=False, from_aistudio=True) + self.test_cache_dir(AutoModel, "aistudio/tiny-clip", use_safetensors=False, from_aistudio=True) + self.test_cache_dir( + CLIPTextModel, + "aistudio/paddlenlp-test-model", + subfolder="tiny-clip", + use_safetensors=False, + from_aistudio=True, + ) + self.test_cache_dir( + AutoModel, + "aistudio/paddlenlp-test-model", + subfolder="tiny-clip", + use_safetensors=False, + from_aistudio=True, + ) + + # hf + self.test_cache_dir(CLIPTextModel, "Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir(AutoModel, "Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir( + CLIPTextModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip", + from_hf_hub=True, + use_safetensors=False, + ) + self.test_cache_dir( + AutoModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=False + ) + self.test_cache_dir(CLIPTextModel, "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir(AutoModel, "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=False) + self.test_cache_dir( + CLIPTextModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip-one", + from_hf_hub=True, + use_safetensors=False, + ) + self.test_cache_dir( + AutoModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip-one", + from_hf_hub=True, + use_safetensors=False, + ) + + def test_clip_load_safe(self): + # BOS + logger.info("Download model from PaddleNLP BOS") + clip_model_bos = CLIPTextModel.from_pretrained("baicai/tiny-clip", use_safetensors=True, from_hf_hub=False) + clip_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-clip", use_safetensors=True, from_hf_hub=False) + self.test_config_diff(clip_model_bos.config, clip_model_bos_auto.config) + + logger.info("Download model from PaddleNLP BOS with subfolder") + clip_model_bos_sub = CLIPTextModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_hf_hub=False + ) + self.test_config_diff(clip_model_bos.config, clip_model_bos_sub.config) + + clip_model_bos_sub_auto = AutoModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_hf_hub=False + ) + self.test_config_diff(clip_model_bos_sub.config, clip_model_bos_sub_auto.config) + + # aistudio + logger.info("Download model from aistudio") + clip_model_aistudio = 
CLIPTextModel.from_pretrained( + "aistudio/tiny-clip", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(clip_model_bos.config, clip_model_aistudio.config) + clip_model_aistudio_auto = AutoModel.from_pretrained( + "aistudio/tiny-clip", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio.config, clip_model_aistudio_auto.config) + + logger.info("Download model from aistudio with subfolder") + clip_model_aistudio_sub = CLIPTextModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio.config, clip_model_aistudio_sub.config) + clip_model_aistudio_sub_auto = AutoModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_aistudio=True + ) + self.test_config_diff(clip_model_aistudio_sub.config, clip_model_aistudio_sub_auto.config) + + # hf + logger.info("Download model from hf") + clip_model_hf = CLIPTextModel.from_pretrained("Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=True) + clip_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=True) + self.test_config_diff(clip_model_hf.config, clip_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + clip_model_hf_sub = CLIPTextModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_sub.config) + clip_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf_sub.config, clip_model_hf_sub_auto.config) + clip_model_hf = CLIPTextModel.from_pretrained( + "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf.config) + clip_model_hf_auto = AutoModel.from_pretrained( + "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_auto.config) + logger.info("Download model from hf with subfolder") + clip_model_hf_sub = CLIPTextModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf.config, clip_model_hf_sub.config) + clip_model_hf_sub_auto = AutoModel.from_pretrained( + "Baicai003/paddlenlp-test-model", subfolder="tiny-clip-one", from_hf_hub=True, use_safetensors=True + ) + self.test_config_diff(clip_model_hf_sub.config, clip_model_hf_sub_auto.config) + + # local + logger.info("Download model from local") + clip_model_bos.save_pretrained("./paddlenlp-test-model/tiny-clip", safe_serialization=True) + clip_model_local = CLIPTextModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-clip", use_safetensors=True + ) + self.test_config_diff(clip_model_bos.config, clip_model_local.config) + clip_model_local_auto = AutoModel.from_pretrained( + "./paddlenlp-test-model/", subfolder="tiny-clip", use_safetensors=True + ) + self.test_config_diff(clip_model_local.config, clip_model_local_auto.config) + + logger.info("Test cache_dir") + # BOS + self.test_cache_dir(CLIPTextModel, "baicai/tiny-clip", use_safetensors=True, from_hf_hub=False) + self.test_cache_dir(AutoModel, "baicai/tiny-clip", use_safetensors=True, from_hf_hub=False) + self.test_cache_dir( + 
CLIPTextModel, + "baicai/paddlenlp-test-model", + subfolder="tiny-clip", + use_safetensors=True, + from_hf_hub=False, + ) + self.test_cache_dir( + AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_hf_hub=False + ) + + # aistudio + self.test_cache_dir(CLIPTextModel, "aistudio/tiny-clip", use_safetensors=True, from_aistudio=True) + self.test_cache_dir(AutoModel, "aistudio/tiny-clip", use_safetensors=True, from_aistudio=True) + self.test_cache_dir( + CLIPTextModel, + "aistudio/paddlenlp-test-model", + subfolder="tiny-clip", + use_safetensors=True, + from_aistudio=True, + ) + self.test_cache_dir( + AutoModel, "aistudio/paddlenlp-test-model", subfolder="tiny-clip", use_safetensors=True, from_aistudio=True + ) + + # hf + self.test_cache_dir(CLIPTextModel, "Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir(AutoModel, "Baicai003/tiny-clip", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir( + CLIPTextModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip", + from_hf_hub=True, + use_safetensors=True, + ) + self.test_cache_dir( + AutoModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-clip", from_hf_hub=True, use_safetensors=True + ) + self.test_cache_dir(CLIPTextModel, "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir(AutoModel, "Baicai003/tiny-clip-one", from_hf_hub=True, use_safetensors=True) + self.test_cache_dir( + CLIPTextModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip-one", + from_hf_hub=True, + use_safetensors=True, + ) + self.test_cache_dir( + AutoModel, + "Baicai003/paddlenlp-test-model", + subfolder="tiny-clip-one", + from_hf_hub=True, + use_safetensors=True, + ) + + def test_t5_load(self): + # BOS + logger.info("Download model from PaddleNLP BOS") + t5_model_bos = T5Model.from_pretrained("baicai/tiny-t5", use_safetensors=False, from_hf_hub=False) + t5_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-t5", use_safetensors=False, from_hf_hub=False) + self.test_config_diff(t5_model_bos.config, t5_model_bos_auto.config) + + logger.info("Download model from PaddleNLP BOS with subfolder") + t5_model_bos_sub = T5Model.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(t5_model_bos.config, t5_model_bos_sub.config) + + t5_model_bos_sub_auto = AutoModel.from_pretrained( + "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_hf_hub=False + ) + self.test_config_diff(t5_model_bos_sub.config, t5_model_bos_sub_auto.config) + + # aistudio + logger.info("Download model from aistudio") + t5_model_aistudio = T5Model.from_pretrained("aistudio/tiny-t5", use_safetensors=False, from_aistudio=True) + self.test_config_diff(t5_model_bos.config, t5_model_aistudio.config) + t5_model_aistudio_auto = AutoModel.from_pretrained( + "aistudio/tiny-t5", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(t5_model_aistudio.config, t5_model_aistudio_auto.config) + + logger.info("Download model from aistudio with subfolder") + t5_model_aistudio_sub = T5Model.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_aistudio=True + ) + self.test_config_diff(t5_model_aistudio.config, t5_model_aistudio_sub.config) + t5_model_aistudio_sub_auto = AutoModel.from_pretrained( + "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_aistudio=True + ) + 
+        self.test_config_diff(t5_model_aistudio_sub.config, t5_model_aistudio_sub_auto.config)
+
+        # hf
+        logger.info("Download model from hf")
+        t5_model_hf = T5Model.from_pretrained("Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=False)
+        t5_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=False)
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_auto.config)
+        logger.info("Download model from hf with subfolder")
+        t5_model_hf_sub = T5Model.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_sub.config)
+        t5_model_hf_sub_auto = AutoModel.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_config_diff(t5_model_hf_sub.config, t5_model_hf_sub_auto.config)
+        t5_model_hf = T5Model.from_pretrained("Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=False)
+        self.test_config_diff(t5_model_bos.config, t5_model_hf.config)
+        t5_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=False)
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_auto.config)
+        logger.info("Download model from hf with subfolder")
+        t5_model_hf_sub = T5Model.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_sub.config)
+        t5_model_hf_sub_auto = AutoModel.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_config_diff(t5_model_hf_sub.config, t5_model_hf_sub_auto.config)
+
+        # local
+        logger.info("Download model from local")
+        t5_model_bos.save_pretrained("./paddlenlp-test-model/tiny-t5", safe_serialization=False)
+        t5_model_local = T5Model.from_pretrained("./paddlenlp-test-model/", subfolder="tiny-t5", use_safetensors=False)
+        self.test_config_diff(t5_model_bos.config, t5_model_local.config)
+        t5_model_local_auto = AutoModel.from_pretrained(
+            "./paddlenlp-test-model/", subfolder="tiny-t5", use_safetensors=False
+        )
+        self.test_config_diff(t5_model_local.config, t5_model_local_auto.config)
+
+        logger.info("Test cache_dir")
+        # BOS
+        self.test_cache_dir(T5Model, "baicai/tiny-t5", use_safetensors=False, from_hf_hub=False)
+        self.test_cache_dir(AutoModel, "baicai/tiny-t5", use_safetensors=False, from_hf_hub=False)
+        self.test_cache_dir(
+            T5Model, "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_hf_hub=False
+        )
+        self.test_cache_dir(
+            AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_hf_hub=False
+        )
+
+        # aistudio
+        self.test_cache_dir(T5Model, "aistudio/tiny-t5", use_safetensors=False, from_aistudio=True)
+        self.test_cache_dir(AutoModel, "aistudio/tiny-t5", use_safetensors=False, from_aistudio=True)
+        self.test_cache_dir(
+            T5Model, "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_aistudio=True
+        )
+        self.test_cache_dir(
+            AutoModel, "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=False, from_aistudio=True
+        )
+
+        # hf
+        self.test_cache_dir(T5Model, "Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=False)
+        self.test_cache_dir(AutoModel, "Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=False)
+        self.test_cache_dir(
+            T5Model, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_cache_dir(
+            AutoModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_cache_dir(T5Model, "Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=False)
+        self.test_cache_dir(AutoModel, "Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=False)
+        self.test_cache_dir(
+            T5Model, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=False
+        )
+        self.test_cache_dir(
+            AutoModel,
+            "Baicai003/paddlenlp-test-model",
+            subfolder="tiny-t5-one",
+            from_hf_hub=True,
+            use_safetensors=False,
+        )
+
+    def test_t5_load_safe(self):
+        # BOS
+        logger.info("Download model from PaddleNLP BOS")
+        t5_model_bos = T5Model.from_pretrained("baicai/tiny-t5", use_safetensors=True, from_hf_hub=False)
+        t5_model_bos_auto = AutoModel.from_pretrained("baicai/tiny-t5", use_safetensors=True, from_hf_hub=False)
+        self.test_config_diff(t5_model_bos.config, t5_model_bos_auto.config)
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        t5_model_bos_sub = T5Model.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_hf_hub=False
+        )
+        self.test_config_diff(t5_model_bos.config, t5_model_bos_sub.config)
+
+        t5_model_bos_sub_auto = AutoModel.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_hf_hub=False
+        )
+        self.test_config_diff(t5_model_bos_sub.config, t5_model_bos_sub_auto.config)
+
+        # aistudio
+        logger.info("Download model from aistudio")
+        t5_model_aistudio = T5Model.from_pretrained("aistudio/tiny-t5", use_safetensors=True, from_aistudio=True)
+        self.test_config_diff(t5_model_bos.config, t5_model_aistudio.config)
+        t5_model_aistudio_auto = AutoModel.from_pretrained(
+            "aistudio/tiny-t5", use_safetensors=True, from_aistudio=True
+        )
+        self.test_config_diff(t5_model_aistudio.config, t5_model_aistudio_auto.config)
+
+        logger.info("Download model from aistudio with subfolder")
+        t5_model_aistudio_sub = T5Model.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_aistudio=True
+        )
+        self.test_config_diff(t5_model_aistudio.config, t5_model_aistudio_sub.config)
+        t5_model_aistudio_sub_auto = AutoModel.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_aistudio=True
+        )
+        self.test_config_diff(t5_model_aistudio_sub.config, t5_model_aistudio_sub_auto.config)
+
+        # hf
+        logger.info("Download model from hf")
+        t5_model_hf = T5Model.from_pretrained("Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=True)
+        t5_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=True)
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_auto.config)
+        logger.info("Download model from hf with subfolder")
+        t5_model_hf_sub = T5Model.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_sub.config)
+        t5_model_hf_sub_auto = AutoModel.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_config_diff(t5_model_hf_sub.config, t5_model_hf_sub_auto.config)
+        t5_model_hf = T5Model.from_pretrained("Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=True)
+        self.test_config_diff(t5_model_bos.config, t5_model_hf.config)
+        t5_model_hf_auto = AutoModel.from_pretrained("Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=True)
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_auto.config)
+        logger.info("Download model from hf with subfolder")
+        t5_model_hf_sub = T5Model.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_config_diff(t5_model_hf.config, t5_model_hf_sub.config)
+        t5_model_hf_sub_auto = AutoModel.from_pretrained(
+            "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_config_diff(t5_model_hf_sub.config, t5_model_hf_sub_auto.config)
+
+        # local
+        logger.info("Download model from local")
+        t5_model_bos.save_pretrained("./paddlenlp-test-model/tiny-t5", safe_serialization=True)
+        t5_model_local = T5Model.from_pretrained("./paddlenlp-test-model/", subfolder="tiny-t5", use_safetensors=True)
+        self.test_config_diff(t5_model_bos.config, t5_model_local.config)
+        t5_model_local_auto = AutoModel.from_pretrained(
+            "./paddlenlp-test-model/", subfolder="tiny-t5", use_safetensors=True
+        )
+        self.test_config_diff(t5_model_local.config, t5_model_local_auto.config)
+
+        logger.info("Test cache_dir")
+        # BOS
+        self.test_cache_dir(T5Model, "baicai/tiny-t5", use_safetensors=True, from_hf_hub=False)
+        self.test_cache_dir(AutoModel, "baicai/tiny-t5", use_safetensors=True, from_hf_hub=False)
+        self.test_cache_dir(
+            T5Model, "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_hf_hub=False
+        )
+        self.test_cache_dir(
+            AutoModel, "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_hf_hub=False
+        )
+
+        # aistudio
+        self.test_cache_dir(T5Model, "aistudio/tiny-t5", use_safetensors=True, from_aistudio=True)
+        self.test_cache_dir(AutoModel, "aistudio/tiny-t5", use_safetensors=True, from_aistudio=True)
+        self.test_cache_dir(
+            T5Model, "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_aistudio=True
+        )
+        self.test_cache_dir(
+            AutoModel, "aistudio/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_aistudio=True
+        )
+
+        # hf
+        self.test_cache_dir(T5Model, "Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=True)
+        self.test_cache_dir(AutoModel, "Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=True)
+        self.test_cache_dir(
+            T5Model, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_cache_dir(
+            AutoModel, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_cache_dir(T5Model, "Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=True)
+        self.test_cache_dir(AutoModel, "Baicai003/tiny-t5-one", from_hf_hub=True, use_safetensors=True)
+        self.test_cache_dir(
+            T5Model, "Baicai003/paddlenlp-test-model", subfolder="tiny-t5-one", from_hf_hub=True, use_safetensors=True
+        )
+        self.test_cache_dir(
+            AutoModel,
+            "Baicai003/paddlenlp-test-model",
+            subfolder="tiny-t5-one",
+            from_hf_hub=True,
+            use_safetensors=True,
+        )
diff --git a/tests/transformers/load_subfolder/test_processor.py b/tests/transformers/load_subfolder/test_processor.py
new file mode 100644
index 000000000000..537f0bb48c2f
--- /dev/null
+++ b/tests/transformers/load_subfolder/test_processor.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from paddlenlp.transformers import AutoProcessor, CLIPProcessor
+from paddlenlp.utils.log import logger
+
+
+class ProcessorLoadTester(unittest.TestCase):
+    def test_clip_load(self):
+        logger.info("Download model from PaddleNLP BOS")
+        clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+        clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+
+        logger.info("Download model from local")
+        clip_processor.save_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_processor = CLIPProcessor.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_processor = AutoProcessor.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        logger.info("Download model from local with subfolder")
+        clip_processor = CLIPProcessor.from_pretrained("./paddlenlp-test-model/", subfolder="clip-vit-base-patch32")
+        clip_processor = AutoProcessor.from_pretrained("./paddlenlp-test-model/", subfolder="clip-vit-base-patch32")
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        clip_processor = CLIPProcessor.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+        clip_processor = AutoProcessor.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+
+        logger.info("Download model from aistudio")
+        clip_processor = CLIPProcessor.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+        clip_processor = AutoProcessor.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+
+        logger.info("Download model from aistudio with subfolder")
+        clip_processor = CLIPProcessor.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
+        clip_processor = AutoProcessor.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
diff --git a/tests/transformers/load_subfolder/test_tokenizer.py b/tests/transformers/load_subfolder/test_tokenizer.py
new file mode 100644
index 000000000000..de44b2baf701
--- /dev/null
+++ b/tests/transformers/load_subfolder/test_tokenizer.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from paddlenlp.transformers import (
+    AutoTokenizer,
+    BertTokenizer,
+    CLIPTokenizer,
+    T5Tokenizer,
+)
+from paddlenlp.utils.log import logger
+
+
+class TokenizerLoadTester(unittest.TestCase):
+    def test_bert_load(self):
+        logger.info("Download model from PaddleNLP BOS")
+        bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", from_hf_hub=False)
+        bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", from_hf_hub=False)
+
+        logger.info("Download model from local")
+        bert_tokenizer.save_pretrained("./paddlenlp-test-model/bert-base-uncased")
+        bert_tokenizer = BertTokenizer.from_pretrained("./paddlenlp-test-model/bert-base-uncased")
+        bert_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/bert-base-uncased")
+        bert_tokenizer = BertTokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="bert-base-uncased")
+        bert_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="bert-base-uncased")
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        bert_tokenizer = BertTokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="bert-base-uncased", from_hf_hub=False
+        )
+        bert_tokenizer = AutoTokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="bert-base-uncased", from_hf_hub=False
+        )
+
+        logger.info("Download model from aistudio")
+        bert_tokenizer = BertTokenizer.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True)
+        bert_tokenizer = AutoTokenizer.from_pretrained("aistudio/bert-base-uncased", from_aistudio=True)
+
+        logger.info("Download model from aistudio with subfolder")
+        bert_tokenizer = BertTokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="bert-base-uncased", from_aistudio=True
+        )
+        bert_tokenizer = AutoTokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="bert-base-uncased", from_aistudio=True
+        )
+
+    def test_clip_load(self):
+        logger.info("Download model from PaddleNLP BOS")
+        clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+        clip_tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32", from_hf_hub=False)
+
+        logger.info("Download model from local")
+        clip_tokenizer.save_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_tokenizer = CLIPTokenizer.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/clip-vit-base-patch32")
+        clip_tokenizer = CLIPTokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="clip-vit-base-patch32")
+        clip_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="clip-vit-base-patch32")
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        clip_tokenizer = CLIPTokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+        clip_tokenizer = AutoTokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_hf_hub=False
+        )
+
+        logger.info("Download model from aistudio")
+        clip_tokenizer = CLIPTokenizer.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+        clip_tokenizer = AutoTokenizer.from_pretrained("aistudio/clip-vit-base-patch32", from_aistudio=True)
+
+        logger.info("Download model from aistudio with subfolder")
+        clip_tokenizer = CLIPTokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
+        clip_tokenizer = AutoTokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="clip-vit-base-patch32", from_aistudio=True
+        )
+
+    def test_t5_load(self):
+        logger.info("Download model from PaddleNLP BOS")
+        t5_tokenizer = T5Tokenizer.from_pretrained("t5-small", from_hf_hub=False)
+        t5_tokenizer = AutoTokenizer.from_pretrained("t5-small", from_hf_hub=False)
+
+        logger.info("Download model from local")
+        t5_tokenizer.save_pretrained("./paddlenlp-test-model/t5-small")
+        t5_tokenizer = T5Tokenizer.from_pretrained("./paddlenlp-test-model/t5-small")
+        t5_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/t5-small")
+        t5_tokenizer = T5Tokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="t5-small")
+        t5_tokenizer = AutoTokenizer.from_pretrained("./paddlenlp-test-model/", subfolder="t5-small")
+
+        logger.info("Download model from PaddleNLP BOS with subfolder")
+        t5_tokenizer = T5Tokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="t5-small", from_hf_hub=False
+        )
+        t5_tokenizer = AutoTokenizer.from_pretrained(
+            "baicai/paddlenlp-test-model", subfolder="t5-small", from_hf_hub=False
+        )
+
+        logger.info("Download model from aistudio")
+        t5_tokenizer = T5Tokenizer.from_pretrained("aistudio/t5-small", from_aistudio=True)
+        t5_tokenizer = AutoTokenizer.from_pretrained("aistudio/t5-small", from_aistudio=True)
+
+        t5_tokenizer = T5Tokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="t5-small", from_aistudio=True
+        )
+        t5_tokenizer = AutoTokenizer.from_pretrained(
+            "aistudio/paddlenlp-test-model", subfolder="t5-small", from_aistudio=True
+        )
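
Taken together, the new tests pin down one loading matrix: PaddleNLP BOS, AI Studio, the Hugging Face Hub, and local directories, each with and without a `subfolder`, and with `use_safetensors` toggled both ways. As a quick reference, here is a minimal usage sketch of the call patterns exercised above; it reuses the tiny-t5 test fixtures from this patch and is a summary for readers, not part of the patch itself:

    from paddlenlp.transformers import AutoModel, T5Model

    # PaddleNLP BOS (the default source)
    model = T5Model.from_pretrained("baicai/tiny-t5", use_safetensors=True, from_hf_hub=False)
    # The same checkpoint stored in a subfolder of a multi-model repo
    model = AutoModel.from_pretrained(
        "baicai/paddlenlp-test-model", subfolder="tiny-t5", use_safetensors=True, from_hf_hub=False
    )
    # AI Studio
    model = AutoModel.from_pretrained("aistudio/tiny-t5", use_safetensors=True, from_aistudio=True)
    # Hugging Face Hub
    model = AutoModel.from_pretrained("Baicai003/tiny-t5", from_hf_hub=True, use_safetensors=True)
    # Local directory, again addressed through subfolder
    model.save_pretrained("./paddlenlp-test-model/tiny-t5", safe_serialization=True)
    model = AutoModel.from_pretrained("./paddlenlp-test-model/", subfolder="tiny-t5", use_safetensors=True)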