Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download----fix new bug #8088

Merged
merged 7 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion paddlenlp/generation/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,9 @@ def from_pretrained(
from_aistudio=from_aistudio,
from_hf_hub=from_hf_hub,
)

assert (
resolved_config_file is not None
), f"please make sure {config_file_name} under {pretrained_model_name_or_path}"
try:
logger.info(f"Loading configuration file {resolved_config_file}")
# Load config dict
Expand Down
3 changes: 1 addition & 2 deletions paddlenlp/transformers/auto/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *model_args, **kwar
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)

if os.path.exists(config_file):
if config_file is not None and os.path.exists(config_file):
config_class = cls._get_config_class_from_config(pretrained_model_name_or_path, config_file)
logger.info("We are using %s to load '%s'." % (config_class, pretrained_model_name_or_path))
if config_class is cls:
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/auto/image_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)
if os.path.exists(config_file):
if config_file is not None and os.path.exists(config_file):

Check warning on line 169 in paddlenlp/transformers/auto/image_processing.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/transformers/auto/image_processing.py#L169

Added line #L169 was not covered by tests
processor_class = cls._get_image_processor_class_from_config(
pretrained_model_name_or_path,
config_file,
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/auto/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def _from_pretrained(cls, pretrained_model_name_or_path, task=None, *model_args,
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)
if os.path.exists(config_file):
if config_file is not None and os.path.exists(config_file):
model_class = cls._get_model_class_from_config(pretrained_model_name_or_path, config_file)
logger.info(f"We are using {model_class} to load '{pretrained_model_name_or_path}'.")
return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/auto/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)
if os.path.exists(config_file):
if config_file is not None and os.path.exists(config_file):

Check warning on line 179 in paddlenlp/transformers/auto/processing.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/transformers/auto/processing.py#L179

Added line #L179 was not covered by tests
processor_class = cls._get_processor_class_from_config(
pretrained_model_name_or_path,
config_file,
Expand Down
5 changes: 2 additions & 3 deletions paddlenlp/transformers/auto/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ class AutoTokenizer:
_tokenizer_mapping = MAPPING_NAMES
_name_mapping = TOKENIZER_MAPPING_NAMES
_fast_name_mapping = FAST_TOKENIZER_MAPPING_NAMES
tokenizer_config_file = ["tokenizer_config.json", "config.json", "model_config.json"]
tokenizer_config_file = "tokenizer_config.json"

def __init__(self, *args, **kwargs):
raise EnvironmentError(
Expand Down Expand Up @@ -319,8 +319,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)

if os.path.exists(config_file):
if config_file is not None and os.path.exists(config_file):
tokenizer_class = cls._get_tokenizer_class_from_config(
pretrained_model_name_or_path, config_file, use_fast
)
Expand Down
4 changes: 3 additions & 1 deletion paddlenlp/transformers/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,9 @@ def _get_config_dict(
from_aistudio=from_aistudio,
from_hf_hub=from_hf_hub,
)

assert (
resolved_config_file is not None
), f"please make sure one of the {filenames} under {pretrained_model_name_or_path}"
try:
logger.info(f"Loading configuration file {resolved_config_file}")
# Load config dict
Expand Down
4 changes: 3 additions & 1 deletion paddlenlp/transformers/feature_extraction_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,9 @@ def get_feature_extractor_dict(
from_aistudio=from_aistudio,
from_hf_hub=from_hf_hub,
)

assert (
resolved_feature_extractor_file is not None
), f"please make sure {FEATURE_EXTRACTOR_NAME} under {pretrained_model_name_or_path}"
try:
# Load feature_extractor dict
with open(resolved_feature_extractor_file, "r", encoding="utf-8") as reader:
Expand Down
4 changes: 3 additions & 1 deletion paddlenlp/transformers/image_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,9 @@ def get_image_processor_dict(
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
)

assert (
resolved_image_processor_file is not None
), f"please make sure {IMAGE_PROCESSOR_NAME} under {pretrained_model_name_or_path}"
try:
# Load image_processor dict
with open(resolved_image_processor_file, "r", encoding="utf-8") as reader:
Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/transformers/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,9 +1607,9 @@ def get_file_path(pretrained_model_name_or_path, subfolder, SAFE_WEIGHTS_NAME, v
filenames = [
_add_variant(SAFE_WEIGHTS_INDEX_NAME, variant),
_add_variant(PADDLE_WEIGHTS_INDEX_NAME, variant),
_add_variant(PYTORCH_WEIGHTS_INDEX_NAME, variant),
_add_variant(SAFE_WEIGHTS_NAME, variant),
_add_variant(PADDLE_WEIGHTS_NAME, variant),
_add_variant(PYTORCH_WEIGHTS_INDEX_NAME, variant),
_add_variant(PYTORCH_WEIGHTS_NAME, variant),
]
else:
Expand Down
4 changes: 3 additions & 1 deletion paddlenlp/transformers/roberta/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
from_aistudio=from_aistudio,
from_hf_hub=from_hf_hub,
)
assert resolved_config_file is not None
assert (
resolved_config_file is not None
), f"please make sure {cls.tokenizer_config_file} under {pretrained_model_name_or_path}"

with io.open(resolved_config_file, encoding="utf-8") as f:
init_kwargs = json.load(f)
Expand Down
4 changes: 3 additions & 1 deletion paddlenlp/transformers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,9 @@ def get_checkpoint_shard_files(
from_aistudio=from_aistudio,
from_hf_hub=from_hf_hub,
)

assert (
cached_filename is not None
), f"please make sure {shard_filename} under {pretrained_model_name_or_path}"
# We have already dealt with RepositoryNotFoundError and RevisionNotFoundError when getting the index, so
# we don't have to catch them here.
except EntryNotFoundError:
Expand Down
5 changes: 4 additions & 1 deletion paddlenlp/utils/download/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pathlib import Path
from typing import Dict, Literal, Optional, Union

from huggingface_hub import _CACHED_NO_EXIST
from huggingface_hub import file_exists as hf_hub_file_exists
from huggingface_hub import hf_hub_download
from huggingface_hub import try_to_load_from_cache as hf_hub_try_to_load_from_cache
Expand Down Expand Up @@ -148,7 +149,9 @@ def resolve_file_path(
cache_file_name = bos_aistudio_hf_try_to_load_from_cache(
repo_id, filename, cache_dir, subfolder, revision, repo_type, from_bos, from_aistudio, from_hf_hub
)
if cache_file_name is not None and not isinstance(cache_file_name, object):
if from_hf_hub and cache_file_name is _CACHED_NO_EXIST:
cache_file_name = None
if cache_file_name is not None:
return cache_file_name

from_modelscope = strtobool(os.environ.get("from_modelscope", False))
Expand Down
3 changes: 1 addition & 2 deletions paddlenlp/utils/download/aistudio_hub_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
logger = logging.getLogger(__name__)

from .common import (
_CACHED_NO_EXIST,
DEFALUT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD,
DEFAULT_ETAG_TIMEOUT,
DEFAULT_REQUEST_TIMEOUT,
Expand Down Expand Up @@ -714,7 +713,7 @@

# Check if file is cached as "no_exist"
if os.path.isfile(os.path.join(no_exist_dir, revision, filename)):
return _CACHED_NO_EXIST
return None

Check warning on line 716 in paddlenlp/utils/download/aistudio_hub_download.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/utils/download/aistudio_hub_download.py#L716

Added line #L716 was not covered by tests

# Check if revision folder exists
if not os.path.exists(snapshots_dir):
Expand Down
7 changes: 4 additions & 3 deletions paddlenlp/utils/download/bos_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,24 +186,25 @@ def bos_download(
# This is used to create a URL, and not a local path, hence the forward slash.
filename = f"{subfolder}/{filename}"

locks_dir = os.path.join(cache_dir, ".locks")

storage_folder = os.path.join(cache_dir, repo_id)
os.makedirs(storage_folder, exist_ok=True)

if url is None:
url = bos_url(repo_id, filename, repo_type=REPO_TYPE, endpoint=endpoint)
headers = None
url_to_download = url
lock_path = os.path.join(cache_dir, repo_id, f"{filename}.lock")
lock_path = os.path.join(locks_dir, repo_id, f"{filename}.lock")
file_path = os.path.join(cache_dir, repo_id, filename)

os.makedirs(os.path.dirname(lock_path), exist_ok=True)

if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
lock_path = "\\\\?\\" + os.path.abspath(lock_path)

if os.name == "nt" and len(os.path.abspath(file_path)) > 255:
file_path = "\\\\?\\" + os.path.abspath(file_path)

Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
with FileLock(lock_path):
# If the download just completed while the lock was activated.
if os.path.exists(file_path) and not force_download:
Expand Down
6 changes: 3 additions & 3 deletions tests/transformers/auto/test_confiugration.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ def test_from_aistudio(self):
config = AutoConfig.from_pretrained("PaddleNLP/tiny-random-bert", from_aistudio=True)
self.assertEqual(config.hidden_size, 32)

def test_subfolder(self):
config = AutoConfig.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="text_encoder")
self.assertEqual(config.hidden_size, 768)
# def test_subfolder(self):
# config = AutoConfig.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="text_encoder")
# self.assertEqual(config.hidden_size, 768)

def test_load_from_legacy_config(self):
number = random.randint(0, 10000)
Expand Down
Loading