Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable code-specific revision for code on the Hub #23799

Merged
merged 2 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/transformers/dynamic_module_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def get_cached_module_file(
)
new_files.append(f"{module_needed}.py")

if len(new_files) > 0:
if len(new_files) > 0 and revision is None:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't warn if the revision is set already.

new_files = "\n".join([f"- {f}" for f in new_files])
repo_type_str = "" if repo_type is None else f"{repo_type}s/"
url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}"
Expand All @@ -340,6 +340,7 @@ def get_class_from_dynamic_module(
revision: Optional[str] = None,
local_files_only: bool = False,
repo_type: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs,
):
"""
Expand Down Expand Up @@ -391,6 +392,10 @@ def get_class_from_dynamic_module(
If `True`, will only try to load the tokenizer configuration from local files.
repo_type (`str`, *optional*):
Specify the repo type (useful when downloading from a space for instance).
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code leaves in a different repository than the
rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based system for
storing models and other artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
<Tip>
Expand All @@ -415,12 +420,12 @@ def get_class_from_dynamic_module(
# Catch the name of the repo if it's specified in `class_reference`
if "--" in class_reference:
repo_id, class_reference = class_reference.split("--")
# Invalidate revision since it's not relevant for this repo
revision = "main"
else:
repo_id = pretrained_model_name_or_path
module_file, class_name = class_reference.split(".")

if code_revision is None and pretrained_model_name_or_path == repo_id:
code_revision = revision
# And lastly we get the class inside our newly created module
final_module = get_cached_module_file(
repo_id,
Expand All @@ -430,7 +435,7 @@ def get_class_from_dynamic_module(
resume_download=resume_download,
proxies=proxies,
use_auth_token=use_auth_token,
revision=revision,
revision=code_revision,
local_files_only=local_files_only,
repo_type=repo_type,
)
Expand Down
18 changes: 18 additions & 0 deletions src/transformers/models/auto/auto_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code leaves in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
Expand Down Expand Up @@ -224,6 +229,11 @@
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code leaves in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
Expand Down Expand Up @@ -320,6 +330,11 @@
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code leaves in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
Expand Down Expand Up @@ -408,6 +423,7 @@ def from_config(cls, config, **kwargs):
else:
repo_id = config.name_or_path
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
_ = kwargs.pop("code_revision", None)
return model_class._from_config(config, **kwargs)
elif type(config) in cls._model_mapping.keys():
model_class = _get_model_class(config, cls._model_mapping)
Expand All @@ -425,6 +441,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
kwargs["_from_auto"] = True
hub_kwargs_names = [
"cache_dir",
"code_revision",
"force_download",
"local_files_only",
"proxies",
Expand Down Expand Up @@ -464,6 +481,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
model_class = get_class_from_dynamic_module(
class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs
)
_ = hub_kwargs.pop("code_revision", None)
return model_class.from_pretrained(
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
)
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
)
class_ref = config_dict["auto_map"]["AutoConfig"]
config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)
return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif "model_type" in config_dict:
config_class = CONFIG_MAPPING[config_dict["model_type"]]
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/feature_extraction_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
feature_extractor_class = get_class_from_dynamic_module(
feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs
)
_ = kwargs.pop("code_revision", None)
else:
feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/image_processing_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
image_processor_class = get_class_from_dynamic_module(
image_processor_auto_map, pretrained_model_name_or_path, **kwargs
)
_ = kwargs.pop("code_revision", None)
else:
image_processor_class = image_processor_class_from_name(image_processor_class)

Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/processing_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
processor_class = get_class_from_dynamic_module(
processor_auto_map, pretrained_model_name_or_path, **kwargs
)
_ = kwargs.pop("code_revision", None)
else:
processor_class = processor_class_from_name(processor_class)

Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
else:
class_ref = tokenizer_auto_map[0]
tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)

elif use_fast and not config_tokenizer_class.endswith("Fast"):
tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
Expand Down