Skip to content

Commit

Permalink
Enable arg trust_remote_code to use custom tokenizers (quic#16)
Browse files Browse the repository at this point in the history
* Enable arg trust_remote_code to use custom tokenizers

Allow for custom models defined on the Hub in their own modeling files.

Signed-off-by: vbaddi <[email protected]>

* fix the tokenizer in export_hf_to_cloud_ai_100

Signed-off-by: vbaddi <[email protected]>

---------

Signed-off-by: vbaddi <[email protected]>
Signed-off-by: quic-amitraj <[email protected]>
  • Loading branch information
vbaddi authored and quic-amitraj committed Jun 7, 2024
1 parent dda0516 commit 9da2555
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 4 deletions.
4 changes: 3 additions & 1 deletion QEfficient/cloud/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def main(
login(hf_token)
# Download tokenizer along with model if it doesn't exist
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json"])
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)

cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_path, device_id=devices, prompt=prompt)

Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def main(model_name: str, cache_dir: str) -> None:
"""
model_hf_path = hf_download(repo_id=model_name, hf_token=None, cache_dir=cache_dir)
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(model_hf_path, use_cache=True)

# Easy and minimal api to update the model to QEff.
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ def main(
cache_dir=cache_dir,
ignore_patterns=["*.txt", "*.onnx", "*.ot", "*.md", "*.tflite", "*.pdf"],
)
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)

if qpc_exists(qpc_dir_path):
# execute
Expand Down
2 changes: 1 addition & 1 deletion QEfficient/exporter/export_hf_to_cloud_ai_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def convert_to_cloud_bertstyle(

# Load tokenizer
if tokenizer is None:
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
else:
if tokenizer.padding_side != "left":
logger.warning("Please use padding_side='left' while initializing the tokenizer")
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_con
# Bertstyle models do not have any optimization w.r.t KV cache changes and are an unoptimized version.
# It is recommended to use kv=True for better performance.
# For custom models defined on the Hub in their own modeling files, the `trust_remote_code` option
# should be set to `True` in `AutoTokenizer.from_pretrained` — only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
base_path, onnx_path = qualcomm_efficient_converter(
model_kv=model_transformed,
Expand Down

0 comments on commit 9da2555

Please sign in to comment.