Commit

Update infer and execute API to take prompts from txt file for BS>=1 (quic#11)

* [QEff]: Update infer and execute API to take prompts from txt file for bs>1

Signed-off-by: mamtsing <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update infer and execute API

Signed-off-by: mamtsing <[email protected]>

* Update infer and execute API

Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update infer, execute and text generation interface

Signed-off-by: mamtsing <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update text generation interface

Signed-off-by: mamtsing <[email protected]>

* Update Notebooks

Signed-off-by: quic-mamta <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update text_generation_inference.py

Signed-off-by: Mamta Singh <[email protected]>

* Update infer and execute and text generation interface

Signed-off-by: Mamta Singh <[email protected]>

* Update infer.py

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update infer.py

Signed-off-by: Mamta Singh <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update files

Signed-off-by: Mamta Singh <[email protected]>

* Update files

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update QEfficientGPT2.ipynb

Signed-off-by: Mamta Singh <[email protected]>

* Update QEfficientMPT.ipynb

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

---------

Signed-off-by: mamtsing <[email protected]>
Signed-off-by: mamtsing <[email protected]>
Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: quic-mamta <[email protected]>
Signed-off-by: Mamta Singh <[email protected]>

Signed-off-by: root <[email protected]>
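
A minimal sketch of driving the updated execute API from Python with a prompts file; the model card, the paths, and the one-prompt-per-line file format are assumptions for illustration, not part of this commit:

    from QEfficient.cloud.execute import main

    # Hypothetical paths; substitute the real model card and compiled QPC location.
    # prompts.txt is assumed to hold one prompt per line.
    main(
        model_name="gpt2",
        qpc_path="qeff_models/gpt2/qpcs",
        device_group=[0],
        prompts_txt_file_path="examples/prompts.txt",
    )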
quic-mamta authored May 23, 2024
1 parent 3b6896a commit c4c911a
Showing 7 changed files with 247 additions and 52 deletions.
63 changes: 44 additions & 19 deletions QEfficient/cloud/execute.py
@@ -11,36 +11,52 @@
from huggingface_hub import login
from transformers import AutoTokenizer

from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.generation.text_generation_inference import (
check_batch_size_and_num_prompts,
cloud_ai_100_exec_kv,
get_compilation_batch_size,
)
from QEfficient.utils import hf_download
from QEfficient.utils.constants import Constants


def main(
model_name: str,
prompt: str,
qpc_path: str,
devices: List[int],
device_group: List[int],
prompt: str = None,
prompts_txt_file_path: str = None,
cache_dir: str = Constants.CACHE_DIR,
hf_token: str = None,
):
"""
API to run the model on Cloud AI 100 Platform.
---------
:param model_name: str. Hugging Face Model Card name, Example: [gpt2]
:prompt: str. Sample prompt for the model text generation
:param model_name: str. Hugging Face Model Card name, Example: "gpt2"
:qpc_path: str. Path to the generated binary after compilation.
:devices: List[int]. Device Ids to be used for compilation. if devices > 1. Multiple Card setup is enabled.
:device_group: List[int]. Device Ids to be used for compilation. If len(device_group) > 1, multiple-card setup is enabled.
:prompt: str. Sample prompt for the model text generation
:prompts_txt_file_path: str. Path to txt file for multiple input prompts
"""

if hf_token is not None:
login(hf_token)

# Download tokenizer along with model if it doesn't exist
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json"])
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json", "*.py", "*token*"])
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")

cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_path, device_id=devices, prompt=prompt)
batch_size = get_compilation_batch_size(qpc_path)
prompt = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)

# Execute
cloud_ai_100_exec_kv(
batch_size=batch_size,
tokenizer=tokenizer,
qpc_path=qpc_path,
device_id=device_group,
prompt=prompt,
)


if __name__ == "__main__":
@@ -49,24 +65,33 @@ def main(
"--model_name", "--model-name", required=False, type=str, help="HF model card name for tokenizing the inputs"
)
parser.add_argument("--qpc_path", "--qpc-path", required=True, help="Path to generated QPC")
parser.add_argument(
"--device_group",
"--device-group",
required=True,
type=lambda device_ids: [int(x) for x in device_ids.strip("[]").split(",")],
help="Cloud AI 100 device ids (comma-separated) e.g. [0]",
)
parser.add_argument(
"--prompt",
type=lambda prompt: prompt.split("|"),
default="My name is",
help="Input prompt, if executing for batch size>1, pass input promprs in single string but seperate with pipe (|) symbol",
)
parser.add_argument(
"--device_group",
"--device-group",
required=True,
type=lambda device_ids: [int(x) for x in device_ids.strip("[]").split(",")],
help="cloud AI 100 device ids (comma-separated) e.g. [0]",
"--prompts_txt_file_path",
"--prompts-txt-file-path",
type=str,
help="File path for taking input prompts from txt file, sample prompts.txt file present in examples folder",
)
parser.add_argument(
"--cache-dir", "--cache_dir", default=Constants.CACHE_DIR, required=False, help="Cache dir to store HF Downlods"
"--cache-dir",
"--cache_dir",
default=Constants.CACHE_DIR,
required=False,
help="Cache dir to store HF Downloads",
)
parser.add_argument(
"--hf-token", "--hf_token", default=None, type=str, required=False, help="HF token id for private HF models"
)
args = parser.parse_args()
main(args.model_name, args.prompt, args.qpc_path, args.device_group, args.cache_dir, args.hf_token)
main(**args.__dict__)
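
The helpers get_compilation_batch_size and check_batch_size_and_num_prompts are defined in text_generation_inference.py, which is not shown in this diff. The sketch below is inferred purely from their call sites; the specializations.json location, the one-prompt-per-line file format, and the divisibility rule are assumptions:

    import json
    import os
    from typing import List, Optional

    def get_compilation_batch_size(qpc_path: str) -> int:
        # Assumption: the compile step writes a specializations.json next to
        # the generated qpcs directory, recording the compiled batch size.
        specialization_file = os.path.join(os.path.dirname(qpc_path), "specializations.json")
        with open(specialization_file) as f:
            return int(json.load(f)["specializations"][0]["batch_size"])

    def check_batch_size_and_num_prompts(
        prompt: Optional[List[str]],
        prompts_txt_file_path: Optional[str],
        batch_size: int,
    ) -> List[str]:
        # Assumption: the txt file holds one prompt per line and takes
        # precedence over --prompt when both are given.
        if prompts_txt_file_path is not None:
            with open(prompts_txt_file_path) as f:
                prompt = [line.strip() for line in f if line.strip()]
        assert prompt is not None, "Pass either prompt or prompts_txt_file_path"
        if batch_size > 1:
            assert len(prompt) % batch_size == 0, (
                f"Number of prompts ({len(prompt)}) should be divisible by "
                f"the compiled batch size ({batch_size})"
            )
        return prompt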
51 changes: 42 additions & 9 deletions QEfficient/cloud/infer.py
@@ -15,7 +15,10 @@
import QEfficient
from QEfficient.cloud.compile import main as compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.generation.text_generation_inference import (
check_batch_size_and_num_prompts,
cloud_ai_100_exec_kv,
)
from QEfficient.utils import hf_download
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
from QEfficient.utils.logging_utils import logger
@@ -48,7 +51,8 @@ def onnx_exists(onnx_file_path: str) -> bool:
def main(
model_name: str,
num_cores: int,
prompt: str,
prompt: str = None,
prompts_txt_file_path: str = None,
aic_enable_depth_first: bool = False,
mos: int = -1,
cache_dir: str = Constants.CACHE_DIR,
@@ -76,6 +80,8 @@ def main(
onnx_dir_path = os.path.join(model_card_dir, "onnx")
onnx_model_path = os.path.join(onnx_dir_path, model_name.replace("/", "_") + "_kv_clipped_fp16.onnx")

prompt = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)

# Get tokenizer
if hf_token is not None:
login(hf_token)
@@ -91,7 +97,13 @@
if qpc_exists(qpc_dir_path):
# execute
logger.info("Pre-compiled qpc found! Trying to execute with given prompt")
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_dir_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)
return

if onnx_exists(onnx_model_path):
@@ -112,7 +124,13 @@
assert (
generated_qpc_path == qpc_dir_path
), f"QPC files were generated at an unusual location, expected {qpc_dir_path}; got {generated_qpc_path}"
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=generated_qpc_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)
return

#############################################
@@ -159,16 +177,26 @@ def main(
logger.info(f"Compiled qpc files can be found at : {generated_qpc_path}")

# Execute
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=generated_qpc_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on AIC"
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on Cloud AI 100"
)
parser.add_argument("--model-name", "--model_name", required=True, help="HF Model card name/id")
parser.add_argument(
"--cache-dir", "--cache_dir", default=Constants.CACHE_DIR, required=False, help="Cache dir to store HF Downlods"
"--cache-dir",
"--cache_dir",
default=Constants.CACHE_DIR,
required=False,
help="Cache dir to store HF Downloads",
)
parser.add_argument(
"--hf-token", "--hf_token", default=None, type=str, required=False, help="HF token id for private HF models"
@@ -194,8 +222,13 @@ def main(
parser.add_argument(
"--prompt",
type=lambda prompt: prompt.split("|"),
default="My name is",
help="Input prompt, if executing for batch size>1, pass input promprs in single string but seperate with pipe (|) symbol",
help="Input prompt, if executing for batch size>1, pass input prompts in single string but seperate with pipe (|) symbol",
)
parser.add_argument(
"--prompts_txt_file_path",
"--prompts-txt-file-path",
type=str,
help="File path for taking input prompts from txt file, sample prompts.txt file present in examples folder",
)
parser.add_argument(
"--aic_enable_depth_first",
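
For reference, the lambda parsers that both CLIs attach to --device_group and --prompt behave as follows (example values only):

    # --device_group: "[0,1]" -> [0, 1]
    device_ids = "[0,1]"
    print([int(x) for x in device_ids.strip("[]").split(",")])  # prints [0, 1]

    # --prompt: pipe-separated string -> list of prompts
    prompt = "My name is|The capital of France is"
    print(prompt.split("|"))  # prints ['My name is', 'The capital of France is']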