Commit

Update infer and execute API to take prompts from txt file for BS>=1 (quic#11)

* [QEff]: Update infer and execute API to take prompts from txt file for bs>1

Signed-off-by: mamtsing <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update infer and execute API

Signed-off-by: mamtsing <[email protected]>

* Update infer and execute API

Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update infer, execute and text generation interface

Signed-off-by: mamtsing <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update text generation interface

Signed-off-by: mamtsing <[email protected]>

* Update Notebooks

Signed-off-by: quic-mamta <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: mamtsing <[email protected]>

* Update text_generation_inference.py

Signed-off-by: Mamta Singh <[email protected]>

* Update infer and execute and text generation interface

Signed-off-by: Mamta Singh <[email protected]>

* Update infer.py

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update infer.py

Signed-off-by: Mamta Singh <[email protected]>

* Update execute.py

Signed-off-by: Mamta Singh <[email protected]>

* Update files

Signed-off-by: Mamta Singh <[email protected]>

* Update files

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update QEfficientGPT2.ipynb

Signed-off-by: Mamta Singh <[email protected]>

* Update QEfficientMPT.ipynb

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

* Update README.md

Signed-off-by: Mamta Singh <[email protected]>

---------

Signed-off-by: mamtsing <[email protected]>
Signed-off-by: mamtsing <[email protected]>
Signed-off-by: Mamta Singh <[email protected]>
Signed-off-by: quic-mamta <[email protected]>
Signed-off-by: Mamta Singh <[email protected]>

Signed-off-by: root <[email protected]>
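
A minimal sketch of driving the updated execute API from Python with a prompts file; the model card, the paths, and the one-prompt-per-line file format are assumptions for illustration, not part of this commit:

    from QEfficient.cloud.execute import main

    # Hypothetical paths; substitute the real model card and compiled QPC location.
    # prompts.txt is assumed to hold one prompt per line.
    main(
        model_name="gpt2",
        qpc_path="qeff_models/gpt2/qpcs",
        device_group=[0],
        prompts_txt_file_path="examples/prompts.txt",
    )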
quic-mamta authored May 23, 2024
1 parent 3b6896a commit c4c911a
Showing 7 changed files with 247 additions and 52 deletions.
63 changes: 44 additions & 19 deletions QEfficient/cloud/execute.py
@@ -11,36 +11,52 @@
from huggingface_hub import login
from transformers import AutoTokenizer

from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.generation.text_generation_inference import (
check_batch_size_and_num_prompts,
cloud_ai_100_exec_kv,
get_compilation_batch_size,
)
from QEfficient.utils import hf_download
from QEfficient.utils.constants import Constants


def main(
model_name: str,
prompt: str,
qpc_path: str,
devices: List[int],
device_group: List[int],
prompt: str = None,
prompts_txt_file_path: str = None,
cache_dir: str = Constants.CACHE_DIR,
hf_token: str = None,
):
"""
API to run the model on Cloud AI 100 Platform.
---------
:param model_name: str. Hugging Face Model Card name, Example: [gpt2]
:prompt: str. Sample prompt for the model text generation
:param model_name: str. Hugging Face Model Card name, Example: "gpt2"
:qpc_path: str. Path to the generated binary after compilation.
:devices: List[int]. Device Ids to be used for compilation. if devices > 1. Multiple Card setup is enabled.
:device_group: List[int]. Device Ids to be used for compilation. If len(device_group) > 1, multiple-card setup is enabled.
:prompt: str. Sample prompt for the model text generation
:prompts_txt_file_path: str. Path to txt file for multiple input prompts
"""

if hf_token is not None:
login(hf_token)

# Download tokenizer along with model if it doesn't exist
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json"])
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json", "*.py", "*token*"])
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")

cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_path, device_id=devices, prompt=prompt)
batch_size = get_compilation_batch_size(qpc_path)
prompt = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)

# Execute
cloud_ai_100_exec_kv(
batch_size=batch_size,
tokenizer=tokenizer,
qpc_path=qpc_path,
device_id=device_group,
prompt=prompt,
)


if __name__ == "__main__":
@@ -49,24 +65,33 @@ def main(
"--model_name", "--model-name", required=False, type=str, help="HF model card name for tokenizing the inputs"
)
parser.add_argument("--qpc_path", "--qpc-path", required=True, help="Path to generated QPC")
parser.add_argument(
"--device_group",
"--device-group",
required=True,
type=lambda device_ids: [int(x) for x in device_ids.strip("[]").split(",")],
help="Cloud AI 100 device ids (comma-separated) e.g. [0]",
)
parser.add_argument(
"--prompt",
type=lambda prompt: prompt.split("|"),
default="My name is",
help="Input prompt, if executing for batch size>1, pass input promprs in single string but seperate with pipe (|) symbol",
)
parser.add_argument(
"--device_group",
"--device-group",
required=True,
type=lambda device_ids: [int(x) for x in device_ids.strip("[]").split(",")],
help="cloud AI 100 device ids (comma-separated) e.g. [0]",
"--prompts_txt_file_path",
"--prompts-txt-file-path",
type=str,
help="File path for taking input prompts from txt file, sample prompts.txt file present in examples folder",
)
parser.add_argument(
"--cache-dir", "--cache_dir", default=Constants.CACHE_DIR, required=False, help="Cache dir to store HF Downlods"
"--cache-dir",
"--cache_dir",
default=Constants.CACHE_DIR,
required=False,
help="Cache dir to store HF Downloads",
)
parser.add_argument(
"--hf-token", "--hf_token", default=None, type=str, required=False, help="HF token id for private HF models"
)
args = parser.parse_args()
main(args.model_name, args.prompt, args.qpc_path, args.device_group, args.cache_dir, args.hf_token)
main(**args.__dict__)
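
The helpers get_compilation_batch_size and check_batch_size_and_num_prompts are defined in text_generation_inference.py, which is not shown in this diff. The sketch below is inferred purely from their call sites; the specializations.json location, the one-prompt-per-line file format, and the divisibility rule are assumptions:

    import json
    import os
    from typing import List, Optional

    def get_compilation_batch_size(qpc_path: str) -> int:
        # Assumption: the compile step writes a specializations.json next to
        # the generated qpcs directory, recording the compiled batch size.
        specialization_file = os.path.join(os.path.dirname(qpc_path), "specializations.json")
        with open(specialization_file) as f:
            return int(json.load(f)["specializations"][0]["batch_size"])

    def check_batch_size_and_num_prompts(
        prompt: Optional[List[str]],
        prompts_txt_file_path: Optional[str],
        batch_size: int,
    ) -> List[str]:
        # Assumption: the txt file holds one prompt per line and takes
        # precedence over --prompt when both are given.
        if prompts_txt_file_path is not None:
            with open(prompts_txt_file_path) as f:
                prompt = [line.strip() for line in f if line.strip()]
        assert prompt is not None, "Pass either prompt or prompts_txt_file_path"
        if batch_size > 1:
            assert len(prompt) % batch_size == 0, (
                f"Number of prompts ({len(prompt)}) should be divisible by "
                f"the compiled batch size ({batch_size})"
            )
        return prompt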
51 changes: 42 additions & 9 deletions QEfficient/cloud/infer.py
@@ -15,7 +15,10 @@
import QEfficient
from QEfficient.cloud.compile import main as compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.generation.text_generation_inference import (
check_batch_size_and_num_prompts,
cloud_ai_100_exec_kv,
)
from QEfficient.utils import hf_download
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
from QEfficient.utils.logging_utils import logger
@@ -48,7 +51,8 @@ def onnx_exists(onnx_file_path: str) -> bool:
def main(
model_name: str,
num_cores: int,
prompt: str,
prompt: str = None,
prompts_txt_file_path: str = None,
aic_enable_depth_first: bool = False,
mos: int = -1,
cache_dir: str = Constants.CACHE_DIR,
@@ -76,6 +80,8 @@ def main(
onnx_dir_path = os.path.join(model_card_dir, "onnx")
onnx_model_path = os.path.join(onnx_dir_path, model_name.replace("/", "_") + "_kv_clipped_fp16.onnx")

prompt = check_batch_size_and_num_prompts(prompt, prompts_txt_file_path, batch_size)

# Get tokenizer
if hf_token is not None:
login(hf_token)
@@ -91,7 +97,13 @@
if qpc_exists(qpc_dir_path):
# execute
logger.info("Pre-compiled qpc found! Trying to execute with given prompt")
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_dir_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)
return

if onnx_exists(onnx_model_path):
@@ -112,7 +124,13 @@
assert (
generated_qpc_path == qpc_dir_path
), f"QPC files were generated at an unusual location, expected {qpc_dir_path}; got {generated_qpc_path}"
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=generated_qpc_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)
return

#############################################
@@ -159,16 +177,26 @@ def main(
logger.info(f"Compiled qpc files can be found at : {generated_qpc_path}")

# Execute
cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=generated_qpc_path, device_id=device_group, prompt=prompt)
cloud_ai_100_exec_kv(
batch_size,
tokenizer=tokenizer,
qpc_path=qpc_dir_path,
device_id=device_group,
prompt=prompt,
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on AIC"
description="Inference command, the model will be downloaded from HF, optmized, compiled, executed on Cloud AI 100"
)
parser.add_argument("--model-name", "--model_name", required=True, help="HF Model card name/id")
parser.add_argument(
"--cache-dir", "--cache_dir", default=Constants.CACHE_DIR, required=False, help="Cache dir to store HF Downlods"
"--cache-dir",
"--cache_dir",
default=Constants.CACHE_DIR,
required=False,
help="Cache dir to store HF Downloads",
)
parser.add_argument(
"--hf-token", "--hf_token", default=None, type=str, required=False, help="HF token id for private HF models"
@@ -194,8 +222,13 @@ def main(
parser.add_argument(
"--prompt",
type=lambda prompt: prompt.split("|"),
default="My name is",
help="Input prompt, if executing for batch size>1, pass input promprs in single string but seperate with pipe (|) symbol",
help="Input prompt, if executing for batch size>1, pass input prompts in single string but seperate with pipe (|) symbol",
)
parser.add_argument(
"--prompts_txt_file_path",
"--prompts-txt-file-path",
type=str,
help="File path for taking input prompts from txt file, sample prompts.txt file present in examples folder",
)
parser.add_argument(
"--aic_enable_depth_first",
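
For reference, the lambda parsers that both CLIs attach to --device_group and --prompt behave as follows (example values only):

    # --device_group: "[0,1]" -> [0, 1]
    device_ids = "[0,1]"
    print([int(x) for x in device_ids.strip("[]").split(",")])  # prints [0, 1]

    # --prompt: pipe-separated string -> list of prompts
    prompt = "My name is|The capital of France is"
    print(prompt.split("|"))  # prints ['My name is', 'The capital of France is']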