From 674b80013351924ba83f4c3895e9678ed601fb21 Mon Sep 17 00:00:00 2001
From: Tianyi Liu
Date: Mon, 1 Apr 2024 09:15:13 +0800
Subject: [PATCH] [Inference] Added Two Examples for GenAI Application (#164)

* added two example for genAI application

* add the ci test for newly-added models

* remove the sqlcoder CI tests

* refine the sqlcoder file and test its with CI

* remove the sqlcoder CI
---
 .github/workflows/workflow_inference.yml        |  2 +-
 .../models/hpu/neural-chat-7b-v3-3.yaml         | 26 +++++++++++++++++++
 .../inference/models/sqlcoder-7b-2.yaml         | 22 ++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
 create mode 100644 llm_on_ray/inference/models/sqlcoder-7b-2.yaml

diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index be730afb1..65d1d7fe5 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b ]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
 
diff --git a/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml b/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
new file mode 100644
index 000000000..4e4229bc8
--- /dev/null
+++ b/llm_on_ray/inference/models/hpu/neural-chat-7b-v3-3.yaml
@@ -0,0 +1,26 @@
+port: 8000
+name: neural-chat-7b-v3-3
+route_prefix: /neural-chat-7b-v3-3
+num_replicas: 1
+cpus_per_worker: 0
+gpus_per_worker: 0
+hpus_per_worker: 1
+deepspeed: false
+workers_per_group: 2
+device: "hpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: Intel/neural-chat-7b-v3-3
+  tokenizer_name_or_path: Intel/neural-chat-7b-v3-3
+  chat_processor: ChatModelGptJ
+  prompt:
+    intro: '### System:
+    You are a chatbot developed by Intel. Please answer all questions to the best of your ability.'
+    human_id: '
+
+    ### User'
+    bot_id: '
+
+    ### Assistant'
diff --git a/llm_on_ray/inference/models/sqlcoder-7b-2.yaml b/llm_on_ray/inference/models/sqlcoder-7b-2.yaml
new file mode 100644
index 000000000..480453fd3
--- /dev/null
+++ b/llm_on_ray/inference/models/sqlcoder-7b-2.yaml
@@ -0,0 +1,22 @@
+port: 8000
+name: sqlcoder-7b-2
+route_prefix: /sqlcoder-7b-2
+cpus_per_worker: 22
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: "cpu"
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: defog/sqlcoder-7b-2
+  tokenizer_name_or_path: defog/sqlcoder-7b-2
+  chat_processor: ChatModelLLama
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: ["```"]
+  config:
+    use_auth_token: ''