Skip to content

Commit

Permalink
Update K8S manifest for ChatQnA/CodeGen/CodeTrans/DocSum
Browse files (browse the repository at this point in the history)
- Sync with docker-compose changes since v0.8 release

- Add K8S probes

Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao authored and yongfengdu committed Aug 20, 2024
1 parent c016d82 commit 01c1b75
Show file tree
Hide file tree
Showing 10 changed files with 677 additions and 225 deletions.
268 changes: 207 additions & 61 deletions ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml

Large diffs are not rendered by default.

270 changes: 207 additions & 63 deletions ChatQnA/kubernetes/manifests/xeon/chatqna.yaml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ChatQnA/tests/test_manifest_on_gaudi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ function validate_chatqna() {
echo "Checking response results, make sure the output is reasonable. "
local status=false
if [[ -f $LOGFILE ]] &&
[[ $(grep -c "billion" $LOGFILE) != 0 ]]; then
[[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
status=true
fi
if [ $status == false ]; then
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_manifest_on_xeon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ function validate_chatqna() {
echo "Checking response results, make sure the output is reasonable. "
local status=false
if [[ -f $LOGFILE ]] &&
[[ $(grep -c "billion" $LOGFILE) != 0 ]]; then
[[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
status=true
fi
if [ $status == false ]; then
Expand Down
72 changes: 50 additions & 22 deletions CodeGen/kubernetes/manifests/gaudi/codegen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy:
https_proxy:
no_proxy:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
Expand All @@ -36,22 +36,21 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy:
https_proxy:
no_proxy:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
---
# Source: codegen/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
Expand All @@ -65,7 +64,7 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -90,7 +89,7 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -115,7 +114,7 @@ metadata:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -140,7 +139,7 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down Expand Up @@ -180,14 +179,26 @@ spec:
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://codegen-tgi
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
Expand All @@ -206,7 +217,7 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down Expand Up @@ -240,6 +251,23 @@ spec:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
resources:
limits:
habana.ai/gaudi: 1
Expand All @@ -263,7 +291,7 @@ metadata:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down
72 changes: 49 additions & 23 deletions CodeGen/kubernetes/manifests/xeon/codegen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy:
https_proxy:
no_proxy:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
Expand All @@ -36,23 +36,20 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
PORT: "2080"
CUDA_GRAPHS: "0"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy:
https_proxy:
no_proxy:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
# Source: codegen/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
Expand All @@ -66,7 +63,7 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -91,7 +88,7 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -116,7 +113,7 @@ metadata:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
Expand All @@ -141,7 +138,7 @@ metadata:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down Expand Up @@ -181,14 +178,26 @@ spec:
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://codegen-tgi
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
Expand All @@ -207,7 +216,7 @@ metadata:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down Expand Up @@ -241,6 +250,23 @@ spec:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
resources:
{}
volumes:
Expand All @@ -263,7 +289,7 @@ metadata:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
Expand Down
Loading

0 comments on commit 01c1b75

Please sign in to comment.