Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ChatQnA] udate OOB & Tuned manifests #738

Merged
merged 10 commits into from
Sep 5, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: reranking-svc
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
Loading