From 665c46ffae23b3dc3b4c6c7d6b7693886e913294 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Wed, 24 Jul 2024 09:59:38 +0800 Subject: [PATCH] Update Kubernetes manifest files for deploying ChatQnA (#445) Update Kubernetes manifest files for deploying ChatQnA without GMC. Signed-off-by: Lianhao Lu --- ChatQnA/README.md | 8 +- ChatQnA/kubernetes/manifests/README.md | 41 + .../manifests/chaqna-xeon-backend-server.yaml | 45 - .../manifests/docsum_gaudi_llm.yaml | 74 -- ChatQnA/kubernetes/manifests/docsum_llm.yaml | 74 -- ChatQnA/kubernetes/manifests/embedding.yaml | 45 - .../kubernetes/manifests/gaudi/chatqna.yaml | 1097 +++++++++++++++++ .../kubernetes/manifests/install_all_gaudi.sh | 12 - .../kubernetes/manifests/install_all_xeon.sh | 12 - ChatQnA/kubernetes/manifests/llm.yaml | 45 - .../manifests/qna_configmap_gaudi.yaml | 21 - .../manifests/qna_configmap_xeon.yaml | 21 - .../kubernetes/manifests/redis-vector-db.yaml | 42 - .../kubernetes/manifests/remove_all_gaudi.sh | 12 - .../kubernetes/manifests/remove_all_xeon.sh | 12 - ChatQnA/kubernetes/manifests/reranking.yaml | 45 - ChatQnA/kubernetes/manifests/retriever.yaml | 45 - .../tei_embedding_gaudi_service.yaml | 71 -- .../manifests/tei_embedding_service.yaml | 68 - .../manifests/tei_reranking_service.yaml | 68 - .../manifests/tgi_gaudi_service.yaml | 66 - ChatQnA/kubernetes/manifests/tgi_service.yaml | 75 -- .../kubernetes/manifests/xeon/chatqna.yaml | 1095 ++++++++++++++++ ChatQnA/tests/test_manifest_on_gaudi.sh | 43 +- ChatQnA/tests/test_manifest_on_xeon.sh | 47 +- 25 files changed, 2283 insertions(+), 901 deletions(-) create mode 100644 ChatQnA/kubernetes/manifests/README.md delete mode 100644 ChatQnA/kubernetes/manifests/chaqna-xeon-backend-server.yaml delete mode 100644 ChatQnA/kubernetes/manifests/docsum_gaudi_llm.yaml delete mode 100644 ChatQnA/kubernetes/manifests/docsum_llm.yaml delete mode 100644 ChatQnA/kubernetes/manifests/embedding.yaml create mode 100644 
ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml delete mode 100755 ChatQnA/kubernetes/manifests/install_all_gaudi.sh delete mode 100755 ChatQnA/kubernetes/manifests/install_all_xeon.sh delete mode 100644 ChatQnA/kubernetes/manifests/llm.yaml delete mode 100644 ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml delete mode 100644 ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml delete mode 100644 ChatQnA/kubernetes/manifests/redis-vector-db.yaml delete mode 100755 ChatQnA/kubernetes/manifests/remove_all_gaudi.sh delete mode 100755 ChatQnA/kubernetes/manifests/remove_all_xeon.sh delete mode 100644 ChatQnA/kubernetes/manifests/reranking.yaml delete mode 100644 ChatQnA/kubernetes/manifests/retriever.yaml delete mode 100644 ChatQnA/kubernetes/manifests/tei_embedding_gaudi_service.yaml delete mode 100644 ChatQnA/kubernetes/manifests/tei_embedding_service.yaml delete mode 100644 ChatQnA/kubernetes/manifests/tei_reranking_service.yaml delete mode 100644 ChatQnA/kubernetes/manifests/tgi_gaudi_service.yaml delete mode 100644 ChatQnA/kubernetes/manifests/tgi_service.yaml create mode 100644 ChatQnA/kubernetes/manifests/xeon/chatqna.yaml diff --git a/ChatQnA/README.md b/ChatQnA/README.md index 83b85f5ec..7b243346d 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -105,9 +105,13 @@ docker compose -f docker_compose.yaml up -d Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source. -## Deploy ChatQnA into Kubernetes on Xeon & Gaudi +## Deploy ChatQnA into Kubernetes on Xeon & Gaudi with GMC -Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi. +Refer to the [Kubernetes Guide](./kubernetes/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi with GMC. 
+ +## Deploy ChatQnA into Kubernetes on Xeon & Gaudi without GMC + +Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi without GMC. ## Deploy ChatQnA into Kubernetes using Helm Chart diff --git a/ChatQnA/kubernetes/manifests/README.md b/ChatQnA/kubernetes/manifests/README.md new file mode 100644 index 000000000..feee4516d --- /dev/null +++ b/ChatQnA/kubernetes/manifests/README.md @@ -0,0 +1,41 @@ +

Deploy ChatQnA in Kubernetes Cluster

+ +> [!NOTE] +> The following values must be set before you can deploy: +> HUGGINGFACEHUB_API_TOKEN + +> You can also customize the "MODEL_ID" if needed. + +> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the ChatQnA workload is running. Otherwise, you need to modify the `chatqna.yaml` file to change the `model-volume` to a directory that exists on the node. + +## Deploy On Xeon + +``` +cd GenAIExamples/ChatQnA/kubernetes/manifests/xeon +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml +kubectl apply -f chatqna.yaml +``` + +## Deploy On Gaudi + +``` +cd GenAIExamples/ChatQnA/kubernetes/manifests/gaudi +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml +kubectl apply -f chatqna.yaml +``` + +## Verify Services + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the ChatQnA service for access. 
+ +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:8888/v1/chatqna \ + -H 'Content-Type: application/json' \ + -d '{"messages": "What is the revenue of Nike in 2023?"}' +``` diff --git a/ChatQnA/kubernetes/manifests/chaqna-xeon-backend-server.yaml b/ChatQnA/kubernetes/manifests/chaqna-xeon-backend-server.yaml deleted file mode 100644 index dd2add8cb..000000000 --- a/ChatQnA/kubernetes/manifests/chaqna-xeon-backend-server.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chaqna-xeon-backend-server-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: chaqna-xeon-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: chaqna-xeon-backend-server-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chaqna-xeon-backend-server-deploy - args: - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chaqna-xeon-backend-server-svc -spec: - type: NodePort - selector: - app: chaqna-xeon-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 diff --git a/ChatQnA/kubernetes/manifests/docsum_gaudi_llm.yaml b/ChatQnA/kubernetes/manifests/docsum_gaudi_llm.yaml deleted file mode 100644 index aa48d00fe..000000000 --- a/ChatQnA/kubernetes/manifests/docsum_gaudi_llm.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# Source: llm-uservice/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-llm-uservice - labels: - helm.sh/chart: llm-uservice-0.1.0 - app.kubernetes.io/name: llm-uservice - 
app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "1.0.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - name: llm-uservice - selector: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum ---- -# Source: llm-uservice/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-llm-uservice - labels: - helm.sh/chart: llm-uservice-0.1.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "1.0.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - spec: - securityContext: {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: qna-config - env: - - name: HUGGING_FACE_HUB_TOKEN - value: $(HUGGINGFACEHUB_API_TOKEN) - - name: HF_TOKEN - value: $(HUGGINGFACEHUB_API_TOKEN) - - name: LANGCHAIN_TRACING_V2 - value: "false" - - name: LANGCHAIN_PROJECT - value: "opea-llm-service" - securityContext: {} - image: "opea/llm-docsum-tgi:latest" - imagePullPolicy: IfNotPresent - ports: - - name: llm-uservice - containerPort: 9000 - protocol: TCP - resources: {} diff --git a/ChatQnA/kubernetes/manifests/docsum_llm.yaml b/ChatQnA/kubernetes/manifests/docsum_llm.yaml deleted file mode 100644 index aa48d00fe..000000000 --- a/ChatQnA/kubernetes/manifests/docsum_llm.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# Source: llm-uservice/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-llm-uservice - labels: - helm.sh/chart: llm-uservice-0.1.0 - app.kubernetes.io/name: 
llm-uservice - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "1.0.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - name: llm-uservice - selector: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum ---- -# Source: llm-uservice/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-llm-uservice - labels: - helm.sh/chart: llm-uservice-0.1.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "1.0.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: docsum - spec: - securityContext: {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: qna-config - env: - - name: HUGGING_FACE_HUB_TOKEN - value: $(HUGGINGFACEHUB_API_TOKEN) - - name: HF_TOKEN - value: $(HUGGINGFACEHUB_API_TOKEN) - - name: LANGCHAIN_TRACING_V2 - value: "false" - - name: LANGCHAIN_PROJECT - value: "opea-llm-service" - securityContext: {} - image: "opea/llm-docsum-tgi:latest" - imagePullPolicy: IfNotPresent - ports: - - name: llm-uservice - containerPort: 9000 - protocol: TCP - resources: {} diff --git a/ChatQnA/kubernetes/manifests/embedding.yaml b/ChatQnA/kubernetes/manifests/embedding.yaml deleted file mode 100644 index 31d5ffa5c..000000000 --- a/ChatQnA/kubernetes/manifests/embedding.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - 
sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: embedding-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 diff --git a/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml b/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml new file mode 100644 index 000000000..fbd6fabac --- /dev/null +++ b/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml @@ -0,0 +1,1097 @@ +--- +# Source: chatqna/charts/data-prep/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-data-prep-config + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_ENDPOINT: "http://chatqna-tei" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-dataprep-service" +--- +# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-embedding-usvc-config + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + 
app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: insert-your-langchain-key-here + LANGCHAIN_PROJECT: "opea-embedding-service" +--- +# Source: chatqna/charts/llm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-llm-uservice-config + labels: + helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TGI_LLM_ENDPOINT: "http://chatqna-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: insert-your-langchain-key-here + LANGCHAIN_PROJECT: "opea-llm-uservice" +--- +# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-reranking-usvc-config + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-reranking-service" +--- +# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-retriever-usvc-config + labels: + helm.sh/chart: retriever-usvc-0.8.0 + app.kubernetes.io/name: retriever-usvc + 
app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-retriever-service" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tei/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tei-config + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-base-en-v1.5" + PORT: "2081" + http_proxy: + https_proxy: + no_proxy: + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + MAX_WARMUP_SEQUENCE_LENGTH: "512" +--- +# Source: chatqna/charts/teirerank/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-teirerank-config + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-reranker-base" + PORT: "2082" + http_proxy: + https_proxy: + no_proxy: + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tgi/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-config + labels: + helm.sh/chart: tgi-0.8.0 
+ app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "Intel/neural-chat-7b-v3-3" + PORT: "2080" + HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" + MAX_INPUT_TOKENS: "1024" + MAX_TOTAL_TOKENS: "4096" + http_proxy: + https_proxy: + no_proxy: + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/data-prep/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6007 + targetPort: 6007 + protocol: TCP + name: data-prep + selector: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/embedding-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6000 + targetPort: 6000 + protocol: TCP + name: embedding-usvc + selector: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/llm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-llm-uservice + labels: 
+ helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/redis-vector-db/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-0.8.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: 6379 + protocol: TCP + name: redis-service + - port: 8001 + targetPort: 8001 + protocol: TCP + name: redis-insight + selector: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/reranking-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + name: reranking-usvc + selector: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/retriever-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-0.8.0 + 
app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 7000 + targetPort: 7000 + protocol: TCP + name: retriever-usvc + selector: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tei/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2081 + protocol: TCP + name: tei + selector: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/teirerank/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2082 + protocol: TCP + name: teirerank + selector: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + 
app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-0.8.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: chatqna + selector: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/data-prep/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-data-prep-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/dataprep-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: data-prep + containerPort: 6007 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-embedding-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/embedding-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: embedding-usvc + containerPort: 6000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/llm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + 
runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + startupProbe: + exec: + command: + - curl + - http://chatqna-tgi + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-0.8.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: redis-vector-db + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "redis/redis-stack:7.2.0-v9" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /redisinsight + name: redisinsight-volume + - mountPath: /tmp + name: tmp + ports: + - name: redis-service + containerPort: 6379 + protocol: TCP + - name: redis-insight + containerPort: 8001 + protocol: TCP + startupProbe: + tcpSocket: + port: 6379 # Probe the Redis port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: data-volume + emptyDir: {} + - name: redisinsight-volume + emptyDir: {} + 
- name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-reranking-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/reranking-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: reranking-usvc + containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-0.8.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + 
- name: chatqna + envFrom: + - configMapRef: + name: chatqna-retriever-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/retriever-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: retriever-usvc + containerPort: 7000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + startupProbe: + exec: + command: + - curl + - http://chatqna-tei + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tei/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tei + envFrom: + - configMapRef: + name: chatqna-tei-config + securityContext: + {} + image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2081 + protocol: TCP + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/teirerank/templates/deployment.yaml +# Copyright (C) 2024 
Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: teirerank + envFrom: + - configMapRef: + name: chatqna-teirerank-config + securityContext: + {} + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2082 + protocol: TCP + resources: + {} + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: chatqna-tgi-config + securityContext: + {} + image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" + 
imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: tmp + emptyDir: {} +--- +# Source: chatqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-0.8.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + spec: + securityContext: + null + containers: + - name: chatqna + env: + - name: LLM_SERVICE_HOST_IP + value: chatqna-llm-uservice + - name: RERANK_SERVICE_HOST_IP + value: chatqna-reranking-usvc + - name: RETRIEVER_SERVICE_HOST_IP + value: chatqna-retriever-usvc + - name: EMBEDDING_SERVICE_HOST_IP + value: chatqna-embedding-usvc + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/chatqna:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: chatqna + containerPort: 8888 + protocol: TCP + # startupProbe: + # httpGet: + # host: chatqna-llm-uservice + # port: 9000 + # path: / + # initialDelaySeconds: 5 + # periodSeconds: 5 + # failureThreshold: 120 + # livenessProbe: + # httpGet: + # path: / + # port: 8888 + # readinessProbe: + # httpGet: + # path: / + # port: 8888 + resources: + null + volumes: + - name: tmp + emptyDir: 
{} diff --git a/ChatQnA/kubernetes/manifests/install_all_gaudi.sh b/ChatQnA/kubernetes/manifests/install_all_gaudi.sh deleted file mode 100755 index da140dfff..000000000 --- a/ChatQnA/kubernetes/manifests/install_all_gaudi.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Array of YAML file names -yaml_files=("qna_configmap_gaudi" "redis-vector-db" "tei_embedding_gaudi_service" "tei_reranking_service" "tgi_gaudi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server") -for element in ${yaml_files[@]} -do - echo "Applying manifest from ${element}.yaml" - kubectl apply -f "${element}.yaml" -done diff --git a/ChatQnA/kubernetes/manifests/install_all_xeon.sh b/ChatQnA/kubernetes/manifests/install_all_xeon.sh deleted file mode 100755 index 35f79b2ee..000000000 --- a/ChatQnA/kubernetes/manifests/install_all_xeon.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Array of YAML file names -yaml_files=("qna_configmap_xeon" "redis-vector-db" "tei_embedding_service" "tei_reranking_service" "tgi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server") -for element in ${yaml_files[@]} -do - echo "Applying manifest from ${element}.yaml" - kubectl apply -f "${element}.yaml" -done diff --git a/ChatQnA/kubernetes/manifests/llm.yaml b/ChatQnA/kubernetes/manifests/llm.yaml deleted file mode 100644 index 6d31e2c0f..000000000 --- a/ChatQnA/kubernetes/manifests/llm.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: llm-deploy - spec: - hostIPC: true - containers: - - 
envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml deleted file mode 100644 index a02152d55..000000000 --- a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config -data: - EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5" - RERANK_MODEL_ID: "BAAI/bge-reranker-base" - LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3" - TEI_EMBEDDING_ENDPOINT: "http://tei-embedding-gaudi-svc.default.svc.cluster.local:6006" - TEI_RERANKING_ENDPOINT: "http://tei-reranking-svc.default.svc.cluster.local:8808" - TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009" - REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" - INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml deleted file mode 100644 index 69ed22195..000000000 --- a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config -data: - EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5" - RERANK_MODEL_ID: "BAAI/bge-reranker-base" - 
LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3" - TEI_EMBEDDING_ENDPOINT: "http://tei-embedding-svc.default.svc.cluster.local:6006" - TEI_RERANKING_ENDPOINT: "http://tei-reranking-svc.default.svc.cluster.local:8808" - TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009" - REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" - INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/kubernetes/manifests/redis-vector-db.yaml b/ChatQnA/kubernetes/manifests/redis-vector-db.yaml deleted file mode 100644 index 6bc036e79..000000000 --- a/ChatQnA/kubernetes/manifests/redis-vector-db.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Redis Vector DB Deployment -apiVersion: apps/v1 -kind: Deployment -metadata: - name: redis-vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: redis-vector-db - template: - metadata: - labels: - app: redis-vector-db - spec: - containers: - - name: redis-vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 - ---- -# Redis Vector DB Service -apiVersion: v1 -kind: Service -metadata: - name: redis-vector-db -spec: - type: ClusterIP - selector: - app: redis-vector-db - ports: - - name: redis-service - port: 6379 - targetPort: 6379 - - name: redis-insight - port: 8001 - targetPort: 8001 diff --git a/ChatQnA/kubernetes/manifests/remove_all_gaudi.sh b/ChatQnA/kubernetes/manifests/remove_all_gaudi.sh deleted file mode 100755 index f4feea21c..000000000 --- a/ChatQnA/kubernetes/manifests/remove_all_gaudi.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Array of YAML file names -yaml_files=("qna_configmap_gaudi" 
"redis-vector-db" "tei_embedding_gaudi_service" "tei_reranking_service" "tgi_gaudi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server") -for element in ${yaml_files[@]} -do - echo "Delete manifest from ${element}.yaml" - kubectl delete -f "${element}.yaml" -done diff --git a/ChatQnA/kubernetes/manifests/remove_all_xeon.sh b/ChatQnA/kubernetes/manifests/remove_all_xeon.sh deleted file mode 100755 index 344b343d6..000000000 --- a/ChatQnA/kubernetes/manifests/remove_all_xeon.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Array of YAML file names -yaml_files=("qna_configmap_xeon" "redis-vector-db" "tei_embedding_service" "tei_reranking_service" "tgi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server") -for element in ${yaml_files[@]} -do - echo "Delete manifest from ${element}.yaml" - kubectl delete -f "${element}.yaml" -done diff --git a/ChatQnA/kubernetes/manifests/reranking.yaml b/ChatQnA/kubernetes/manifests/reranking.yaml deleted file mode 100644 index 7c3ca8b2c..000000000 --- a/ChatQnA/kubernetes/manifests/reranking.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: reranking-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - 
targetPort: 8000 diff --git a/ChatQnA/kubernetes/manifests/retriever.yaml b/ChatQnA/kubernetes/manifests/retriever.yaml deleted file mode 100644 index 04157ba14..000000000 --- a/ChatQnA/kubernetes/manifests/retriever.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: retriever-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/kubernetes/manifests/tei_embedding_gaudi_service.yaml b/ChatQnA/kubernetes/manifests/tei_embedding_gaudi_service.yaml deleted file mode 100644 index 60081154b..000000000 --- a/ChatQnA/kubernetes/manifests/tei_embedding_gaudi_service.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tei-embedding-gaudi-service-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: tei-embedding-gaudi-service-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: tei-embedding-gaudi-service-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: tei-embedding-gaudi-service-deploy - #command: ["/usr/bin/bash"] - args: - - --model-id - - 
$(EMBEDDING_MODEL_ID) - #- $(RERANK_MODEL_ID) - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - # memory: 26Gi - # requests: - # cpu: 56000m - # memory: 26Gi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - # directory location on host - path: /mnt/models - # this field is optional - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: tei-embedding-gaudi-svc -spec: - type: ClusterIP - selector: - app: tei-embedding-gaudi-service-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/kubernetes/manifests/tei_embedding_service.yaml b/ChatQnA/kubernetes/manifests/tei_embedding_service.yaml deleted file mode 100644 index 5051e78fa..000000000 --- a/ChatQnA/kubernetes/manifests/tei_embedding_service.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tei-embedding-service-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: tei-embedding-service-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: tei-embedding-service-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - name: tei-embedding-service-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - # resources: - # limits: - # cpu: 56000m - # memory: 26Gi - # requests: - # cpu: 56000m - # memory: 26Gi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - # directory location on host - path: /mnt/models - # this field is optional - 
type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: tei-embedding-svc -spec: - type: ClusterIP - selector: - app: tei-embedding-service-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/kubernetes/manifests/tei_reranking_service.yaml b/ChatQnA/kubernetes/manifests/tei_reranking_service.yaml deleted file mode 100644 index 7528d01d4..000000000 --- a/ChatQnA/kubernetes/manifests/tei_reranking_service.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tei-reranking-service-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: tei-reranking-service-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: tei-reranking-service-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - name: tei-reranking-service-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - # resources: - # limits: - # cpu: 56000m - # memory: 26Gi - # requests: - # cpu: 56000m - # memory: 26Gi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - # directory location on host - path: /mnt/models - # this field is optional - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: tei-reranking-svc -spec: - type: ClusterIP - selector: - app: tei-reranking-service-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/kubernetes/manifests/tgi_gaudi_service.yaml b/ChatQnA/kubernetes/manifests/tgi_gaudi_service.yaml deleted file mode 100644 index 039b39079..000000000 
--- a/ChatQnA/kubernetes/manifests/tgi_gaudi_service.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tgi-gaudi-service-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: tgi-gaudi-service-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: tgi-gaudi-service-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:1.2.1 - name: tgi-gaudi-service-deploy-demo - args: - - --model-id - - $(LLM_MODEL_ID) - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - serviceAccountName: default - nodeSelector: - volumes: - - name: model-volume - hostPath: - # directory location on host - path: /mnt/models - # this field is optional - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: tgi-gaudi-svc -spec: - type: ClusterIP - selector: - app: tgi-gaudi-service-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/kubernetes/manifests/tgi_service.yaml b/ChatQnA/kubernetes/manifests/tgi_service.yaml deleted file mode 100644 index b425d2561..000000000 --- a/ChatQnA/kubernetes/manifests/tgi_service.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tgi-service-deploy -spec: - replicas: 1 - selector: - matchLabels: - app: tgi-service-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: "true" - labels: - app: tgi-service-deploy - spec: - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: 
ghcr.io/huggingface/text-generation-inference:1.4 - name: tgi-service-deploy-demo - args: - - --model-id - - $(LLM_MODEL_ID) - #- "/data/Llama-2-7b-hf" - # - "/data/Mistral-7B-Instruct-v0.2" - # - --quantize - # - "bitsandbytes-fp4" -# - --disable-custom-kernels - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - # limits: - # cpu: 56000m - # memory: 26Gi - # requests: - # cpu: 56000m - # memory: 26Gi - serviceAccountName: default - nodeSelector: - volumes: - - name: model-volume - hostPath: - # directory location on host - path: /mnt/models - # this field is optional - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: tgi-svc -spec: - type: ClusterIP - selector: - app: tgi-service-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml new file mode 100644 index 000000000..b8b1ebaae --- /dev/null +++ b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml @@ -0,0 +1,1095 @@ +--- +# Source: chatqna/charts/data-prep/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-data-prep-config + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_ENDPOINT: "http://chatqna-tei" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-dataprep-service" +--- +# Source: 
chatqna/charts/embedding-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-embedding-usvc-config + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: insert-your-langchain-key-here + LANGCHAIN_PROJECT: "opea-embedding-service" +--- +# Source: chatqna/charts/llm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-llm-uservice-config + labels: + helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TGI_LLM_ENDPOINT: "http://chatqna-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: insert-your-langchain-key-here + LANGCHAIN_PROJECT: "opea-llm-uservice" +--- +# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-reranking-usvc-config + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: 
"insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-reranking-service" +--- +# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-retriever-usvc-config + labels: + helm.sh/chart: retriever-usvc-0.8.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: + https_proxy: + no_proxy: + LANGCHAIN_TRACING_V2: "false" + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + LANGCHAIN_PROJECT: "opea-retriever-service" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tei/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tei-config + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-base-en-v1.5" + PORT: "2081" + http_proxy: + https_proxy: + no_proxy: + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + MAX_WARMUP_SEQUENCE_LENGTH: "512" +--- +# Source: chatqna/charts/teirerank/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-teirerank-config + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-reranker-base" + PORT: 
"2082" + http_proxy: + https_proxy: + no_proxy: + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tgi/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-config + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "Intel/neural-chat-7b-v3-3" + PORT: "2080" + HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" + MAX_INPUT_TOKENS: "1024" + MAX_TOTAL_TOKENS: "4096" + http_proxy: + https_proxy: + no_proxy: + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/data-prep/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6007 + targetPort: 6007 + protocol: TCP + name: data-prep + selector: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/embedding-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: 
ClusterIP + ports: + - port: 6000 + targetPort: 6000 + protocol: TCP + name: embedding-usvc + selector: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/llm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/redis-vector-db/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-0.8.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: 6379 + protocol: TCP + name: redis-service + - port: 8001 + targetPort: 8001 + protocol: TCP + name: redis-insight + selector: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/reranking-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: 8000 + protocol: 
TCP + name: reranking-usvc + selector: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/retriever-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-0.8.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 7000 + targetPort: 7000 + protocol: TCP + name: retriever-usvc + selector: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tei/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2081 + protocol: TCP + name: tei + selector: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/teirerank/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2082 + protocol: TCP + name: teirerank + selector: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-0.8.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: chatqna + selector: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/data-prep/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-0.8.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-data-prep-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: 
"opea/dataprep-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: data-prep + containerPort: 6007 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-0.8.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-embedding-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/embedding-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: embedding-usvc + containerPort: 6000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/llm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-0.8.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + 
app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + startupProbe: + exec: + command: + - curl + - http://chatqna-tgi + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-0.8.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: redis-vector-db + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "redis/redis-stack:7.2.0-v9" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /redisinsight + name: redisinsight-volume + - mountPath: 
/tmp + name: tmp + ports: + - name: redis-service + containerPort: 6379 + protocol: TCP + - name: redis-insight + containerPort: 8001 + protocol: TCP + startupProbe: + tcpSocket: + port: 6379 # Probe the Redis port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: data-volume + emptyDir: {} + - name: redisinsight-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-0.8.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-reranking-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/reranking-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: reranking-usvc + containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-0.8.0 + app.kubernetes.io/name: retriever-usvc + 
app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-retriever-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/retriever-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: retriever-usvc + containerPort: 7000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + startupProbe: + exec: + command: + - curl + - http://chatqna-tei + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tei/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-0.8.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tei + envFrom: + - configMapRef: + name: chatqna-tei-config + securityContext: + {} + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm 
+ - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2081 + protocol: TCP + resources: + {} + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/teirerank/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-0.8.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.2" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: teirerank + envFrom: + - configMapRef: + name: chatqna-teirerank-config + securityContext: + {} + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2082 + protocol: TCP + resources: + {} + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-0.8.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + 
matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: chatqna-tgi-config + securityContext: + {} + image: "ghcr.io/huggingface/text-generation-inference:2.1.0" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + resources: + {} + volumes: + - name: model-volume + hostPath: + path: /mnt/opea-models + type: Directory + - name: tmp + emptyDir: {} +--- +# Source: chatqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-0.8.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "1.0.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + spec: + securityContext: + null + containers: + - name: chatqna + env: + - name: LLM_SERVICE_HOST_IP + value: chatqna-llm-uservice + - name: RERANK_SERVICE_HOST_IP + value: chatqna-reranking-usvc + - name: RETRIEVER_SERVICE_HOST_IP + value: chatqna-retriever-usvc + - name: EMBEDDING_SERVICE_HOST_IP + value: chatqna-embedding-usvc + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/chatqna:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: chatqna + 
containerPort: 8888 + protocol: TCP + # startupProbe: + # httpGet: + # host: chatqna-llm-uservice + # port: 9000 + # path: / + # initialDelaySeconds: 5 + # periodSeconds: 5 + # failureThreshold: 120 + # livenessProbe: + # httpGet: + # path: / + # port: 8888 + # readinessProbe: + # httpGet: + # path: / + # port: 8888 + resources: + null + volumes: + - name: tmp + emptyDir: {} diff --git a/ChatQnA/tests/test_manifest_on_gaudi.sh b/ChatQnA/tests/test_manifest_on_gaudi.sh index bb5bf52ec..0ce140984 100755 --- a/ChatQnA/tests/test_manifest_on_gaudi.sh +++ b/ChatQnA/tests/test_manifest_on_gaudi.sh @@ -10,59 +10,58 @@ IMAGE_REPO=${IMAGE_REPO:-} IMAGE_TAG=${IMAGE_TAG:-latest} function init_chatqna() { - # replace the mount dir "path: /mnt" with "path: $CHART_MOUNT" - find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/models#path: $MOUNT_DIR#g" {} \; + # replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR" + find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; # replace megaservice image tag find . -name '*.yaml' -type f -exec sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" {} \; # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/" - find . -name '*.yaml' -type f -exec sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" {} \; + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \; # set huggingface token - find . -name '*.yaml' -type f -exec sed -i "s#\${HUGGINGFACEHUB_API_TOKEN}#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; + find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; } function install_chatqna { - # replace namespace "default" with real namespace - find .
-name '*.yaml' -type f -exec sed -i "s#default.svc#$NAMESPACE.svc#g" {} \; - # for very yaml file in yaml_files, apply it to the k8s cluster - yaml_files=("qna_configmap_gaudi" "redis-vector-db" "tei_embedding_gaudi_service" "tei_reranking_service" "tgi_gaudi_service" "retriever" "embedding" "reranking" "llm") - for yaml_file in ${yaml_files[@]}; do - kubectl apply -f $yaml_file.yaml -n $NAMESPACE - done + echo "namespace is $NAMESPACE" + kubectl apply -f . -n $NAMESPACE + # Sleep enough time for retriever-usvc to be ready sleep 60 - kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE } function validate_chatqna() { max_retry=20 - # make sure microservice retriever is ready + # make sure microservice retriever-usvc is ready # try to curl retriever-svc for max_retry times test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") for ((i=1; i<=max_retry; i++)) do - curl http://retriever-svc.$NAMESPACE:7000/v1/retrieval -X POST \ + curl http://chatqna-retriever-usvc.$NAMESPACE:7000/v1/retrieval -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json' && break - sleep 10 + sleep 30 done + # if i is bigger than max_retry, then exit with error + if [ $i -gt $max_retry ]; then + echo "Microservice retriever failed, exit with error." + exit 1 + fi # make sure microservice tgi-svc is ready for ((i=1; i<=max_retry; i++)) do - curl http://tgi-gaudi-svc.$NAMESPACE:9009/generate -X POST \ + curl http://chatqna-tgi.$NAMESPACE:80/generate -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' && break sleep 10 done # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then - echo "Microservice failed, exit with error." + echo "Microservice tgi failed, exit with error."
exit 1 fi # check megaservice works # generate a random logfile name to avoid conflict among multiple runners LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log - curl http://chaqna-xeon-backend-server-svc.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{ - "messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE + curl http://chatqna.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE exit_code=$? if [ $exit_code -ne 0 ]; then echo "Megaservice failed, please check the logs in $LOGFILE!" @@ -90,19 +89,19 @@ fi case "$1" in init_ChatQnA) - pushd ChatQnA/kubernetes/manifests + pushd ChatQnA/kubernetes/manifests/gaudi init_chatqna popd ;; install_ChatQnA) - pushd ChatQnA/kubernetes/manifests + pushd ChatQnA/kubernetes/manifests/gaudi NAMESPACE=$2 install_chatqna popd ;; validate_ChatQnA) NAMESPACE=$2 - SERVICE_NAME=chaqna-xeon-backend-server-svc + SERVICE_NAME=chatqna validate_chatqna ;; *) diff --git a/ChatQnA/tests/test_manifest_on_xeon.sh b/ChatQnA/tests/test_manifest_on_xeon.sh index 058dabcb7..b33337b8a 100755 --- a/ChatQnA/tests/test_manifest_on_xeon.sh +++ b/ChatQnA/tests/test_manifest_on_xeon.sh @@ -10,59 +10,58 @@ IMAGE_REPO=${IMAGE_REPO:-} IMAGE_TAG=${IMAGE_TAG:-latest} function init_chatqna() { - # executed under path manifest/chatqna/xeon - # replace the mount dir "path: /mnt" with "path: $CHART_MOUNT" - find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/models#path: $MOUNT_DIR#g" {} \; + # replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR" + find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; # replace megaservice image tag find . -name '*.yaml' -type f -exec sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" {} \; # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/" - find .
-name '*.yaml' -type f -exec sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" {} \; + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \; # set huggingface token - find . -name '*.yaml' -type f -exec sed -i "s#\${HUGGINGFACEHUB_API_TOKEN}#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; + find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; } function install_chatqna { - # replace namespace "default" with real namespace - find . -name '*.yaml' -type f -exec sed -i "s#default.svc#$NAMESPACE.svc#g" {} \; - # for very yaml file in yaml_files, apply it to the k8s cluster - yaml_files=("qna_configmap_xeon" "redis-vector-db" "tei_embedding_service" "tei_reranking_service" "tgi_service" "retriever" "embedding" "reranking" "llm") - for yaml_file in ${yaml_files[@]}; do - kubectl apply -f $yaml_file.yaml -n $NAMESPACE - done + echo "namespace is $NAMESPACE" + kubectl apply -f . 
-n $NAMESPACE + # Sleep enough time for retriever-usvc to be ready sleep 60 - kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE } function validate_chatqna() { max_retry=20 - # make sure microservice retriever is ready + # make sure microservice retriever-usvc is ready # try to curl retriever-svc for max_retry times + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") for ((i=1; i<=max_retry; i++)) do - curl http://retriever-svc.$NAMESPACE:7000/v1/retrieval -X POST \ - -d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \ + curl http://chatqna-retriever-usvc.$NAMESPACE:7000/v1/retrieval -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json' && break - sleep 10 + sleep 30 done + # if i is bigger than max_retry, then exit with error + if [ $i -gt $max_retry ]; then + echo "Microservice retriever failed, exit with error." + exit 1 + fi # make sure microservice tgi-svc is ready for ((i=1; i<=max_retry; i++)) do - curl http://tgi-svc.$NAMESPACE:9009/generate -X POST \ + curl http://chatqna-tgi.$NAMESPACE:80/generate -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' && break sleep 10 done # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then - echo "Microservice failed, exit with error." + echo "Microservice tgi failed, exit with error."
exit 1 fi # check megaservice works # generate a random logfile name to avoid conflict among multiple runners LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log - curl http://chaqna-xeon-backend-server-svc.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{ - "messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE + curl http://chatqna.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE exit_code=$? if [ $exit_code -ne 0 ]; then echo "Megaservice failed, please check the logs in $LOGFILE!" @@ -90,19 +89,19 @@ fi case "$1" in init_ChatQnA) - pushd ChatQnA/kubernetes/manifests + pushd ChatQnA/kubernetes/manifests/xeon init_chatqna popd ;; install_ChatQnA) - pushd ChatQnA/kubernetes/manifests + pushd ChatQnA/kubernetes/manifests/xeon NAMESPACE=$2 install_chatqna popd ;; validate_ChatQnA) NAMESPACE=$2 - SERVICE_NAME=chaqna-xeon-backend-server-svc + SERVICE_NAME=chatqna validate_chatqna ;; *)