From 149dbc37e6c2d45ae39aecb82caa1f0c9f999935 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 4 Sep 2024 19:56:27 -0700 Subject: [PATCH 01/10] update OOB manifests --- .../four_gaudi/chatqna_config_map.yaml | 23 +++++ .../four_gaudi/chatqna_mega_service_run.yaml | 55 ++++++++++++ .../four_gaudi/dataprep-microservice_run.yaml | 75 ++++++++++++++++ .../four_gaudi/embedding-dependency_run.yaml | 62 ++++++++++++++ .../embedding-microservice_run.yaml | 54 ++++++++++++ .../four_gaudi/llm-dependency_run.yaml | 80 +++++++++++++++++ .../four_gaudi/llm-microservice_run.yaml | 54 ++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 85 +++++++++++++++++++ .../reranking-microservice_run.yaml | 54 ++++++++++++ .../retrieval-microservice_run.yaml | 72 ++++++++++++++++ .../with_rerank/four_gaudi/vector-db_run.yaml | 48 +++++++++++ .../single_gaudi/chatqna_config_map.yaml | 23 +++++ .../chatqna_mega_service_run.yaml | 55 ++++++++++++ .../dataprep-microservice_run.yaml | 75 ++++++++++++++++ .../embedding-dependency_run.yaml | 62 ++++++++++++++ .../embedding-microservice_run.yaml | 54 ++++++++++++ .../single_gaudi/llm-dependency_run.yaml | 80 +++++++++++++++++ .../single_gaudi/llm-microservice_run.yaml | 54 ++++++++++++ .../reranking-dependency_run.yaml | 85 +++++++++++++++++++ .../reranking-microservice_run.yaml | 54 ++++++++++++ .../retrieval-microservice_run.yaml | 72 ++++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 +++++++++++ .../two_gaudi/chatqna_config_map.yaml | 23 +++++ .../two_gaudi/chatqna_mega_service_run.yaml | 55 ++++++++++++ .../two_gaudi/dataprep-microservice_run.yaml | 75 ++++++++++++++++ .../two_gaudi/embedding-dependency_run.yaml | 62 ++++++++++++++ .../two_gaudi/embedding-microservice_run.yaml | 54 ++++++++++++ .../two_gaudi/llm-dependency_run.yaml | 80 +++++++++++++++++ .../two_gaudi/llm-microservice_run.yaml | 54 ++++++++++++ .../two_gaudi/reranking-dependency_run.yaml | 85 +++++++++++++++++++ .../two_gaudi/reranking-microservice_run.yaml | 54 ++++++++++++ .../two_gaudi/retrieval-microservice_run.yaml | 72 ++++++++++++++++ .../with_rerank/two_gaudi/vector-db_run.yaml | 48 +++++++++++ 33 files changed, 1986 insertions(+) create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..368c800e4 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..98422525f --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,55 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..4c71df7ce --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..42a20871d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..3af5b9859 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..9f223c715 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..3056dbc1d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml new file mode 100644 index 000000000..af908ecd1 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml new file mode 100644 index 000000000..0723d46a8 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..ac6c12fdc --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..e04e8c5fe --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..368c800e4 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..98422525f --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,55 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..4c71df7ce --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..42a20871d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..3af5b9859 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..fd5955f70 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..3056dbc1d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml new file mode 100644 index 000000000..af908ecd1 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml new file mode 100644 index 000000000..0723d46a8 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..ac6c12fdc --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..e04e8c5fe --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..368c800e4 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..98422525f --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,55 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..4c71df7ce --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..42a20871d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..3af5b9859 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..1f0a2a49a --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..3056dbc1d --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml new file mode 100644 index 000000000..af908ecd1 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml new file mode 100644 index 000000000..0723d46a8 --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..ac6c12fdc --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..e04e8c5fe --- /dev/null +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 From d554d0a87dae9f0eccb3b8cec332b76d1937dcdc Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 4 Sep 2024 22:50:51 -0700 Subject: [PATCH 02/10] update tgi parameters --- .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/with_rerank/two_gaudi/llm-dependency_run.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml index 9f223c715..ed9053049 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml @@ -34,6 +34,10 @@ spec: args: - --model-id - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml index fd5955f70..cf95652c0 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml @@ -34,6 +34,10 @@ spec: args: - --model-id - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml index 1f0a2a49a..45605f03c 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml @@ -34,6 +34,10 @@ spec: args: - --model-id - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume From 327f3b1f8895e616425c432c7061093189e8e160 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 4 Sep 2024 23:16:23 -0700 Subject: [PATCH 03/10] update OOB manifests for w/o rerank --- .../four_gaudi/chatqna_config_map.yaml | 20 +++++ .../four_gaudi/chatqna_mega_service_run.yaml | 52 ++++++++++++ .../four_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../four_gaudi/embedding-dependency_run.yaml | 59 +++++++++++++ .../embedding-microservice_run.yaml | 51 ++++++++++++ .../four_gaudi/llm-dependency_run.yaml | 81 ++++++++++++++++++ .../four_gaudi/llm-microservice_run.yaml | 51 ++++++++++++ .../four_gaudi/reranking-dependency_run.yaml | 82 +++++++++++++++++++ .../reranking-microservice_run.yaml | 51 ++++++++++++ .../retrieval-microservice_run.yaml | 69 ++++++++++++++++ .../four_gaudi/vector-db_run.yaml | 45 ++++++++++ .../single_gaudi/chatqna_config_map.yaml | 20 +++++ .../chatqna_mega_service_run.yaml | 52 ++++++++++++ .../dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../embedding-dependency_run.yaml | 59 +++++++++++++ .../embedding-microservice_run.yaml | 51 ++++++++++++ .../single_gaudi/llm-dependency_run.yaml | 81 ++++++++++++++++++ .../single_gaudi/llm-microservice_run.yaml | 51 ++++++++++++ .../retrieval-microservice_run.yaml | 69 ++++++++++++++++ .../single_gaudi/vector-db_run.yaml | 45 ++++++++++ .../two_gaudi/chatqna_config_map.yaml | 20 +++++ .../two_gaudi/chatqna_mega_service_run.yaml | 52 ++++++++++++ .../two_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../two_gaudi/embedding-dependency_run.yaml | 59 +++++++++++++ .../two_gaudi/embedding-microservice_run.yaml | 51 ++++++++++++ .../two_gaudi/llm-dependency_run.yaml | 81 ++++++++++++++++++ .../two_gaudi/llm-microservice_run.yaml | 51 ++++++++++++ .../two_gaudi/retrieval-microservice_run.yaml | 69 ++++++++++++++++ .../two_gaudi/vector-db_run.yaml | 45 ++++++++++ 29 files changed, 1633 insertions(+) create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..f94a2ba1d --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..1c8aa91de --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..f32d44abc --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..9e9f25907 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..c073ddd1b --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml new file mode 100644 index 000000000..4841536de --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml @@ -0,0 +1,82 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml new file mode 100644 index 000000000..60f888356 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + args: null + ports: + - containerPort: 8000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-svc +spec: + type: ClusterIP + selector: + app: reranking-deploy + ports: + - name: service + port: 8000 + targetPort: 8000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..d71b78f92 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..f94a2ba1d --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..1c8aa91de --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..f32d44abc --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..f64acac77 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..c073ddd1b --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..d71b78f92 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..f94a2ba1d --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..1c8aa91de --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..f32d44abc --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..989e0807f --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..c073ddd1b --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..d71b78f92 --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 From 47c33abf31af217d9fb15b337cf9ebad35c2d5e0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 06:16:40 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../oob/without_rerank/four_gaudi/chatqna_config_map.yaml | 3 +++ .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/four_gaudi/dataprep-microservice_run.yaml | 3 +++ .../without_rerank/four_gaudi/embedding-dependency_run.yaml | 3 +++ .../without_rerank/four_gaudi/embedding-microservice_run.yaml | 3 +++ .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml | 3 +++ .../oob/without_rerank/four_gaudi/llm-microservice_run.yaml | 3 +++ .../without_rerank/four_gaudi/reranking-dependency_run.yaml | 3 +++ .../without_rerank/four_gaudi/reranking-microservice_run.yaml | 3 +++ .../without_rerank/four_gaudi/retrieval-microservice_run.yaml | 3 +++ .../benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml | 3 +++ .../oob/without_rerank/single_gaudi/chatqna_config_map.yaml | 3 +++ .../without_rerank/single_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/single_gaudi/dataprep-microservice_run.yaml | 3 +++ .../without_rerank/single_gaudi/embedding-dependency_run.yaml | 3 +++ .../single_gaudi/embedding-microservice_run.yaml | 3 +++ .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml | 3 +++ .../oob/without_rerank/single_gaudi/llm-microservice_run.yaml | 3 +++ .../single_gaudi/retrieval-microservice_run.yaml | 3 +++ .../oob/without_rerank/single_gaudi/vector-db_run.yaml | 3 +++ .../oob/without_rerank/two_gaudi/chatqna_config_map.yaml | 3 +++ .../oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/two_gaudi/dataprep-microservice_run.yaml | 3 +++ .../oob/without_rerank/two_gaudi/embedding-dependency_run.yaml | 3 +++ .../without_rerank/two_gaudi/embedding-microservice_run.yaml | 3 +++ .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml | 3 +++ .../oob/without_rerank/two_gaudi/llm-microservice_run.yaml | 3 +++ .../without_rerank/two_gaudi/retrieval-microservice_run.yaml | 3 +++ .../benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml | 3 +++ 29 files changed, 87 insertions(+) diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml index f94a2ba1d..687fdc51e 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml index 1c8aa91de..42a20871d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml index f32d44abc..3af5b9859 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml index 9e9f25907..bf58c13a8 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml index c073ddd1b..3056dbc1d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml index 4841536de..af908ecd1 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml index 60f888356..0723d46a8 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml index d71b78f92..ac6c12fdc 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml index f94a2ba1d..687fdc51e 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml index 1c8aa91de..42a20871d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml index f32d44abc..3af5b9859 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml index f64acac77..e8079c1a1 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml index c073ddd1b..3056dbc1d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml index d71b78f92..ac6c12fdc 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml index f94a2ba1d..687fdc51e 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml index 1c8aa91de..42a20871d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml index f32d44abc..3af5b9859 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml index 989e0807f..2b2386e15 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml index c073ddd1b..3056dbc1d 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml index d71b78f92..ac6c12fdc 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: From 15611dbcb57870ea86f3585b757baae2983bbba0 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 4 Sep 2024 23:56:44 -0700 Subject: [PATCH 05/10] update tgi parameters --- .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/with_rerank/two_gaudi/llm-dependency_run.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml index ed9053049..50cdc6f3d 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml index cf95652c0..f91c34526 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml index 45605f03c..ae32ed018 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume From c959586293fa1ef42644354306ccec4fa9594058 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 5 Sep 2024 00:52:31 -0700 Subject: [PATCH 06/10] update tgi parameters for v0.9 w/o rerank --- .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml | 4 ++++ .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml index bf58c13a8..7977b1a31 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml index e8079c1a1..0ce7c055e 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml index 2b2386e15..0383fa738 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -38,6 +38,10 @@ spec: - '2048' - --max-total-tokens - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' volumeMounts: - mountPath: /data name: model-volume From 7607c785c6f946f86b887090c81ab4f1dfa4471b Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 5 Sep 2024 01:30:55 -0700 Subject: [PATCH 07/10] update OOB manifests 2.0.4->2.0.1 for w/o rerank --- .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml | 2 +- .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml | 2 +- .../benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml | 2 +- .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml | 2 +- .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml | 2 +- .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml index 50cdc6f3d..130089f87 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml index f91c34526..093d2264b 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml index ae32ed018..9499f04ed 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml index 7977b1a31..64b4197db 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml index 0ce7c055e..bbf9d6aeb 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml index 0383fa738..e78da3e38 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -25,7 +25,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 name: llm-dependency-deploy-demo securityContext: capabilities: From 7d56e63636ab7bc709b5ef4f72556e901cccaa4f Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 5 Sep 2024 01:34:43 -0700 Subject: [PATCH 08/10] update tuned manifests --- .../four_gaudi/chatqna_config_map.yaml | 0 .../four_gaudi/chatqna_mega_service_run.yaml | 0 .../four_gaudi/dataprep-microservice_run.yaml | 0 .../four_gaudi/embedding-dependency_run.yaml | 0 .../embedding-microservice_run.yaml | 0 .../four_gaudi/llm-dependency_run.yaml | 0 .../four_gaudi/llm-microservice_run.yaml | 0 .../four_gaudi/reranking-dependency_run.yaml | 0 .../reranking-microservice_run.yaml | 0 .../retrieval-microservice_run.yaml | 0 .../four_gaudi/vector-db_run.yaml | 0 .../single_gaudi/chatqna_config_map.yaml | 0 .../chatqna_mega_service_run.yaml | 0 .../dataprep-microservice_run.yaml | 0 .../embedding-dependency_run.yaml | 0 .../embedding-microservice_run.yaml | 0 .../single_gaudi/llm-dependency_run.yaml | 0 .../single_gaudi/llm-microservice_run.yaml | 0 .../reranking-dependency_run.yaml | 0 .../reranking-microservice_run.yaml | 0 .../retrieval-microservice_run.yaml | 0 .../single_gaudi/vector-db_run.yaml | 0 .../two_gaudi/chatqna_config_map.yaml | 0 .../two_gaudi/chatqna_mega_service_run.yaml | 0 .../two_gaudi/dataprep-microservice_run.yaml | 0 .../two_gaudi/embedding-dependency_run.yaml | 0 .../two_gaudi/embedding-microservice_run.yaml | 0 .../two_gaudi/llm-dependency_run.yaml | 0 .../two_gaudi/llm-microservice_run.yaml | 0 .../two_gaudi/reranking-dependency_run.yaml | 0 .../two_gaudi/reranking-microservice_run.yaml | 0 .../two_gaudi/retrieval-microservice_run.yaml | 0 .../with_rerank}/two_gaudi/vector-db_run.yaml | 0 .../four_gaudi/chatqna_config_map.yaml | 20 +++++ .../four_gaudi/chatqna_mega_service_run.yaml | 59 +++++++++++++ .../four_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../four_gaudi/embedding-dependency_run.yaml | 66 ++++++++++++++ .../embedding-microservice_run.yaml | 56 ++++++++++++ .../four_gaudi/llm-dependency_run.yaml | 85 +++++++++++++++++++ .../four_gaudi/llm-microservice_run.yaml | 56 ++++++++++++ .../retrieval-microservice_run.yaml | 76 +++++++++++++++++ .../four_gaudi/vector-db_run.yaml | 45 ++++++++++ .../single_gaudi/chatqna_config_map.yaml | 20 +++++ .../chatqna_mega_service_run.yaml | 59 +++++++++++++ .../dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../embedding-dependency_run.yaml | 66 ++++++++++++++ .../embedding-microservice_run.yaml | 56 ++++++++++++ .../single_gaudi/llm-dependency_run.yaml | 85 +++++++++++++++++++ .../single_gaudi/llm-microservice_run.yaml | 56 ++++++++++++ .../retrieval-microservice_run.yaml | 76 +++++++++++++++++ .../single_gaudi/vector-db_run.yaml | 45 ++++++++++ .../two_gaudi/chatqna_config_map.yaml | 20 +++++ .../two_gaudi/chatqna_mega_service_run.yaml | 59 +++++++++++++ .../two_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++ .../two_gaudi/embedding-dependency_run.yaml | 66 ++++++++++++++ .../two_gaudi/embedding-microservice_run.yaml | 56 ++++++++++++ .../two_gaudi/llm-dependency_run.yaml | 85 +++++++++++++++++++ .../two_gaudi/llm-microservice_run.yaml | 56 ++++++++++++ .../two_gaudi/retrieval-microservice_run.yaml | 76 +++++++++++++++++ .../two_gaudi/vector-db_run.yaml | 45 ++++++++++ 60 files changed, 1605 insertions(+) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/chatqna_config_map.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/chatqna_mega_service_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/dataprep-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/embedding-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/embedding-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/llm-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/llm-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/reranking-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/reranking-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/retrieval-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/vector-db_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/chatqna_config_map.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/chatqna_mega_service_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/dataprep-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/embedding-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/embedding-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/llm-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/llm-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/reranking-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/reranking-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/retrieval-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/vector-db_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/chatqna_config_map.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/chatqna_mega_service_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/dataprep-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/embedding-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/embedding-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/llm-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/llm-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/reranking-dependency_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/reranking-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/retrieval-microservice_run.yaml (100%) rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/vector-db_run.yaml (100%) create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml diff --git a/ChatQnA/benchmark/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml similarity index 100% rename from ChatQnA/benchmark/four_gaudi/vector-db_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml diff --git a/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml similarity index 100% rename from ChatQnA/benchmark/single_gaudi/vector-db_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml diff --git a/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml similarity index 100% rename from ChatQnA/benchmark/two_gaudi/vector-db_run.yaml rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..ba84ea9da --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..6f29d87bd --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..02e1aa1a5 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..a139cebb8 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -0,0 +1,85 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..17e70999b --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..79e563520 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..ba84ea9da --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..0f208b673 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..02e1aa1a5 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..bda1e0449 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -0,0 +1,85 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..17e70999b --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..79e563520 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..3ada83152 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + EMBEDDING_SERVICE_HOST_IP: embedding-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + RERANK_SERVICE_HOST_IP: reranking-svc + NODE_SELECTOR: chatqna-opea + LLM_SERVICE_HOST_IP: llm-svc diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..ba84ea9da --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 4000Mi + requests: + cpu: 8 + memory: 4000Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..200ab5cbb --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..b18d72c5d --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml new file mode 100644 index 000000000..02e1aa1a5 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: embedding-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + args: null + ports: + - containerPort: 6000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-svc +spec: + type: ClusterIP + selector: + app: embedding-deploy + ports: + - name: service + port: 6000 + targetPort: 6000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..8b44745f1 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -0,0 +1,85 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml new file mode 100644 index 000000000..17e70999b --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: llm-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + args: null + ports: + - containerPort: 9000 + resources: + limits: + cpu: 4 + requests: + cpu: 4 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-svc +spec: + type: ClusterIP + selector: + app: llm-deploy + ports: + - name: service + port: 9000 + targetPort: 9000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..79e563520 --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + resources: + limits: + cpu: 8 + memory: 2500Mi + requests: + cpu: 8 + memory: 2500Mi + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..069d6a01a --- /dev/null +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 From 5562d339f78e075d781ec422c21f076b4292c3e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:35:33 +0000 Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tuned/without_rerank/four_gaudi/chatqna_config_map.yaml | 3 +++ .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/four_gaudi/dataprep-microservice_run.yaml | 3 +++ .../without_rerank/four_gaudi/embedding-dependency_run.yaml | 3 +++ .../without_rerank/four_gaudi/embedding-microservice_run.yaml | 3 +++ .../tuned/without_rerank/four_gaudi/llm-dependency_run.yaml | 3 +++ .../tuned/without_rerank/four_gaudi/llm-microservice_run.yaml | 3 +++ .../without_rerank/four_gaudi/retrieval-microservice_run.yaml | 3 +++ .../tuned/without_rerank/four_gaudi/vector-db_run.yaml | 3 +++ .../tuned/without_rerank/single_gaudi/chatqna_config_map.yaml | 3 +++ .../without_rerank/single_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/single_gaudi/dataprep-microservice_run.yaml | 3 +++ .../without_rerank/single_gaudi/embedding-dependency_run.yaml | 3 +++ .../single_gaudi/embedding-microservice_run.yaml | 3 +++ .../tuned/without_rerank/single_gaudi/llm-dependency_run.yaml | 3 +++ .../without_rerank/single_gaudi/llm-microservice_run.yaml | 3 +++ .../single_gaudi/retrieval-microservice_run.yaml | 3 +++ .../tuned/without_rerank/single_gaudi/vector-db_run.yaml | 3 +++ .../tuned/without_rerank/two_gaudi/chatqna_config_map.yaml | 3 +++ .../without_rerank/two_gaudi/chatqna_mega_service_run.yaml | 3 +++ .../without_rerank/two_gaudi/dataprep-microservice_run.yaml | 3 +++ .../without_rerank/two_gaudi/embedding-dependency_run.yaml | 3 +++ .../without_rerank/two_gaudi/embedding-microservice_run.yaml | 3 +++ .../tuned/without_rerank/two_gaudi/llm-dependency_run.yaml | 3 +++ .../tuned/without_rerank/two_gaudi/llm-microservice_run.yaml | 3 +++ .../without_rerank/two_gaudi/retrieval-microservice_run.yaml | 3 +++ .../tuned/without_rerank/two_gaudi/vector-db_run.yaml | 3 +++ 27 files changed, 81 insertions(+) diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml index ba84ea9da..cfe155580 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml index 6f29d87bd..69dbd7af9 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml index 02e1aa1a5..f23ba0b4f 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml index a139cebb8..ebee24319 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml index 17e70999b..1d9e29112 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml index 79e563520..298abd73a 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml index ba84ea9da..cfe155580 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml index 0f208b673..f27ffcad0 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml index 02e1aa1a5..f23ba0b4f 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml index bda1e0449..6fd539c95 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml index 17e70999b..1d9e29112 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml index 79e563520..298abd73a 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml index 3ada83152..368c800e4 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 kind: ConfigMap metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml index ba84ea9da..cfe155580 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml index 200ab5cbb..4c71df7ce 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml index b18d72c5d..485d73402 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml index 02e1aa1a5..f23ba0b4f 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml index 8b44745f1..466008735 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml index 17e70999b..1d9e29112 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml index 79e563520..298abd73a 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml index 069d6a01a..e04e8c5fe 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: apps/v1 kind: Deployment metadata: From 7f1c321e92426ae916b9691501f57b615d3321e8 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 5 Sep 2024 01:42:09 -0700 Subject: [PATCH 10/10] update tuned manifests --- .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml | 2 +- .../without_rerank/four_gaudi/embedding-microservice_run.yaml | 2 +- .../tuned/without_rerank/four_gaudi/llm-microservice_run.yaml | 2 +- .../without_rerank/four_gaudi/retrieval-microservice_run.yaml | 2 +- .../without_rerank/two_gaudi/chatqna_mega_service_run.yaml | 2 +- .../without_rerank/two_gaudi/embedding-microservice_run.yaml | 2 +- .../tuned/without_rerank/two_gaudi/llm-microservice_run.yaml | 2 +- .../without_rerank/two_gaudi/retrieval-microservice_run.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml index cfe155580..22c8c4d46 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml @@ -7,7 +7,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: chatqna-backend-server-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml index f23ba0b4f..348aa7a23 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: embedding-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml index 1d9e29112..7cc6ad123 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: llm-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: llm-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml index 298abd73a..25314a782 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: retriever-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml index cfe155580..b95d4edec 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml @@ -7,7 +7,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml index f23ba0b4f..3822537c4 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: embedding-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml index 1d9e29112..49a67fd2e 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: llm-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: llm-deploy diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml index 298abd73a..b6799fc60 100644 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml +++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml @@ -7,7 +7,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy