From 149dbc37e6c2d45ae39aecb82caa1f0c9f999935 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Wed, 4 Sep 2024 19:56:27 -0700
Subject: [PATCH 01/10] update OOB manifests

---
 .../four_gaudi/chatqna_config_map.yaml        | 23 +++++
 .../four_gaudi/chatqna_mega_service_run.yaml  | 55 ++++++++++++
 .../four_gaudi/dataprep-microservice_run.yaml | 75 ++++++++++++++++
 .../four_gaudi/embedding-dependency_run.yaml  | 62 ++++++++++++++
 .../embedding-microservice_run.yaml           | 54 ++++++++++++
 .../four_gaudi/llm-dependency_run.yaml        | 80 +++++++++++++++++
 .../four_gaudi/llm-microservice_run.yaml      | 54 ++++++++++++
 .../four_gaudi/reranking-dependency_run.yaml  | 85 +++++++++++++++++++
 .../reranking-microservice_run.yaml           | 54 ++++++++++++
 .../retrieval-microservice_run.yaml           | 72 ++++++++++++++++
 .../with_rerank/four_gaudi/vector-db_run.yaml | 48 +++++++++++
 .../single_gaudi/chatqna_config_map.yaml      | 23 +++++
 .../chatqna_mega_service_run.yaml             | 55 ++++++++++++
 .../dataprep-microservice_run.yaml            | 75 ++++++++++++++++
 .../embedding-dependency_run.yaml             | 62 ++++++++++++++
 .../embedding-microservice_run.yaml           | 54 ++++++++++++
 .../single_gaudi/llm-dependency_run.yaml      | 80 +++++++++++++++++
 .../single_gaudi/llm-microservice_run.yaml    | 54 ++++++++++++
 .../reranking-dependency_run.yaml             | 85 +++++++++++++++++++
 .../reranking-microservice_run.yaml           | 54 ++++++++++++
 .../retrieval-microservice_run.yaml           | 72 ++++++++++++++++
 .../single_gaudi/vector-db_run.yaml           | 48 +++++++++++
 .../two_gaudi/chatqna_config_map.yaml         | 23 +++++
 .../two_gaudi/chatqna_mega_service_run.yaml   | 55 ++++++++++++
 .../two_gaudi/dataprep-microservice_run.yaml  | 75 ++++++++++++++++
 .../two_gaudi/embedding-dependency_run.yaml   | 62 ++++++++++++++
 .../two_gaudi/embedding-microservice_run.yaml | 54 ++++++++++++
 .../two_gaudi/llm-dependency_run.yaml         | 80 +++++++++++++++++
 .../two_gaudi/llm-microservice_run.yaml       | 54 ++++++++++++
 .../two_gaudi/reranking-dependency_run.yaml   | 85 +++++++++++++++++++
 .../two_gaudi/reranking-microservice_run.yaml | 54 ++++++++++++
 .../two_gaudi/retrieval-microservice_run.yaml | 72 ++++++++++++++++
 .../with_rerank/two_gaudi/vector-db_run.yaml  | 48 +++++++++++
 33 files changed, 1986 insertions(+)
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml

diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..368c800e4
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..98422525f
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,55 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..4c71df7ce
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,75 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..42a20871d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..3af5b9859
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..9f223c715
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,80 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 31
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..3056dbc1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
new file mode 100644
index 000000000..af908ecd1
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy
+  ports:
+  - name: service
+    port: 8808
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
new file mode 100644
index 000000000..0723d46a8
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: reranking-deploy
+        args: null
+        ports:
+        - containerPort: 8000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..ac6c12fdc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,72 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..e04e8c5fe
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..368c800e4
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..98422525f
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,55 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..4c71df7ce
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,75 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..42a20871d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..3af5b9859
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..fd5955f70
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,80 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 7
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..3056dbc1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
new file mode 100644
index 000000000..af908ecd1
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy
+  ports:
+  - name: service
+    port: 8808
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
new file mode 100644
index 000000000..0723d46a8
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: reranking-deploy
+        args: null
+        ports:
+        - containerPort: 8000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..ac6c12fdc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,72 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..e04e8c5fe
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..368c800e4
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..98422525f
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,55 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..4c71df7ce
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,75 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..42a20871d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..3af5b9859
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..1f0a2a49a
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,80 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 15
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..3056dbc1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
new file mode 100644
index 000000000..af908ecd1
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy
+  ports:
+  - name: service
+    port: 8808
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
new file mode 100644
index 000000000..0723d46a8
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: reranking-deploy
+        args: null
+        ports:
+        - containerPort: 8000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..ac6c12fdc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,72 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..e04e8c5fe
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001

From d554d0a87dae9f0eccb3b8cec332b76d1937dcdc Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Wed, 4 Sep 2024 22:50:51 -0700
Subject: [PATCH 02/10] update tgi parameters

---
 .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml        | 4 ++++
 .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml      | 4 ++++
 .../oob/with_rerank/two_gaudi/llm-dependency_run.yaml         | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
index 9f223c715..ed9053049 100644
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
@@ -34,6 +34,10 @@ spec:
         args:
         - --model-id
         - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
index fd5955f70..cf95652c0 100644
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
@@ -34,6 +34,10 @@ spec:
         args:
         - --model-id
         - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
index 1f0a2a49a..45605f03c 100644
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
@@ -34,6 +34,10 @@ spec:
         args:
         - --model-id
         - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume

From 327f3b1f8895e616425c432c7061093189e8e160 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Wed, 4 Sep 2024 23:16:23 -0700
Subject: [PATCH 03/10] update OOB manifests for w/o rerank

---
 .../four_gaudi/chatqna_config_map.yaml        | 20 +++++
 .../four_gaudi/chatqna_mega_service_run.yaml  | 52 ++++++++++++
 .../four_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++
 .../four_gaudi/embedding-dependency_run.yaml  | 59 +++++++++++++
 .../embedding-microservice_run.yaml           | 51 ++++++++++++
 .../four_gaudi/llm-dependency_run.yaml        | 81 ++++++++++++++++++
 .../four_gaudi/llm-microservice_run.yaml      | 51 ++++++++++++
 .../four_gaudi/reranking-dependency_run.yaml  | 82 +++++++++++++++++++
 .../reranking-microservice_run.yaml           | 51 ++++++++++++
 .../retrieval-microservice_run.yaml           | 69 ++++++++++++++++
 .../four_gaudi/vector-db_run.yaml             | 45 ++++++++++
 .../single_gaudi/chatqna_config_map.yaml      | 20 +++++
 .../chatqna_mega_service_run.yaml             | 52 ++++++++++++
 .../dataprep-microservice_run.yaml            | 72 ++++++++++++++++
 .../embedding-dependency_run.yaml             | 59 +++++++++++++
 .../embedding-microservice_run.yaml           | 51 ++++++++++++
 .../single_gaudi/llm-dependency_run.yaml      | 81 ++++++++++++++++++
 .../single_gaudi/llm-microservice_run.yaml    | 51 ++++++++++++
 .../retrieval-microservice_run.yaml           | 69 ++++++++++++++++
 .../single_gaudi/vector-db_run.yaml           | 45 ++++++++++
 .../two_gaudi/chatqna_config_map.yaml         | 20 +++++
 .../two_gaudi/chatqna_mega_service_run.yaml   | 52 ++++++++++++
 .../two_gaudi/dataprep-microservice_run.yaml  | 72 ++++++++++++++++
 .../two_gaudi/embedding-dependency_run.yaml   | 59 +++++++++++++
 .../two_gaudi/embedding-microservice_run.yaml | 51 ++++++++++++
 .../two_gaudi/llm-dependency_run.yaml         | 81 ++++++++++++++++++
 .../two_gaudi/llm-microservice_run.yaml       | 51 ++++++++++++
 .../two_gaudi/retrieval-microservice_run.yaml | 69 ++++++++++++++++
 .../two_gaudi/vector-db_run.yaml              | 45 ++++++++++
 29 files changed, 1633 insertions(+)
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml

diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..f94a2ba1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,52 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..1c8aa91de
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..f32d44abc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..9e9f25907
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,81 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 32
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..c073ddd1b
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
new file mode 100644
index 000000000..4841536de
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy
+  ports:
+  - name: service
+    port: 8808
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
new file mode 100644
index 000000000..60f888356
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: reranking-deploy
+        args: null
+        ports:
+        - containerPort: 8000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..d71b78f92
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..f94a2ba1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,52 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..1c8aa91de
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..f32d44abc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..f64acac77
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,81 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 8
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..c073ddd1b
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..d71b78f92
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..f94a2ba1d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,52 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..1c8aa91de
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..f32d44abc
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..989e0807f
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,81 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 16
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..c073ddd1b
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..d71b78f92
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001

From 47c33abf31af217d9fb15b337cf9ebad35c2d5e0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 5 Sep 2024 06:16:40 +0000
Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../oob/without_rerank/four_gaudi/chatqna_config_map.yaml      | 3 +++
 .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/dataprep-microservice_run.yaml   | 3 +++
 .../without_rerank/four_gaudi/embedding-dependency_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/embedding-microservice_run.yaml  | 3 +++
 .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml      | 3 +++
 .../oob/without_rerank/four_gaudi/llm-microservice_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/reranking-dependency_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/reranking-microservice_run.yaml  | 3 +++
 .../without_rerank/four_gaudi/retrieval-microservice_run.yaml  | 3 +++
 .../benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml | 3 +++
 .../oob/without_rerank/single_gaudi/chatqna_config_map.yaml    | 3 +++
 .../without_rerank/single_gaudi/chatqna_mega_service_run.yaml  | 3 +++
 .../without_rerank/single_gaudi/dataprep-microservice_run.yaml | 3 +++
 .../without_rerank/single_gaudi/embedding-dependency_run.yaml  | 3 +++
 .../single_gaudi/embedding-microservice_run.yaml               | 3 +++
 .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml    | 3 +++
 .../oob/without_rerank/single_gaudi/llm-microservice_run.yaml  | 3 +++
 .../single_gaudi/retrieval-microservice_run.yaml               | 3 +++
 .../oob/without_rerank/single_gaudi/vector-db_run.yaml         | 3 +++
 .../oob/without_rerank/two_gaudi/chatqna_config_map.yaml       | 3 +++
 .../oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml | 3 +++
 .../without_rerank/two_gaudi/dataprep-microservice_run.yaml    | 3 +++
 .../oob/without_rerank/two_gaudi/embedding-dependency_run.yaml | 3 +++
 .../without_rerank/two_gaudi/embedding-microservice_run.yaml   | 3 +++
 .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml       | 3 +++
 .../oob/without_rerank/two_gaudi/llm-microservice_run.yaml     | 3 +++
 .../without_rerank/two_gaudi/retrieval-microservice_run.yaml   | 3 +++
 .../benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml  | 3 +++
 29 files changed, 87 insertions(+)

diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
index f94a2ba1d..687fdc51e 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
index 1c8aa91de..42a20871d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
index f32d44abc..3af5b9859 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
index 9e9f25907..bf58c13a8 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
index c073ddd1b..3056dbc1d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
index 4841536de..af908ecd1 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
index 60f888356..0723d46a8 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
index d71b78f92..ac6c12fdc 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
index f94a2ba1d..687fdc51e 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
index 1c8aa91de..42a20871d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
index f32d44abc..3af5b9859 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
index f64acac77..e8079c1a1 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
index c073ddd1b..3056dbc1d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
index d71b78f92..ac6c12fdc 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
index f94a2ba1d..687fdc51e 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
index 1c8aa91de..42a20871d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
index f32d44abc..3af5b9859 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
index 989e0807f..2b2386e15 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
index c073ddd1b..3056dbc1d 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
index d71b78f92..ac6c12fdc 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:

From 15611dbcb57870ea86f3585b757baae2983bbba0 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Wed, 4 Sep 2024 23:56:44 -0700
Subject: [PATCH 05/10] update tgi parameters

---
 .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml        | 4 ++++
 .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml      | 4 ++++
 .../oob/with_rerank/two_gaudi/llm-dependency_run.yaml         | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
index ed9053049..50cdc6f3d 100644
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
index cf95652c0..f91c34526 100644
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
index 45605f03c..ae32ed018 100644
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume

From c959586293fa1ef42644354306ccec4fa9594058 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 5 Sep 2024 00:52:31 -0700
Subject: [PATCH 06/10] update tgi parameters for v0.9 w/o rerank

---
 .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml     | 4 ++++
 .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml   | 4 ++++
 .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml      | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
index bf58c13a8..7977b1a31 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
index e8079c1a1..0ce7c055e 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
index 2b2386e15..0383fa738 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -38,6 +38,10 @@ spec:
         - '2048'
         - --max-total-tokens
         - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
         volumeMounts:
         - mountPath: /data
           name: model-volume

From 7607c785c6f946f86b887090c81ab4f1dfa4471b Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 5 Sep 2024 01:30:55 -0700
Subject: [PATCH 07/10] update OOB manifests 2.0.4->2.0.1 for w/o rerank

---
 .../oob/with_rerank/four_gaudi/llm-dependency_run.yaml          | 2 +-
 .../oob/with_rerank/single_gaudi/llm-dependency_run.yaml        | 2 +-
 .../benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml | 2 +-
 .../oob/without_rerank/four_gaudi/llm-dependency_run.yaml       | 2 +-
 .../oob/without_rerank/single_gaudi/llm-dependency_run.yaml     | 2 +-
 .../oob/without_rerank/two_gaudi/llm-dependency_run.yaml        | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
index 50cdc6f3d..130089f87 100644
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
index f91c34526..093d2264b 100644
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
index ae32ed018..9499f04ed 100644
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
index 7977b1a31..64b4197db 100644
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
index 0ce7c055e..bbf9d6aeb 100644
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
index 0383fa738..e78da3e38 100644
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -25,7 +25,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:

From 7d56e63636ab7bc709b5ef4f72556e901cccaa4f Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 5 Sep 2024 01:34:43 -0700
Subject: [PATCH 08/10] update tuned manifests

---
 .../four_gaudi/chatqna_config_map.yaml        |  0
 .../four_gaudi/chatqna_mega_service_run.yaml  |  0
 .../four_gaudi/dataprep-microservice_run.yaml |  0
 .../four_gaudi/embedding-dependency_run.yaml  |  0
 .../embedding-microservice_run.yaml           |  0
 .../four_gaudi/llm-dependency_run.yaml        |  0
 .../four_gaudi/llm-microservice_run.yaml      |  0
 .../four_gaudi/reranking-dependency_run.yaml  |  0
 .../reranking-microservice_run.yaml           |  0
 .../retrieval-microservice_run.yaml           |  0
 .../four_gaudi/vector-db_run.yaml             |  0
 .../single_gaudi/chatqna_config_map.yaml      |  0
 .../chatqna_mega_service_run.yaml             |  0
 .../dataprep-microservice_run.yaml            |  0
 .../embedding-dependency_run.yaml             |  0
 .../embedding-microservice_run.yaml           |  0
 .../single_gaudi/llm-dependency_run.yaml      |  0
 .../single_gaudi/llm-microservice_run.yaml    |  0
 .../reranking-dependency_run.yaml             |  0
 .../reranking-microservice_run.yaml           |  0
 .../retrieval-microservice_run.yaml           |  0
 .../single_gaudi/vector-db_run.yaml           |  0
 .../two_gaudi/chatqna_config_map.yaml         |  0
 .../two_gaudi/chatqna_mega_service_run.yaml   |  0
 .../two_gaudi/dataprep-microservice_run.yaml  |  0
 .../two_gaudi/embedding-dependency_run.yaml   |  0
 .../two_gaudi/embedding-microservice_run.yaml |  0
 .../two_gaudi/llm-dependency_run.yaml         |  0
 .../two_gaudi/llm-microservice_run.yaml       |  0
 .../two_gaudi/reranking-dependency_run.yaml   |  0
 .../two_gaudi/reranking-microservice_run.yaml |  0
 .../two_gaudi/retrieval-microservice_run.yaml |  0
 .../with_rerank}/two_gaudi/vector-db_run.yaml |  0
 .../four_gaudi/chatqna_config_map.yaml        | 20 +++++
 .../four_gaudi/chatqna_mega_service_run.yaml  | 59 +++++++++++++
 .../four_gaudi/dataprep-microservice_run.yaml | 72 ++++++++++++++++
 .../four_gaudi/embedding-dependency_run.yaml  | 66 ++++++++++++++
 .../embedding-microservice_run.yaml           | 56 ++++++++++++
 .../four_gaudi/llm-dependency_run.yaml        | 85 +++++++++++++++++++
 .../four_gaudi/llm-microservice_run.yaml      | 56 ++++++++++++
 .../retrieval-microservice_run.yaml           | 76 +++++++++++++++++
 .../four_gaudi/vector-db_run.yaml             | 45 ++++++++++
 .../single_gaudi/chatqna_config_map.yaml      | 20 +++++
 .../chatqna_mega_service_run.yaml             | 59 +++++++++++++
 .../dataprep-microservice_run.yaml            | 72 ++++++++++++++++
 .../embedding-dependency_run.yaml             | 66 ++++++++++++++
 .../embedding-microservice_run.yaml           | 56 ++++++++++++
 .../single_gaudi/llm-dependency_run.yaml      | 85 +++++++++++++++++++
 .../single_gaudi/llm-microservice_run.yaml    | 56 ++++++++++++
 .../retrieval-microservice_run.yaml           | 76 +++++++++++++++++
 .../single_gaudi/vector-db_run.yaml           | 45 ++++++++++
 .../two_gaudi/chatqna_config_map.yaml         | 20 +++++
 .../two_gaudi/chatqna_mega_service_run.yaml   | 59 +++++++++++++
 .../two_gaudi/dataprep-microservice_run.yaml  | 72 ++++++++++++++++
 .../two_gaudi/embedding-dependency_run.yaml   | 66 ++++++++++++++
 .../two_gaudi/embedding-microservice_run.yaml | 56 ++++++++++++
 .../two_gaudi/llm-dependency_run.yaml         | 85 +++++++++++++++++++
 .../two_gaudi/llm-microservice_run.yaml       | 56 ++++++++++++
 .../two_gaudi/retrieval-microservice_run.yaml | 76 +++++++++++++++++
 .../two_gaudi/vector-db_run.yaml              | 45 ++++++++++
 60 files changed, 1605 insertions(+)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/chatqna_config_map.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/chatqna_mega_service_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/dataprep-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/embedding-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/embedding-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/llm-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/llm-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/reranking-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/reranking-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/retrieval-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/four_gaudi/vector-db_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/chatqna_config_map.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/chatqna_mega_service_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/dataprep-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/embedding-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/embedding-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/llm-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/llm-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/reranking-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/reranking-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/retrieval-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/single_gaudi/vector-db_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/chatqna_config_map.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/chatqna_mega_service_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/dataprep-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/embedding-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/embedding-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/llm-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/llm-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/reranking-dependency_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/reranking-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/retrieval-microservice_run.yaml (100%)
 rename ChatQnA/benchmark/{ => tuned/with_rerank}/two_gaudi/vector-db_run.yaml (100%)
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml

diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml
diff --git a/ChatQnA/benchmark/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/four_gaudi/vector-db_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml
diff --git a/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/single_gaudi/vector-db_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml
diff --git a/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml
similarity index 100%
rename from ChatQnA/benchmark/two_gaudi/vector-db_run.yaml
rename to ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..ba84ea9da
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+        resources:
+          limits:
+            cpu: 8
+            memory: 4000Mi
+          requests:
+            cpu: 8
+            memory: 4000Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..6f29d87bd
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,66 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            cpu: 80
+            memory: 20000Mi
+          requests:
+            cpu: 80
+            memory: 20000Mi
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..02e1aa1a5
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..a139cebb8
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,85 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 32
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..17e70999b
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..79e563520
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,76 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          limits:
+            cpu: 8
+            memory: 2500Mi
+          requests:
+            cpu: 8
+            memory: 2500Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..ba84ea9da
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+        resources:
+          limits:
+            cpu: 8
+            memory: 4000Mi
+          requests:
+            cpu: 8
+            memory: 4000Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..0f208b673
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,66 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            cpu: 80
+            memory: 20000Mi
+          requests:
+            cpu: 80
+            memory: 20000Mi
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..02e1aa1a5
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..bda1e0449
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,85 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 8
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..17e70999b
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..79e563520
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,76 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          limits:
+            cpu: 8
+            memory: 2500Mi
+          requests:
+            cpu: 8
+            memory: 2500Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..3ada83152
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..ba84ea9da
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+        resources:
+          limits:
+            cpu: 8
+            memory: 4000Mi
+          requests:
+            cpu: 8
+            memory: 4000Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..200ab5cbb
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..b18d72c5d
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,66 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            cpu: 80
+            memory: 20000Mi
+          requests:
+            cpu: 80
+            memory: 20000Mi
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..02e1aa1a5
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..8b44745f1
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,85 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 16
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..17e70999b
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..79e563520
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,76 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          limits:
+            cpu: 8
+            memory: 2500Mi
+          requests:
+            cpu: 8
+            memory: 2500Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..069d6a01a
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001

From 5562d339f78e075d781ec422c21f076b4292c3e1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 5 Sep 2024 08:35:33 +0000
Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../tuned/without_rerank/four_gaudi/chatqna_config_map.yaml    | 3 +++
 .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/dataprep-microservice_run.yaml   | 3 +++
 .../without_rerank/four_gaudi/embedding-dependency_run.yaml    | 3 +++
 .../without_rerank/four_gaudi/embedding-microservice_run.yaml  | 3 +++
 .../tuned/without_rerank/four_gaudi/llm-dependency_run.yaml    | 3 +++
 .../tuned/without_rerank/four_gaudi/llm-microservice_run.yaml  | 3 +++
 .../without_rerank/four_gaudi/retrieval-microservice_run.yaml  | 3 +++
 .../tuned/without_rerank/four_gaudi/vector-db_run.yaml         | 3 +++
 .../tuned/without_rerank/single_gaudi/chatqna_config_map.yaml  | 3 +++
 .../without_rerank/single_gaudi/chatqna_mega_service_run.yaml  | 3 +++
 .../without_rerank/single_gaudi/dataprep-microservice_run.yaml | 3 +++
 .../without_rerank/single_gaudi/embedding-dependency_run.yaml  | 3 +++
 .../single_gaudi/embedding-microservice_run.yaml               | 3 +++
 .../tuned/without_rerank/single_gaudi/llm-dependency_run.yaml  | 3 +++
 .../without_rerank/single_gaudi/llm-microservice_run.yaml      | 3 +++
 .../single_gaudi/retrieval-microservice_run.yaml               | 3 +++
 .../tuned/without_rerank/single_gaudi/vector-db_run.yaml       | 3 +++
 .../tuned/without_rerank/two_gaudi/chatqna_config_map.yaml     | 3 +++
 .../without_rerank/two_gaudi/chatqna_mega_service_run.yaml     | 3 +++
 .../without_rerank/two_gaudi/dataprep-microservice_run.yaml    | 3 +++
 .../without_rerank/two_gaudi/embedding-dependency_run.yaml     | 3 +++
 .../without_rerank/two_gaudi/embedding-microservice_run.yaml   | 3 +++
 .../tuned/without_rerank/two_gaudi/llm-dependency_run.yaml     | 3 +++
 .../tuned/without_rerank/two_gaudi/llm-microservice_run.yaml   | 3 +++
 .../without_rerank/two_gaudi/retrieval-microservice_run.yaml   | 3 +++
 .../tuned/without_rerank/two_gaudi/vector-db_run.yaml          | 3 +++
 27 files changed, 81 insertions(+)

diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
index ba84ea9da..cfe155580 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
index 6f29d87bd..69dbd7af9 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
index 02e1aa1a5..f23ba0b4f 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
index a139cebb8..ebee24319 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
index 17e70999b..1d9e29112 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
index 79e563520..298abd73a 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
index ba84ea9da..cfe155580 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
index 0f208b673..f27ffcad0 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
index 02e1aa1a5..f23ba0b4f 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
index bda1e0449..6fd539c95 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
index 17e70999b..1d9e29112 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
index 79e563520..298abd73a 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
index 3ada83152..368c800e4 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
index ba84ea9da..cfe155580 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
index 200ab5cbb..4c71df7ce 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
index b18d72c5d..485d73402 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
index 02e1aa1a5..f23ba0b4f 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
index 8b44745f1..466008735 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
index 17e70999b..1d9e29112 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
index 79e563520..298abd73a 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
index 069d6a01a..e04e8c5fe 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 apiVersion: apps/v1
 kind: Deployment
 metadata:

From 7f1c321e92426ae916b9691501f57b615d3321e8 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 5 Sep 2024 01:42:09 -0700
Subject: [PATCH 10/10] update tuned manifests

---
 .../without_rerank/four_gaudi/chatqna_mega_service_run.yaml     | 2 +-
 .../without_rerank/four_gaudi/embedding-microservice_run.yaml   | 2 +-
 .../tuned/without_rerank/four_gaudi/llm-microservice_run.yaml   | 2 +-
 .../without_rerank/four_gaudi/retrieval-microservice_run.yaml   | 2 +-
 .../without_rerank/two_gaudi/chatqna_mega_service_run.yaml      | 2 +-
 .../without_rerank/two_gaudi/embedding-microservice_run.yaml    | 2 +-
 .../tuned/without_rerank/two_gaudi/llm-microservice_run.yaml    | 2 +-
 .../without_rerank/two_gaudi/retrieval-microservice_run.yaml    | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
index cfe155580..22c8c4d46 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: chatqna-backend-server-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 4
   selector:
     matchLabels:
       app: chatqna-backend-server-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
index f23ba0b4f..348aa7a23 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: embedding-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 4
   selector:
     matchLabels:
       app: embedding-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
index 1d9e29112..7cc6ad123 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: llm-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 4
   selector:
     matchLabels:
       app: llm-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
index 298abd73a..25314a782 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: retriever-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 4
   selector:
     matchLabels:
       app: retriever-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
index cfe155580..b95d4edec 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: chatqna-backend-server-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 2
   selector:
     matchLabels:
       app: chatqna-backend-server-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
index f23ba0b4f..3822537c4 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: embedding-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 2
   selector:
     matchLabels:
       app: embedding-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
index 1d9e29112..49a67fd2e 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: llm-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 2
   selector:
     matchLabels:
       app: llm-deploy
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
index 298abd73a..b6799fc60 100644
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: retriever-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 2
   selector:
     matchLabels:
       app: retriever-deploy