diff --git a/ChatQnA/Dockerfile.no_wrapper b/ChatQnA/Dockerfile.no_wrapper index 2cd2601223..c6adacaeec 100644 --- a/ChatQnA/Dockerfile.no_wrapper +++ b/ChatQnA/Dockerfile.no_wrapper @@ -31,6 +31,4 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 999999' >> ~/.bashrc - ENTRYPOINT ["python", "chatqna_no_wrapper.py"] diff --git a/ChatQnA/Dockerfile.no_wrapper_without_rerank b/ChatQnA/Dockerfile.no_wrapper_without_rerank new file mode 100644 index 0000000000..0d6c1e34e2 --- /dev/null +++ b/ChatQnA/Dockerfile.no_wrapper_without_rerank @@ -0,0 +1,34 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +WORKDIR /home/user/ +RUN git clone https://github.com/opea-project/GenAIComps.git + +WORKDIR /home/user/GenAIComps +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ + pip install --no-cache-dir langchain_core + +COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py + +ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps + +USER user + +WORKDIR /home/user + +ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"] diff --git a/ChatQnA/benchmark/README.md b/ChatQnA/benchmark/README.md index b666e8ce40..767b4999b4 100644 --- a/ChatQnA/benchmark/README.md +++ b/ChatQnA/benchmark/README.md @@ -37,6 +37,17 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c - Set up kubectl on the master node with access to the Kubernetes cluster. - Install Python 3.8+ on the master node for running the stress tool. - Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. +- Ensure that the container's ulimit can meet the number of requests. 
+ +```bash +# The way to modify the containerd ulimit: +sudo systemctl edit containerd +# Add two lines: +[Service] +LimitNOFILE=65536:1048576 + +sudo systemctl daemon-reload; sudo systemctl restart containerd +``` ### Kubernetes Cluster Example diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index f8684c239e..85866573f1 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel 
Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +47,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +93,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + 
name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +145,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi 
+ name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +215,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +222,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +267,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ 
-340,16 +279,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +290,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +362,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +369,34 @@ spec: image: 
opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +414,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +432,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + 
topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +500,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +507,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +552,41 @@ spec: 
labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -694,24 +599,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: 
redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -719,9 +632,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +639,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index b05326a30d..eb63aada5f 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: 
http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +47,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +93,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - 
topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +145,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - 
ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +215,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +222,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +267,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -340,16 +279,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +290,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# 
Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +362,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +369,34 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +414,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +432,57 @@ spec: 
value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +500,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +507,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + 
whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +552,41 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment 
metadata: @@ -694,24 +599,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -719,9 +632,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +639,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 13d8345129..1b9bf7ebfe 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: 
redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +47,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy 
ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +93,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +145,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - 
- envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +215,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +222,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + 
matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +267,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -340,16 +279,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +290,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - 
hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +362,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +369,34 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +414,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - 
configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +432,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +500,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - 
hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +507,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +552,41 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: 
kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -694,24 +599,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -719,9 +632,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +639,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..0e8ab7ff2d --- /dev/null +++ 
b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml @@ -0,0 +1,489 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: 
chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - 
labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 63 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + 
type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP 
+--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git 
a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..f950787784 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml @@ -0,0 +1,489 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: 
default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: 
embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + 
node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - 
emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway 
+--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..fcf1bd4241 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml @@ -0,0 +1,489 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + 
spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: 
embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: 
llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + 
topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - 
containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..56fbf194c8 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml @@ -0,0 +1,489 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 
+kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 
'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: 
reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + 
annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..0d7c3388c5 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml @@ -0,0 +1,403 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 
+ TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 
1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 64 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + 
- --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + 
serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..1a8ff49921 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml @@ -0,0 +1,403 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + 
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + 
namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + 
sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + 
app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: 
vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..4b5e034aed --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml @@ -0,0 +1,403 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 
8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: 
ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + 
name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + 
image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..16e3020f18 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml @@ -0,0 +1,403 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: 
ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: 
+ ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens 
+ - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + 
whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 373b46c8a1..ddb1b7fc5f 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: 
Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +47,42 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + 
whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +100,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation 
-# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +152,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,38 +172,46 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -270,16 +229,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: 
embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +236,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -335,20 +285,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +297,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,38 +308,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + 
add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -429,16 +380,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +387,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: 
llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +436,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,38 +454,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 
targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -585,16 +522,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,37 +529,38 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -650,74 +578,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - 
valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +629,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ 
-755,9 +662,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +669,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 9d2f0ee96d..dae895f526 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + 
name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +47,42 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +100,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - 
configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +152,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,38 +172,46 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - 
labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -270,16 +229,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +236,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: 
@@ -335,20 +285,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +297,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,38 +308,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: 
apps/v1 kind: Deployment metadata: @@ -429,16 +380,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +387,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +436,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,38 +454,57 @@ spec: value: ${HF_TOKEN} - name: 
MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -585,16 +522,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,37 +529,38 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + 
topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -650,74 +578,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy 
+ type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +629,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -755,9 +662,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +669,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 4ed98c347c..ee10361c7c 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -2,33 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: 
${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +40,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +47,42 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 
1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +100,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - 
-# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +152,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,38 +172,46 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -270,16 +229,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - 
labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +236,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -335,20 +285,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +297,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,38 +308,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 
+ securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -429,16 +380,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +387,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 
targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +436,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,38 +454,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy 
ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -585,16 +522,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,37 +529,38 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -650,74 +578,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: 
HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +629,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- 
apiVersion: v1 kind: Service @@ -755,9 +662,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +669,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..c541964e93 --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml @@ -0,0 +1,507 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: 
+ replicas: 8 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 63 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 
'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: 
reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + 
matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..a4cf76e74b --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml @@ -0,0 +1,507 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: 
http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: 
chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + 
replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy 
+ spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + 
topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..9b64b1fbbd --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml @@ -0,0 +1,507 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: 
embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: 
Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: 
ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service 
+metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + 
matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git 
a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml new file mode 100644 index 0000000000..9fbb3fa06d --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml @@ -0,0 +1,507 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + 
memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: 
ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + 
securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + 
maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + 
node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..36d9c4d2b1 --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml @@ -0,0 +1,421 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 
+ targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 64 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - 
--max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + 
- labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..9c74e60ddb --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml @@ -0,0 +1,421 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: 
embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + 
whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 32 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 
+kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service 
+metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..06c7321a59 --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml @@ -0,0 +1,421 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: 
+ containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: 
+ metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - 
name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + 
targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml new file mode 100644 index 0000000000..7505e2a030 --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml @@ -0,0 +1,421 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + 
RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: 
+ - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + 
port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 16 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 
2 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py index 
a2d007999e..2780c7486d 100644 --- a/ChatQnA/chatqna_no_wrapper.py +++ b/ChatQnA/chatqna_no_wrapper.py @@ -1,6 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import argparse import json import os import re @@ -95,7 +96,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]] else: # forward to llm - if not docs: + if not docs and with_rerank: # delete the rerank from retriever -> rerank -> llm for ds in reversed(runtime_graph.downstream(cur_node)): for nds in runtime_graph.downstream(ds): @@ -260,6 +261,13 @@ def add_remote_service_without_rerank(self): if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--without-rerank", action="store_true") + + args = parser.parse_args() + chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() - # chatqna.add_remote_service_without_rerank() + if args.without_rerank: + chatqna.add_remote_service_without_rerank() + else: + chatqna.add_remote_service() diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index bbde37d415..4dd1d3b740 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -29,6 +29,12 @@ services: dockerfile: ./Dockerfile.no_wrapper extends: chatqna image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest} + chatqna-no-wrapper-without-rerank: + build: + context: ../ + dockerfile: ./Dockerfile.no_wrapper_without_rerank + extends: chatqna + image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest} chatqna-ui: build: context: ../ui