diff --git a/DocIndexRetriever/README.md b/DocIndexRetriever/README.md index bfd09a8308..0301c6205e 100644 --- a/DocIndexRetriever/README.md +++ b/DocIndexRetriever/README.md @@ -1,8 +1,22 @@ # DocRetriever Application -DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity. +DocRetriever is the most widely adopted use case for leveraging different methodologies to match a user query against a set of free-text records. DocRetriever is essential to a RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that generated responses remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient, semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity. 
## We provided DocRetriever with different deployment infra - [docker xeon version](docker_compose/intel/cpu/xeon/README.md) => minimum endpoints, easy to setup - [docker gaudi version](docker_compose/intel/hpu/gaudi/README.md) => with extra tei_gaudi endpoint, faster + +## We allow users to set retriever/reranker hyperparams via requests + +Example usage: + +```python +url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port) +payload = { + "messages": query, + "k": 5, # retriever top k + "top_n": 2, # reranker top n +} +response = requests.post(url, json=payload) +``` diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index b921cc126f..58354babfa 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -79,13 +79,26 @@ Retrieval from KnowledgeBase ```bash curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" }' # expected output {"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"} ``` +**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can be changed are listed below. + + 1. retriever + * search_type: str = "similarity" + * k: int = 4 + * distance_threshold: Optional[float] = None + * fetch_k: int = 20 + * lambda_mult: float = 0.5 + * score_threshold: float = 0.2 + + 2. reranker + * top_n: int = 1 + ## 5. 
Trouble shooting 1. check all containers are alive diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml index 23399f9d0f..9fe1ed9621 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml @@ -74,13 +74,30 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate reranking: image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} container_name: reranking-tei-xeon-server + depends_on: + - tei-reranking-service ports: - "8000:8000" ipc: host - entrypoint: python local_reranking.py environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md index 2cac81c8f5..6799aa5007 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md @@ -80,13 +80,26 @@ Retrieval from KnowledgeBase ```bash curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" 
}' # expected output {"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"} ``` +**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can be changed are listed below. + + 1. retriever + * search_type: str = "similarity" + * k: int = 4 + * distance_threshold: Optional[float] = None + * fetch_k: int = 20 + * lambda_mult: float = 0.5 + * score_threshold: float = 0.2 + + 2. reranker + * top_n: int = 1 + ## 5. Trouble shooting 1. check all containers are alive diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index 831659dcad..1d0a445050 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -77,13 +77,30 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-gaudi-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate reranking: image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service ports: - "8000:8000" ipc: host - entrypoint: python 
local_reranking.py environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py new file mode 100644 index 0000000000..698f40da30 --- /dev/null +++ b/DocIndexRetriever/tests/test.py @@ -0,0 +1,71 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +import requests + + +def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str: + """Search the knowledge base for a specific query.""" + print(url) + proxies = {"http": ""} + if request_type == "chat_completion": + print("Sending chat completion request") + payload = { + "messages": query, + "k": 5, + "top_n": 2, + } + else: + print("Sending text request") + payload = { + "text": query, + } + response = requests.post(url, json=payload, proxies=proxies) + print(response) + if "documents" in response.json(): + docs = response.json()["documents"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = str(i) + ": " + doc + else: + context += "\n" + str(i) + ": " + doc + # print(context) + return context + elif "text" in response.json(): + return response.json()["text"] + elif "reranked_docs" in response.json(): + docs = response.json()["reranked_docs"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = doc["text"] + else: + context += "\n" + doc["text"] + # print(context) + return context + else: + return "Error parsing response from the knowledge base." 
+ + +def main(): + parser = argparse.ArgumentParser(description="Index data") + parser.add_argument("--host_ip", type=str, default="localhost", help="Host IP") + parser.add_argument("--port", type=int, default=8889, help="Port") + parser.add_argument("--request_type", type=str, default="chat_completion", help="Test type") + args = parser.parse_args() + print(args) + + host_ip = args.host_ip + port = args.port + url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port) + + response = search_knowledge_base("OPEA", url, request_type=args.request_type) + + print(response) + + +if __name__ == "__main__": + main() diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 04f32a7b54..8779944be4 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -64,7 +64,7 @@ function validate() { } function validate_megaservice() { - echo "Testing DataPrep Service" + echo "=========Ingest data==================" local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]') @@ -78,7 +78,7 @@ function validate_megaservice() { fi # Curl the Mega Service - echo "Testing retriever service" + echo "==============Testing retriever service: Text Request=================" local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ "text": "Explain the OPEA project?" 
}') @@ -93,6 +93,21 @@ function validate_megaservice() { docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log exit 1 fi + + echo "==============Testing retriever service: ChatCompletion Request================" + cd $WORKPATH/tests + local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-gaudi") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs tei-embedding-gaudi-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + exit 1 + fi } function stop_docker() { diff --git a/DocIndexRetriever/tests/test_compose_on_xeon.sh b/DocIndexRetriever/tests/test_compose_on_xeon.sh index 4a04030d49..c6ff29e29f 100644 --- a/DocIndexRetriever/tests/test_compose_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_on_xeon.sh @@ -63,8 +63,8 @@ function validate() { } function validate_megaservice() { - echo "Testing DataPrep Service" - local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + echo "===========Ingest data==================" + local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]') local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") @@ -77,16 +77,32 @@ function validate_megaservice() { fi # Curl the Mega Service - echo "Testing retriever service" + echo "================Testing retriever service: Default params================" + local CONTENT=$(curl 
http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" }') local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then - docker logs tei-embedding-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + exit 1 + fi + + echo "================Testing retriever service: ChatCompletion Request================" + cd $WORKPATH/tests + local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log