Skip to content

Commit

Permalink
update tei embedding format. (opea-project#1035)
Browse files Browse the repository at this point in the history
  • Loading branch information
lkk12014402 authored Dec 16, 2024
1 parent 46835f9 commit c955e5e
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 98 deletions.
38 changes: 8 additions & 30 deletions comps/embeddings/tei/langchain/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,20 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
Then you need to test your TEI service using the following commands:

```bash
curl localhost:$your_port/embed \
curl localhost:$your_port/v1/embeddings \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-d '{"input":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

Start the embedding service with the TEI_EMBEDDING_ENDPOINT.

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port/v1/embeddings"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
python embedding_tei.py
```

#### Start Embedding Service with Local Model

```bash
python local_embedding.py
```

## 🚀2. Start Microservice with Docker (Optional 2)

### 2.1 Start Embedding Service with TEI
Expand All @@ -68,16 +62,16 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
Then you need to test your TEI service using the following commands:

```bash
curl localhost:$your_port/embed \
curl localhost:$your_port/v1/embeddings \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-d '{"input":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

Export the `TEI_EMBEDDING_ENDPOINT` for later usage:

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port/v1/embeddings"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
```

Expand Down Expand Up @@ -113,23 +107,7 @@ curl http://localhost:6000/v1/health_check\

### 3.2 Consume Embedding Service

Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
The input and output follow the [OpenAI Embeddings API](https://platform.openai.com/docs/api-reference/embeddings) format.

```bash
## Input single text
Expand All @@ -141,6 +119,6 @@ curl http://localhost:6000/v1/embeddings\
## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-d '{"input":["Hello, world!","How are you?"], "encoding_format":"base64"}' \
-H 'Content-Type: application/json'
```
55 changes: 15 additions & 40 deletions comps/embeddings/tei/langchain/embedding_tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import os
import time
from typing import List, Union
from typing import Dict, List, Union

from huggingface_hub import AsyncInferenceClient

Expand All @@ -19,12 +19,7 @@
statistics_dict,
)
from comps.cores.mega.utils import get_access_token
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)
from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData

logger = CustomLogger("embedding_tei_langchain")
logflag = os.getenv("LOGFLAG", False)
Expand All @@ -45,56 +40,36 @@
port=6000,
)
@register_statistics(names=["opea_service@embedding_tei_langchain"])
async def embedding(
input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
async def embedding(input: Union[TextDoc, EmbeddingRequest]) -> Union[EmbedDoc, EmbeddingResponse]:
start = time.time()
access_token = (
get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None
)
async_client = get_async_inference_client(access_token)
if logflag:
logger.info(input)

if isinstance(input, TextDoc):
embed_vector = await aembed_query(input.text, async_client)
embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
res = EmbedDoc(text=input.text, embedding=embedding_res)
embedding_res = await aembed_query({"input": input.text}, async_client)
embedding_vec = [data["embedding"] for data in embedding_res["data"]]
embedding_vec = embedding_vec[0] if isinstance(input.text, str) else embedding_vec
res = EmbedDoc(text=input.text, embedding=embedding_vec)
else:
embed_vector = await aembed_query(input.input, async_client)
if input.dimensions is not None:
embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]

# for standard openai embedding format
res = EmbeddingResponse(
data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
embedding_res = await aembed_query(
{"input": input.input, "encoding_format": input.encoding_format, "model": input.model, "user": input.user},
async_client,
)

if isinstance(input, ChatCompletionRequest):
input.embedding = res
# keep
res = input
res = EmbeddingResponse(**embedding_res)

statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None)
if logflag:
logger.info(res)
return res


async def aembed_query(
text: Union[str, List[str]], async_client: AsyncInferenceClient, model_kwargs=None, task=None
) -> List[List[float]]:
texts = [text] if isinstance(text, str) else text
response = await aembed_documents(texts, async_client, model_kwargs=model_kwargs, task=task)
return response


async def aembed_documents(
texts: List[str], async_client: AsyncInferenceClient, model_kwargs=None, task=None
) -> List[List[float]]:
texts = [text.replace("\n", " ") for text in texts]
_model_kwargs = model_kwargs or {}
responses = await async_client.post(json={"inputs": texts, **_model_kwargs}, task=task)
return json.loads(responses.decode())
async def aembed_query(request: Dict, async_client: AsyncInferenceClient) -> Union[Dict, List[List[float]]]:
    """POST ``request`` as a JSON body through ``async_client`` and return the parsed reply.

    Args:
        request: Payload handed to ``async_client.post`` via its ``json`` argument.
        async_client: Client whose awaitable ``post`` method performs the call.

    Returns:
        The JSON-decoded response body.
    """
    # The reply is decoded before JSON parsing, so it is presumably raw bytes
    # -- confirm against the client library.
    raw_body = await async_client.post(json=request)
    payload = json.loads(raw_body.decode())
    return payload


def get_async_inference_client(access_token: str) -> AsyncInferenceClient:
Expand Down
27 changes: 0 additions & 27 deletions comps/embeddings/tei/langchain/local_embedding_768.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/embeddings/test_embeddings_tei_langchain.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function start_service() {
model="BAAI/bge-base-en-v1.5"
unset http_proxy
docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}/v1/embeddings"
tei_service_port=5002
docker run -d --name="test-comps-embedding-tei-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps
sleep 3m
Expand Down

0 comments on commit c955e5e

Please sign in to comment.