From 044cf323a785502a14334d492ee2faf84ec1f9e5 Mon Sep 17 00:00:00 2001
From: aleimu
Date: Tue, 18 Jul 2023 17:50:19 +0800
Subject: [PATCH 1/2] Update api.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the synchronous method asynchronous to increase the concurrency of API
calls and prevent requests from blocking one another at the service layer.
---
 api.py | 45 +++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/api.py b/api.py
index 693c70ac..13f66691 100644
--- a/api.py
+++ b/api.py
@@ -1,7 +1,9 @@
+import torch
+import asyncio
+import concurrent.futures
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModel
 import uvicorn, json, datetime
-import torch
 
 DEVICE = "cuda"
 DEVICE_ID = "0"
@@ -29,23 +31,30 @@ async def create_item(request: Request):
     max_length = json_post_list.get('max_length')
     top_p = json_post_list.get('top_p')
     temperature = json_post_list.get('temperature')
-    response, history = model.chat(tokenizer,
-                                   prompt,
-                                   history=history,
-                                   max_length=max_length if max_length else 2048,
-                                   top_p=top_p if top_p else 0.7,
-                                   temperature=temperature if temperature else 0.95)
-    now = datetime.datetime.now()
-    time = now.strftime("%Y-%m-%d %H:%M:%S")
-    answer = {
-        "response": response,
-        "history": history,
-        "status": 200,
-        "time": time
-    }
-    log = "[" + time + "] " + '", prompt:"' + prompt + '", response:"' + repr(response) + '"'
-    print(log)
-    torch_gc()
+
+    def _sync_chat(history):
+        response, history = model.chat(tokenizer,
+                                       prompt,
+                                       history=history,
+                                       max_length=max_length if max_length else 2048,
+                                       top_p=top_p if top_p else 0.7,
+                                       temperature=temperature if temperature else 0.95)
+        now = datetime.datetime.now()
+        time = now.strftime("%Y-%m-%d %H:%M:%S")
+        answer = {
+            "response": response,
+            "history": history,
+            "status": 200,
+            "time": time
+        }
+        log = "[" + time + "] " + '", prompt:"' + prompt + '", response:"' + repr(response) + '"'
+        print(log)
+        torch_gc()
+        return answer
+
+    loop = asyncio.get_event_loop()
+    executor = concurrent.futures.ThreadPoolExecutor()
+    answer = await loop.run_in_executor(executor, _sync_chat, history)
     return answer
 
 

From 6ba39b421c0bebbdc6d6fa68adbd7b31a592a671 Mon Sep 17 00:00:00 2001
From: aleimu
Date: Tue, 18 Jul 2023 19:19:26 +0800
Subject: [PATCH 2/2] Update api.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A bit more concise: use the standard-library function asyncio.to_thread.
---
 api.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/api.py b/api.py
index 13f66691..cc5c4b2f 100644
--- a/api.py
+++ b/api.py
@@ -1,9 +1,8 @@
-import torch
-import asyncio
-import concurrent.futures
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModel
 import uvicorn, json, datetime
+import torch
+import asyncio
 
 DEVICE = "cuda"
 DEVICE_ID = "0"
@@ -52,10 +51,7 @@ def _sync_chat(history):
         torch_gc()
         return answer
 
-    loop = asyncio.get_event_loop()
-    executor = concurrent.futures.ThreadPoolExecutor()
-    answer = await loop.run_in_executor(executor, _sync_chat, history)
-    return answer
+    return await asyncio.to_thread(_sync_chat, history=history)
 
 
 if __name__ == '__main__':
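
For reference, below is a minimal runnable sketch of the pattern the series ends up with: a blocking model call handed off to a worker thread via asyncio.to_thread (Python 3.9+), leaving the FastAPI event loop free to serve other requests. The /chat endpoint and the slow_generate stand-in are illustrative only; they are not part of api.py.

import asyncio
import time

from fastapi import FastAPI, Request
import uvicorn

app = FastAPI()


def slow_generate(prompt: str) -> str:
    # Stand-in for the blocking model.chat(...) call.
    time.sleep(5)
    return "echo: " + prompt


@app.post("/chat")
async def chat(request: Request):
    payload = await request.json()
    # Run the blocking call in a worker thread; the event loop keeps
    # handling other requests while it executes.
    response = await asyncio.to_thread(slow_generate, payload.get("prompt", ""))
    return {"response": response, "status": 200}


if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

asyncio.to_thread submits the callable to the event loop's default ThreadPoolExecutor, which is roughly what patch 1 did by hand, minus constructing a fresh executor on every request. Concurrent requests may still serialize on the model itself, but they no longer block the server's single async loop.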