From 044cf323a785502a14334d492ee2faf84ec1f9e5 Mon Sep 17 00:00:00 2001
From: aleimu
Date: Tue, 18 Jul 2023 17:50:19 +0800
Subject: [PATCH 1/2] Update api.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the synchronous method asynchronous to increase the concurrency of API
calls and prevent requests from blocking one another at the service layer.
---
 api.py | 45 +++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/api.py b/api.py
index 693c70ac..13f66691 100644
--- a/api.py
+++ b/api.py
@@ -1,7 +1,9 @@
+import torch
+import asyncio
+import concurrent.futures
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModel
 import uvicorn, json, datetime
-import torch
 
 DEVICE = "cuda"
 DEVICE_ID = "0"
@@ -29,23 +31,30 @@ async def create_item(request: Request):
     max_length = json_post_list.get('max_length')
     top_p = json_post_list.get('top_p')
     temperature = json_post_list.get('temperature')
-    response, history = model.chat(tokenizer,
-                                   prompt,
-                                   history=history,
-                                   max_length=max_length if max_length else 2048,
-                                   top_p=top_p if top_p else 0.7,
-                                   temperature=temperature if temperature else 0.95)
-    now = datetime.datetime.now()
-    time = now.strftime("%Y-%m-%d %H:%M:%S")
-    answer = {
-        "response": response,
-        "history": history,
-        "status": 200,
-        "time": time
-    }
-    log = "[" + time + "] " + '", prompt:"' + prompt + '", response:"' + repr(response) + '"'
-    print(log)
-    torch_gc()
+
+    def _sync_chat(history):
+        response, history = model.chat(tokenizer,
+                                       prompt,
+                                       history=history,
+                                       max_length=max_length if max_length else 2048,
+                                       top_p=top_p if top_p else 0.7,
+                                       temperature=temperature if temperature else 0.95)
+        now = datetime.datetime.now()
+        time = now.strftime("%Y-%m-%d %H:%M:%S")
+        answer = {
+            "response": response,
+            "history": history,
+            "status": 200,
+            "time": time
+        }
+        log = "[" + time + "] " + '", prompt:"' + prompt + '", response:"' + repr(response) + '"'
+        print(log)
+        torch_gc()
+        return answer
+
+    loop = asyncio.get_event_loop()
+    executor = concurrent.futures.ThreadPoolExecutor()
+    answer = await loop.run_in_executor(executor, _sync_chat, history)
     return answer
 
 

From 6ba39b421c0bebbdc6d6fa68adbd7b31a592a671 Mon Sep 17 00:00:00 2001
From: aleimu
Date: Tue, 18 Jul 2023 19:19:26 +0800
Subject: [PATCH 2/2] Update api.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A bit more concise: use the standard-library function asyncio.to_thread.
---
 api.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/api.py b/api.py
index 13f66691..cc5c4b2f 100644
--- a/api.py
+++ b/api.py
@@ -1,9 +1,8 @@
-import torch
-import asyncio
-import concurrent.futures
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModel
 import uvicorn, json, datetime
+import torch
+import asyncio
 
 DEVICE = "cuda"
 DEVICE_ID = "0"
@@ -52,10 +51,7 @@ def _sync_chat(history):
         torch_gc()
         return answer
 
-    loop = asyncio.get_event_loop()
-    executor = concurrent.futures.ThreadPoolExecutor()
-    answer = await loop.run_in_executor(executor, _sync_chat, history)
-    return answer
+    return await asyncio.to_thread(_sync_chat, history=history)
 
 
 if __name__ == '__main__':
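
For reference, below is a minimal runnable sketch of the pattern the series ends up with: a blocking model call handed off to a worker thread via asyncio.to_thread (Python 3.9+), leaving the FastAPI event loop free to serve other requests. The /chat endpoint and the slow_generate stand-in are illustrative only; they are not part of api.py.

import asyncio
import time

from fastapi import FastAPI, Request
import uvicorn

app = FastAPI()


def slow_generate(prompt: str) -> str:
    # Stand-in for the blocking model.chat(...) call.
    time.sleep(5)
    return "echo: " + prompt


@app.post("/chat")
async def chat(request: Request):
    payload = await request.json()
    # Run the blocking call in a worker thread; the event loop keeps
    # handling other requests while it executes.
    response = await asyncio.to_thread(slow_generate, payload.get("prompt", ""))
    return {"response": response, "status": 200}


if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

asyncio.to_thread submits the callable to the event loop's default ThreadPoolExecutor, which is roughly what patch 1 did by hand, minus constructing a fresh executor on every request. Concurrent requests may still serialize on the model itself, but they no longer block the server's single async loop.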