From 4bff4d2ed195dcd4defc1e288201af87bd624d7c Mon Sep 17 00:00:00 2001 From: Oleg Klimov Date: Thu, 4 Jan 2024 08:27:05 +0100 Subject: [PATCH 1/8] a bit more correct "finally" section --- self_hosting_machinery/webgui/tab_loras.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/self_hosting_machinery/webgui/tab_loras.py b/self_hosting_machinery/webgui/tab_loras.py index 3a978664..90632370 100644 --- a/self_hosting_machinery/webgui/tab_loras.py +++ b/self_hosting_machinery/webgui/tab_loras.py @@ -84,8 +84,10 @@ async def write_to_file() -> JSONResponse: log("Error while uploading file: %s" % (e or str(type(e)))) return JSONResponse({"detail": "Cannot upload file, see logs for details"}, status_code=500) finally: - if os.path.exists(tmp_path): + try: os.remove(tmp_path) + except: + pass f = Path(os.path.join(env.DIR_LORAS, file.filename)) From 9c59d608155f7a084068cb546d5027ae6e702775 Mon Sep 17 00:00:00 2001 From: JegernOUTT Date: Mon, 15 Jan 2024 15:47:01 +1030 Subject: [PATCH 2/8] missing exception block --- self_hosting_machinery/finetune/scripts/finetune_filter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/self_hosting_machinery/finetune/scripts/finetune_filter.py b/self_hosting_machinery/finetune/scripts/finetune_filter.py index 3eb0b66c..8becedc9 100644 --- a/self_hosting_machinery/finetune/scripts/finetune_filter.py +++ b/self_hosting_machinery/finetune/scripts/finetune_filter.py @@ -108,6 +108,9 @@ def _get_file_loss(file) -> float: except InvalidLossValueException as e: files_status_context.reject_file(file, reason=str(e)) continue + except Exception as e: + files_status_context.reject_file(file, reason=str(e)) + continue if file_loss > filter_loss_threshold: files_status_context.reject_file(file, reason=f"loss {file_loss:.3f}") From c1b907525c75768053c60f515cc056ac473f3114 Mon Sep 17 00:00:00 2001 From: Dimitry Ageev Date: Mon, 15 Jan 2024 14:25:16 +0300 Subject: [PATCH 3/8] Cassandra fixes (#265) * store cassandra data and logs in perm storage volume * update readme to not use database volume * database start script * less memory consumption --- Dockerfile | 2 ++ README.md | 3 +-- database-start.sh | 23 +++++++++++++++++++++++ docker-entrypoint.sh | 3 +-- 4 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 database-start.sh diff --git a/Dockerfile b/Dockerfile index 8ce2addd..941caa2b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,6 +75,8 @@ ENV RDMAV_HUGEPAGES_SAFE 0 EXPOSE 8008 +COPY database-start.sh / +RUN chmod +x database-start.sh COPY docker-entrypoint.sh / RUN chmod +x docker-entrypoint.sh diff --git a/README.md b/README.md index 9a20bda8..0725050e 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,10 @@ On Windows you need to install WSL 2 first, [one guide to do this](https://docs. Run docker container with following command: ```commandline -docker run -d --rm --gpus all -p 8008:8008 -v refact-perm-storage:/perm_storage -v refact-database:/var/lib/cassandra smallcloud/refact_self_hosting:latest +docker run -d --rm --gpus all -p 8008:8008 -v refact-perm-storage:/perm_storage smallcloud/refact_self_hosting:latest ``` `perm-storage` is a volume that is mounted inside the container. All the configuration files, downloaded weights and logs are stored here. -`refact-database` is a volume for database where server stores statistics from your users. To upgrade the docker, delete it using `docker kill XXX` (the volume `perm-storage` will retain your data), run `docker pull smallcloud/refact_self_hosting` and run it again. diff --git a/database-start.sh b/database-start.sh new file mode 100644 index 00000000..9ba1ac08 --- /dev/null +++ b/database-start.sh @@ -0,0 +1,23 @@ +#!/bin/sh +REFACT_CASSANDRA_DIR="$REFACT_PERM_DIR/cassandra" +if [ ! -d "$REFACT_CASSANDRA_DIR" ]; then + mkdir -p "$REFACT_CASSANDRA_DIR" + chown cassandra:cassandra "$REFACT_CASSANDRA_DIR" + if [ ! -z "$(ls /var/lib/cassandra)" ]; then + cp -rp /var/lib/cassandra/* "$REFACT_CASSANDRA_DIR" + fi + cp -rp /var/log/cassandra "$REFACT_CASSANDRA_DIR/log" +fi +# patch cassandra config to work with REFACT_CASSANDRA_DIR +sed -i "s|/var/lib/cassandra|$REFACT_CASSANDRA_DIR|g" /etc/cassandra/cassandra.yaml +# patch cassandra.in.sh for less memory consumption and logging to REFACT_CASSANDRA_DIR/log +REFACT_CASSANDRA_INCLUDE=/usr/sbin/cassandra.in.sh +cp /usr/share/cassandra/cassandra.in.sh "$REFACT_CASSANDRA_INCLUDE" +echo "MAX_HEAP_SIZE=2G" >> "$REFACT_CASSANDRA_INCLUDE" +echo "HEAP_NEWSIZE=400M" >> "$REFACT_CASSANDRA_INCLUDE" +echo "CASSANDRA_LOG_DIR=$REFACT_CASSANDRA_DIR/log" >> "$REFACT_CASSANDRA_INCLUDE" + +if [ ! -z "$(service cassandra status | grep 'not running')" ]; then + service cassandra start + echo "cassandra database started on localhost" +fi diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 1dfdbd29..4ab821d9 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,6 +1,5 @@ #!/bin/sh if [ -z "$REFACT_DATABASE_HOST" ]; then - sudo service cassandra start - echo "cassandra database started on localhost" + sh database-start.sh fi python -m self_hosting_machinery.watchdog.docker_watchdog From 40d29ffab33ac6a0cda6d0afc8898e5ff7ef63a4 Mon Sep 17 00:00:00 2001 From: Oleg Klimov Date: Mon, 15 Jan 2024 14:43:12 +0100 Subject: [PATCH 4/8] Ooops bad log rotation --- self_hosting_machinery/watchdog/docker_watchdog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/self_hosting_machinery/watchdog/docker_watchdog.py b/self_hosting_machinery/watchdog/docker_watchdog.py index 39ebd7c9..b7580ef3 100644 --- a/self_hosting_machinery/watchdog/docker_watchdog.py +++ b/self_hosting_machinery/watchdog/docker_watchdog.py @@ -40,7 +40,7 @@ def log(*args): list_of_files.sort() while len(list_of_files) > 20: try: - os.remove(list_of_files.pop()) + os.remove(list_of_files.pop(0)) except OSError: pass with open(os.path.join(env.DIR_LOGS, "watchdog_%s.log" % date), "a") as f: From edf144b5c969b5f3d7f3044bd52e353f00ab64b9 Mon Sep 17 00:00:00 2001 From: Dimitry Ageev Date: Mon, 15 Jan 2024 18:12:41 +0300 Subject: [PATCH 5/8] cassandra cannot start after container restarted, removing old pid file helps (#272) --- database-start.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/database-start.sh b/database-start.sh index 9ba1ac08..6ebae226 100644 --- a/database-start.sh +++ b/database-start.sh @@ -17,6 +17,10 @@ echo "MAX_HEAP_SIZE=2G" >> "$REFACT_CASSANDRA_INCLUDE" echo "HEAP_NEWSIZE=400M" >> "$REFACT_CASSANDRA_INCLUDE" echo "CASSANDRA_LOG_DIR=$REFACT_CASSANDRA_DIR/log" >> "$REFACT_CASSANDRA_INCLUDE" +if [ ! -z "$(service cassandra status | grep 'could not access pidfile')" ]; then + rm /var/run/cassandra/cassandra.pid +fi + if [ ! -z "$(service cassandra status | grep 'not running')" ]; then service cassandra start echo "cassandra database started on localhost" From e4d00b5a90097b736902a74dd8dc9bd8dfdc3387 Mon Sep 17 00:00:00 2001 From: Valerii Date: Mon, 15 Jan 2024 15:20:17 +0000 Subject: [PATCH 6/8] adjustments to dash_prime.py (#270) * adjustments to dash_prime.py * added comment --- .../dashboard_service/dashboards/dash_prime.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py b/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py index 0534a004..5ecf695a 100644 --- a/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py +++ b/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py @@ -15,6 +15,9 @@ def robot_human_ratio(robot: int, human: int) -> float: return 1 if robot == 0: return 0 + # in older versions of refact LSP negative values of human metric existed + if robot + human == 0: + return 0 return round(robot / (robot + human), 2) @@ -181,10 +184,10 @@ def extract_stats(df: pd.DataFrame, date_kind: str) -> Dict: if lang not in languages: continue res_loc[lang] = { - "Assistant": (robot := int(group["robot_characters"].sum())), + "Refact": (robot := int(group["robot_characters"].sum())), "Human": (human := int(group["human_characters"].sum())), "Total (characters)": robot + human, - "A/(A+H)": robot_human_ratio(robot, human), + "Refact Impact": robot_human_ratio(robot, human), "Completions": int(group["completions_cnt"].sum()), "Users": int(group["tenant_name"].nunique()), } @@ -194,7 +197,7 @@ def extract_stats(df: pd.DataFrame, date_kind: str) -> Dict: res_loc = { 'data': fmt_vals, 'columns': ['Language', *res_loc[list(res_loc.keys())[0]].keys()], - 'title': f"Assistant's impact by language: {date_kind}" + 'title': f"Refact's impact by language: {date_kind}" } return res_loc From 5322bad66f73cc3e47c4499d2655e71c31c60c4d Mon Sep 17 00:00:00 2001 From: Oleg Klimov Date: Mon, 15 Jan 2024 20:01:36 +0100 Subject: [PATCH 7/8] transmit caps_version, don't restart lsp, misc (#268) --- self_hosting_machinery/scripts/env.py | 2 - .../watchdog/watchdog.d/lsp.cfg | 5 +- .../webgui/selfhost_fastapi_completions.py | 104 +++--------------- .../webgui/selfhost_fastapi_gpu.py | 2 +- .../webgui/selfhost_model_assigner.py | 11 +- .../webgui/selfhost_model_resolve.py | 2 +- .../webgui/selfhost_queue.py | 20 +++- .../webgui/selfhost_req_queue.py | 32 ------ .../webgui/tab_models_host.py | 1 - self_hosting_machinery/webgui/webgui.py | 7 +- 10 files changed, 45 insertions(+), 141 deletions(-) delete mode 100644 self_hosting_machinery/webgui/selfhost_req_queue.py diff --git a/self_hosting_machinery/scripts/env.py b/self_hosting_machinery/scripts/env.py index e15b5825..54a004a8 100644 --- a/self_hosting_machinery/scripts/env.py +++ b/self_hosting_machinery/scripts/env.py @@ -43,8 +43,6 @@ FLAG_LAUNCH_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_launch_finetune.flag") FLAG_STOP_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_stop_finetune.flag") -FLAG_RESTART_LSP = os.path.join(DIR_WATCHDOG_D, "_restart_lsp.flag") - def create_dirs(): os.makedirs(DIR_WATCHDOG_D, exist_ok=True) os.makedirs(DIR_WEIGHTS, exist_ok=True) diff --git a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg index 54d4617f..7f975244 100644 --- a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg +++ b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg @@ -1,12 +1,9 @@ { "policy": ["always_on"], - "interrupt_when_file_appears": "%FLAG_RESTART_LSP%", "command_line": [ "refact-lsp", "--address-url", "http://127.0.0.1:8008", - "--http-port", "8001", - "--lsp-port", "8002", - "--logs-stderr" + "--http-port", "8001" ], "gpus": [] } diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py index e56e7d12..2b3887cc 100644 --- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py +++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py @@ -8,12 +8,12 @@ import litellm from fastapi import APIRouter, Request, HTTPException, Query -from fastapi.responses import StreamingResponse +from fastapi.responses import Response, StreamingResponse from self_hosting_machinery import env from self_hosting_machinery.webgui.selfhost_model_resolve import completion_resolve_model from self_hosting_machinery.webgui.selfhost_model_resolve import static_resolve_model -from self_hosting_machinery.webgui.selfhost_req_queue import Ticket +from self_hosting_machinery.webgui.selfhost_queue import Ticket from self_hosting_machinery.webgui.selfhost_webutils import log from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue from self_hosting_machinery.webgui.selfhost_model_assigner import ModelAssigner @@ -107,7 +107,7 @@ class ChatContext(NlpSamplingParams): function: str = Query(default="chat", regex="^[a-zA-Z0-9_\.\-]+$") -async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen, created_ts): +async def _completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen, created_ts, caps_version: int): try: packets_cnt = 0 while 1: @@ -117,6 +117,7 @@ async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen log("TIMEOUT %s" % ticket.id()) msg = {"status": "error", "human_readable_message": "timeout"} not_seen_resp = copy.deepcopy(msg) + not_seen_resp["caps_version"] = caps_version if "choices" in not_seen_resp: for i in range(post.n): newtext = not_seen_resp["choices"][i]["text"] @@ -152,40 +153,6 @@ async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen ticket.cancelled = True -async def diff_streamer(ticket: Ticket, post: DiffCompletion, timeout, created_ts): - try: - while 1: - try: - msg = await asyncio.wait_for(ticket.streaming_queue.get(), timeout) - except asyncio.TimeoutError: - log("TIMEOUT %s" % ticket.id()) - msg = {"status": "error", "human_readable_message": "timeout"} - if not post.stream: - if msg.get("status", "") == "in_progress": - continue - yield json.dumps(msg) - break - tmp = json.dumps(msg) - yield "data: " + tmp + "\n\n" - log(" " + red_time(created_ts) + " stream %s <- %i bytes" % (ticket.id(), len(tmp))) - if msg.get("status", "") != "in_progress": - break - if post.stream: - yield "data: [DONE]" + "\n\n" - log(red_time(created_ts) + " /finished call %s" % ticket.id()) - ticket.done() - # fastapi_stats.stats_accum[kt] += msg.get("generated_tokens_n", 0) - # fastapi_stats.stats_accum[kcomp] += 1 - # fastapi_stats.stats_lists_accum["stat_latency_" + post.model].append(time.time() - created_ts) - finally: - if ticket.id() is not None: - log(" *** CANCEL *** cancelling %s " % ticket.id() + red_time(created_ts)) - # fastapi_stats.stats_accum["stat_api_cancelled"] += 1 - # fastapi_stats.stats_accum["stat_m_" + post.model + "_cancelled"] += 1 - ticket.cancelled = True - ticket.done() - - async def chat_streamer(ticket: Ticket, timeout, created_ts): seen: Dict[int, str] = dict() try: @@ -240,7 +207,6 @@ def __init__(self, # API for direct FIM and Chat usage self.add_api_route("/v1/login", self._login, methods=["GET"]) self.add_api_route("/v1/secret-key-activate", self._secret_key_activate, methods=["GET"]) - self.add_api_route("/v1/contrast", self._contrast, methods=["POST"]) self.add_api_route("/v1/chat", self._chat, methods=["POST"]) # API for LSP server @@ -275,10 +241,11 @@ async def _coding_assistant_caps(self): or model_name in litellm.model_list: code_chat_default_model = model_name break - return { + config_mtime = self._model_assigner.config_inference_mtime() + data = { "cloud_name": "Refact Self-Hosted", - "endpoint_template": "v1/completions", - "endpoint_chat_passthrough": "v1/chat/completions", + "endpoint_template": "/v1/completions", + "endpoint_chat_passthrough": "/v1/chat/completions", "endpoint_style": "openai", "telemetry_basic_dest": "/stats/telemetry-basic", "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets", @@ -291,7 +258,9 @@ async def _coding_assistant_caps(self): for model in models_available if model in self._model_assigner.models_db }, + "caps_version": config_mtime, } + return Response(content=json.dumps(data, indent=4), media_type="application/json") async def _login(self): longthink_functions = dict() @@ -348,10 +317,12 @@ async def _secret_key_activate(self): async def _completions(self, post: NlpCompletion, account: str = "user"): ticket = Ticket("comp-") req = post.clamp() + caps_version = self._model_assigner.config_inference_mtime() # use mtime as a version, if that changes the client will know to refresh caps model_name, err_msg = static_resolve_model(post.model, self._inference_queue) if err_msg: log("%s model resolve \"%s\" -> error \"%s\" from %s" % (ticket.id(), post.model, err_msg, account)) - raise HTTPException(status_code=400, detail=err_msg) + return Response(status_code=400, content=json.dumps({"detail": err_msg, "caps_version": caps_version}, indent=4), media_type="application/json") + log("%s model resolve \"%s\" -> \"%s\" from %s" % (ticket.id(), post.model, model_name, account)) req.update({ "object": "text_completion_req", @@ -367,57 +338,10 @@ async def _completions(self, post: NlpCompletion, account: str = "user"): await q.put(ticket) seen = [""] * post.n return StreamingResponse( - completion_streamer(ticket, post, self._timeout, seen, req["created"]), + _completion_streamer(ticket, post, self._timeout, seen, req["created"], caps_version=caps_version), media_type=("text/event-stream" if post.stream else "application/json"), ) - async def _contrast(self, post: DiffCompletion, request: Request, account: str = "user"): - if post.function != "diff-anywhere": - if post.cursor_file not in post.sources: - raise HTTPException(status_code=400, detail="cursor_file='%s' is not in sources=%s" % (post.cursor_file, list(post.sources.keys()))) - if post.cursor0 < 0 or post.cursor1 < 0: - raise HTTPException(status_code=400, detail="cursor0=%d or cursor1=%d is negative" % (post.cursor0, post.cursor1)) - filetext = post.sources[post.cursor_file] - if post.cursor0 > len(filetext) or post.cursor1 > len(filetext): - raise HTTPException(status_code=400, detail="cursor0=%d or cursor1=%d is beyond file length=%d" % (post.cursor0, post.cursor1, len(filetext))) - for fn, text in post.sources.items(): - if len(text) > 180*1024: - raise HTTPException(status_code=400, detail="file '%s' is too long (%d bytes)" % (fn, len(text))) - ticket = Ticket("comp-") - if post.function == "infill": - model_name, err_msg = completion_resolve_model(self._inference_queue) - else: - model_name, err_msg = static_resolve_model(post.model, self._inference_queue) - if err_msg: - log("%s model resolve \"%s\" func \"%s\" -> error \"%s\" from %s" % (ticket.id(), post.model, post.function, err_msg, account)) - raise HTTPException(status_code=400, detail=err_msg) - log("%s model resolve \"%s\" func \"%s\" -> \"%s\" from %s" % (ticket.id(), post.model, post.function, model_name, account)) - if post.function == "highlight": - post.max_tokens = 0 - req = post.clamp() - req.update({ - "object": "diff_completion_req", - "account": account, - "model": model_name, - "intent": post.intent, - "sources": post.sources, - "cursor_file": post.cursor_file, - "cursor0": post.cursor0, - "cursor1": post.cursor1, - "function": post.function, - "max_edits": post.max_edits, - "stream": post.stream, - }) - post_raw = await request.json() - if "poi" in post_raw: - req["poi"] = post_raw["poi"] - ticket.call.update(req) - q = self._inference_queue.model_name_to_queue(ticket, model_name) - # kt, kcomp = await _model_hit(red, ticket, req, model_name, account) - self._id2ticket[ticket.id()] = ticket - await q.put(ticket) - return StreamingResponse(diff_streamer(ticket, post, self._timeout, req["created"])) - async def _chat(self, post: ChatContext, request: Request, account: str = "user"): ticket = Ticket("comp-") diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py b/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py index 32efeaa8..17162835 100644 --- a/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py +++ b/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py @@ -5,7 +5,7 @@ from fastapi import APIRouter, Query, Request, Header, HTTPException -from self_hosting_machinery.webgui.selfhost_req_queue import Ticket +from self_hosting_machinery.webgui.selfhost_queue import Ticket from self_hosting_machinery.webgui.selfhost_webutils import log from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py index e0f5f56a..d5d03296 100644 --- a/self_hosting_machinery/webgui/selfhost_model_assigner.py +++ b/self_hosting_machinery/webgui/selfhost_model_assigner.py @@ -224,7 +224,10 @@ def model_assignment(self): } return j - @staticmethod - def restart_lsp(): - with open(env.FLAG_RESTART_LSP, "w") as f: - f.write("") + def config_inference_mtime(self) -> int: + if os.path.exists(env.CONFIG_INFERENCE): + try: + return int(os.path.getmtime(env.CONFIG_INFERENCE)) + except OSError: + return 0 + return 0 diff --git a/self_hosting_machinery/webgui/selfhost_model_resolve.py b/self_hosting_machinery/webgui/selfhost_model_resolve.py index 6b508cb7..d15137a9 100644 --- a/self_hosting_machinery/webgui/selfhost_model_resolve.py +++ b/self_hosting_machinery/webgui/selfhost_model_resolve.py @@ -49,4 +49,4 @@ def _family(model: str) -> str: if not model_name or _family(model_name) == _family(have_model): return have_model, "" else: - return "", f"model is not loaded (3)" + return "", f"model \"{model_name}\" is not loaded (3)" diff --git a/self_hosting_machinery/webgui/selfhost_queue.py b/self_hosting_machinery/webgui/selfhost_queue.py index b5620fda..36768eea 100644 --- a/self_hosting_machinery/webgui/selfhost_queue.py +++ b/self_hosting_machinery/webgui/selfhost_queue.py @@ -6,7 +6,25 @@ from self_hosting_machinery import env from self_hosting_machinery.webgui.selfhost_webutils import log from fastapi import HTTPException -from typing import Dict, List +from typing import Dict, List, Any +import uuid + + +class Ticket: + def __init__(self, id_prefix): + self.call: Dict[str, Any] = dict() + random_guid = str(uuid.uuid4()).replace("-", "")[0:12] + self.call["id"] = id_prefix + random_guid + self.cancelled: bool = False + self.processed_by_infmod_guid: str = "" + self.streaming_queue = asyncio.queues.Queue() + + def id(self): + return self.call.get("id", None) + + def done(self): + if "id" in self.call: + del self.call["id"] class InferenceQueue: diff --git a/self_hosting_machinery/webgui/selfhost_req_queue.py b/self_hosting_machinery/webgui/selfhost_req_queue.py deleted file mode 100644 index ec43a39e..00000000 --- a/self_hosting_machinery/webgui/selfhost_req_queue.py +++ /dev/null @@ -1,32 +0,0 @@ -import asyncio -import random - -from self_hosting_machinery.webgui import selfhost_webutils - -from typing import Dict, Any - - -__all__ = ["Ticket"] - - -# TODO: why not uuid??? -def random_guid(n=12): - random_chars = "0123456789" + "ABCDEFGHIJKLNMPQRSTUVWXYZ" + "ABCDEFGHIJKLNMPQRSTUVWXYZ".lower() - guid = "".join([random_chars[random.randint(0, len(random_chars)-1)] for _ in range(n)]) - return guid - - -class Ticket: - def __init__(self, id_prefix): - self.call: Dict[str, Any] = dict() - self.call["id"] = id_prefix + random_guid() - self.cancelled: bool = False - self.processed_by_infmod_guid: str = "" - self.streaming_queue = asyncio.queues.Queue() - - def id(self): - return self.call.get("id", None) - - def done(self): - if "id" in self.call: - del self.call["id"] diff --git a/self_hosting_machinery/webgui/tab_models_host.py b/self_hosting_machinery/webgui/tab_models_host.py index ac0606b3..c1138e85 100644 --- a/self_hosting_machinery/webgui/tab_models_host.py +++ b/self_hosting_machinery/webgui/tab_models_host.py @@ -51,5 +51,4 @@ async def _tab_host_models_assign(self, post: TabHostModelsAssign): else: validated["completion"] = "" self._model_assigner.models_to_watchdog_configs(validated) - self._model_assigner.restart_lsp() return JSONResponse("OK") diff --git a/self_hosting_machinery/webgui/webgui.py b/self_hosting_machinery/webgui/webgui.py index 6f15df22..68f853ed 100644 --- a/self_hosting_machinery/webgui/webgui.py +++ b/self_hosting_machinery/webgui/webgui.py @@ -13,7 +13,6 @@ from self_hosting_machinery.webgui.selfhost_model_assigner import ModelAssigner from self_hosting_machinery.webgui.selfhost_plugins import PluginsRouter -from self_hosting_machinery.webgui.selfhost_req_queue import Ticket from self_hosting_machinery.webgui.selfhost_fastapi_completions import CompletionsRouter from self_hosting_machinery.webgui.selfhost_fastapi_gpu import GPURouter from self_hosting_machinery.webgui.tab_server_logs import TabServerLogRouter @@ -21,7 +20,7 @@ from self_hosting_machinery.webgui.tab_upload import TabUploadRouter from self_hosting_machinery.webgui.tab_finetune import TabFinetuneRouter from self_hosting_machinery.webgui.tab_models_host import TabHostRouter -from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue +from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue, Ticket from self_hosting_machinery.webgui.selfhost_static import StaticRouter from self_hosting_machinery.webgui.tab_loras import TabLorasRouter from self_hosting_machinery.webgui.selfhost_statistics import TabStatisticsRouter @@ -103,9 +102,7 @@ def handle_sigint(*args): signal.signal(signal.SIGINT, handle_sigint) signal.signal(signal.SIGUSR1, handle_sigint) - - # NOTE: try restart LSP after server started - self._model_assigner.restart_lsp() + signal.signal(signal.SIGTERM, handle_sigint) async def init_database(): await self._database.connect() From e78cf082231eb7d92ffaff3a7291c9e37ef937e8 Mon Sep 17 00:00:00 2001 From: mitya Date: Tue, 16 Jan 2024 12:03:18 +0300 Subject: [PATCH 8/8] update version to v1.3.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f01f4812..8e2bf504 100644 --- a/setup.py +++ b/setup.py @@ -93,7 +93,7 @@ def get_install_requires(packages): setup( name="refact-self-hosting", - version="1.3.0", + version="1.3.1", py_modules=list(setup_packages.keys()), package_data={ name: py_package.data