From 4bff4d2ed195dcd4defc1e288201af87bd624d7c Mon Sep 17 00:00:00 2001
From: Oleg Klimov <omgtech@gmail.com>
Date: Thu, 4 Jan 2024 08:27:05 +0100
Subject: [PATCH 1/8] a bit more correct "finally" section

---
 self_hosting_machinery/webgui/tab_loras.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/self_hosting_machinery/webgui/tab_loras.py b/self_hosting_machinery/webgui/tab_loras.py
index 3a978664..90632370 100644
--- a/self_hosting_machinery/webgui/tab_loras.py
+++ b/self_hosting_machinery/webgui/tab_loras.py
@@ -84,8 +84,10 @@ async def write_to_file() -> JSONResponse:
                 log("Error while uploading file: %s" % (e or str(type(e))))
                 return JSONResponse({"detail": "Cannot upload file, see logs for details"}, status_code=500)
             finally:
-                if os.path.exists(tmp_path):
+                try:
                     os.remove(tmp_path)
+                except:
+                    pass
 
         f = Path(os.path.join(env.DIR_LORAS, file.filename))
 

From 9c59d608155f7a084068cb546d5027ae6e702775 Mon Sep 17 00:00:00 2001
From: JegernOUTT <sergey.vakhreev@gmail.com>
Date: Mon, 15 Jan 2024 15:47:01 +1030
Subject: [PATCH 2/8] missing exception block

---
 self_hosting_machinery/finetune/scripts/finetune_filter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/self_hosting_machinery/finetune/scripts/finetune_filter.py b/self_hosting_machinery/finetune/scripts/finetune_filter.py
index 3eb0b66c..8becedc9 100644
--- a/self_hosting_machinery/finetune/scripts/finetune_filter.py
+++ b/self_hosting_machinery/finetune/scripts/finetune_filter.py
@@ -108,6 +108,9 @@ def _get_file_loss(file) -> float:
             except InvalidLossValueException as e:
                 files_status_context.reject_file(file, reason=str(e))
                 continue
+            except Exception as e:
+                files_status_context.reject_file(file, reason=str(e))
+                continue
 
             if file_loss > filter_loss_threshold:
                 files_status_context.reject_file(file, reason=f"loss {file_loss:.3f}")

From c1b907525c75768053c60f515cc056ac473f3114 Mon Sep 17 00:00:00 2001
From: Dimitry Ageev <dimitry.ageev@gmail.com>
Date: Mon, 15 Jan 2024 14:25:16 +0300
Subject: [PATCH 3/8] Cassandra fixes (#265)

* store cassandra data and logs in perm storage volume

* update readme to not use database volume

* database start script

* less memory consumption
---
 Dockerfile           |  2 ++
 README.md            |  3 +--
 database-start.sh    | 23 +++++++++++++++++++++++
 docker-entrypoint.sh |  3 +--
 4 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 database-start.sh

diff --git a/Dockerfile b/Dockerfile
index 8ce2addd..941caa2b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -75,6 +75,8 @@ ENV RDMAV_HUGEPAGES_SAFE 0
 
 EXPOSE 8008
 
+COPY database-start.sh /
+RUN chmod +x database-start.sh
 COPY docker-entrypoint.sh /
 RUN chmod +x docker-entrypoint.sh
 
diff --git a/README.md b/README.md
index 9a20bda8..0725050e 100644
--- a/README.md
+++ b/README.md
@@ -34,11 +34,10 @@ On Windows you need to install WSL 2 first, [one guide to do this](https://docs.
 
 Run docker container with following command:
 ```commandline
-docker run -d --rm --gpus all -p 8008:8008 -v refact-perm-storage:/perm_storage -v refact-database:/var/lib/cassandra smallcloud/refact_self_hosting:latest
+docker run -d --rm --gpus all -p 8008:8008 -v refact-perm-storage:/perm_storage smallcloud/refact_self_hosting:latest
 ```
 
 `perm-storage` is a volume that is mounted inside the container. All the configuration files, downloaded weights and logs are stored here.
-`refact-database` is a volume for database where server stores statistics from your users.
 
 To upgrade the docker, delete it using `docker kill XXX` (the volume `perm-storage` will retain your
 data), run `docker pull smallcloud/refact_self_hosting` and run it again.
diff --git a/database-start.sh b/database-start.sh
new file mode 100644
index 00000000..9ba1ac08
--- /dev/null
+++ b/database-start.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+REFACT_CASSANDRA_DIR="$REFACT_PERM_DIR/cassandra"
+if [ ! -d "$REFACT_CASSANDRA_DIR" ]; then
+    mkdir -p "$REFACT_CASSANDRA_DIR"
+    chown cassandra:cassandra "$REFACT_CASSANDRA_DIR"
+    if [ ! -z "$(ls /var/lib/cassandra)" ]; then
+        cp -rp /var/lib/cassandra/* "$REFACT_CASSANDRA_DIR"
+    fi
+    cp -rp /var/log/cassandra "$REFACT_CASSANDRA_DIR/log"
+fi
+# rewrite every /var/lib/cassandra path in cassandra.yaml to point at REFACT_CASSANDRA_DIR
+sed -i "s|/var/lib/cassandra|$REFACT_CASSANDRA_DIR|g" /etc/cassandra/cassandra.yaml
+# recreate cassandra.in.sh from the stock copy, then append heap limits and a log dir under REFACT_CASSANDRA_DIR
+REFACT_CASSANDRA_INCLUDE=/usr/sbin/cassandra.in.sh
+cp /usr/share/cassandra/cassandra.in.sh "$REFACT_CASSANDRA_INCLUDE"
+echo "MAX_HEAP_SIZE=2G" >> "$REFACT_CASSANDRA_INCLUDE"
+echo "HEAP_NEWSIZE=400M" >> "$REFACT_CASSANDRA_INCLUDE"
+echo "CASSANDRA_LOG_DIR=$REFACT_CASSANDRA_DIR/log" >> "$REFACT_CASSANDRA_INCLUDE"
+
+if [ ! -z "$(service cassandra status | grep 'not running')" ]; then
+    service cassandra start
+    echo "cassandra database started on localhost"
+fi
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 1dfdbd29..4ab821d9 100755
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -1,6 +1,5 @@
 #!/bin/sh
 if [ -z "$REFACT_DATABASE_HOST" ]; then
-    sudo service cassandra start
-    echo "cassandra database started on localhost"
+    sh database-start.sh
 fi
 python -m self_hosting_machinery.watchdog.docker_watchdog

From 40d29ffab33ac6a0cda6d0afc8898e5ff7ef63a4 Mon Sep 17 00:00:00 2001
From: Oleg Klimov <omgtech@gmail.com>
Date: Mon, 15 Jan 2024 14:43:12 +0100
Subject: [PATCH 4/8] Ooops bad log rotation

---
 self_hosting_machinery/watchdog/docker_watchdog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/self_hosting_machinery/watchdog/docker_watchdog.py b/self_hosting_machinery/watchdog/docker_watchdog.py
index 39ebd7c9..b7580ef3 100644
--- a/self_hosting_machinery/watchdog/docker_watchdog.py
+++ b/self_hosting_machinery/watchdog/docker_watchdog.py
@@ -40,7 +40,7 @@ def log(*args):
         list_of_files.sort()
         while len(list_of_files) > 20:
             try:
-                os.remove(list_of_files.pop())
+                os.remove(list_of_files.pop(0))
             except OSError:
                 pass
     with open(os.path.join(env.DIR_LOGS, "watchdog_%s.log" % date), "a") as f:

From edf144b5c969b5f3d7f3044bd52e353f00ab64b9 Mon Sep 17 00:00:00 2001
From: Dimitry Ageev <dimitry.ageev@gmail.com>
Date: Mon, 15 Jan 2024 18:12:41 +0300
Subject: [PATCH 5/8] cassandra cannot start after container restarted,
 removing old pid file helps (#272)

---
 database-start.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/database-start.sh b/database-start.sh
index 9ba1ac08..6ebae226 100644
--- a/database-start.sh
+++ b/database-start.sh
@@ -17,6 +17,10 @@ echo "MAX_HEAP_SIZE=2G" >> "$REFACT_CASSANDRA_INCLUDE"
 echo "HEAP_NEWSIZE=400M" >> "$REFACT_CASSANDRA_INCLUDE"
 echo "CASSANDRA_LOG_DIR=$REFACT_CASSANDRA_DIR/log" >> "$REFACT_CASSANDRA_INCLUDE"
 
+if [ ! -z "$(service cassandra status | grep 'could not access pidfile')" ]; then
+    rm /var/run/cassandra/cassandra.pid
+fi
+
 if [ ! -z "$(service cassandra status | grep 'not running')" ]; then
     service cassandra start
     echo "cassandra database started on localhost"

From e4d00b5a90097b736902a74dd8dc9bd8dfdc3387 Mon Sep 17 00:00:00 2001
From: Valerii <work@valerii.cc>
Date: Mon, 15 Jan 2024 15:20:17 +0000
Subject: [PATCH 6/8] adjustments to dash_prime.py (#270)

* adjustments to dash_prime.py

* added comment
---
 .../dashboard_service/dashboards/dash_prime.py           | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py b/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py
index 0534a004..5ecf695a 100644
--- a/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py
+++ b/self_hosting_machinery/dashboard_service/dashboards/dash_prime.py
@@ -15,6 +15,9 @@ def robot_human_ratio(robot: int, human: int) -> float:
         return 1
     if robot == 0:
         return 0
+    # older versions of refact LSP could report negative human values, so robot + human can be 0
+    if robot + human == 0:
+        return 0
     return round(robot / (robot + human), 2)
 
 
@@ -181,10 +184,10 @@ def extract_stats(df: pd.DataFrame, date_kind: str) -> Dict:
             if lang not in languages:
                 continue
             res_loc[lang] = {
-                "Assistant": (robot := int(group["robot_characters"].sum())),
+                "Refact": (robot := int(group["robot_characters"].sum())),
                 "Human": (human := int(group["human_characters"].sum())),
                 "Total (characters)": robot + human,
-                "A/(A+H)": robot_human_ratio(robot, human),
+                "Refact Impact": robot_human_ratio(robot, human),
                 "Completions": int(group["completions_cnt"].sum()),
                 "Users": int(group["tenant_name"].nunique()),
             }
@@ -194,7 +197,7 @@ def extract_stats(df: pd.DataFrame, date_kind: str) -> Dict:
         res_loc = {
             'data': fmt_vals,
             'columns': ['Language', *res_loc[list(res_loc.keys())[0]].keys()],
-            'title': f"Assistant's impact by language: {date_kind}"
+            'title': f"Refact's impact by language: {date_kind}"
         }
         return res_loc
 

From 5322bad66f73cc3e47c4499d2655e71c31c60c4d Mon Sep 17 00:00:00 2001
From: Oleg Klimov <omgtech@gmail.com>
Date: Mon, 15 Jan 2024 20:01:36 +0100
Subject: [PATCH 7/8] transmit caps_version, don't restart lsp, misc (#268)

---
 self_hosting_machinery/scripts/env.py         |   2 -
 .../watchdog/watchdog.d/lsp.cfg               |   5 +-
 .../webgui/selfhost_fastapi_completions.py    | 104 +++---------------
 .../webgui/selfhost_fastapi_gpu.py            |   2 +-
 .../webgui/selfhost_model_assigner.py         |  11 +-
 .../webgui/selfhost_model_resolve.py          |   2 +-
 .../webgui/selfhost_queue.py                  |  20 +++-
 .../webgui/selfhost_req_queue.py              |  32 ------
 .../webgui/tab_models_host.py                 |   1 -
 self_hosting_machinery/webgui/webgui.py       |   7 +-
 10 files changed, 45 insertions(+), 141 deletions(-)
 delete mode 100644 self_hosting_machinery/webgui/selfhost_req_queue.py

diff --git a/self_hosting_machinery/scripts/env.py b/self_hosting_machinery/scripts/env.py
index e15b5825..54a004a8 100644
--- a/self_hosting_machinery/scripts/env.py
+++ b/self_hosting_machinery/scripts/env.py
@@ -43,8 +43,6 @@
 FLAG_LAUNCH_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_launch_finetune.flag")
 FLAG_STOP_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_stop_finetune.flag")
 
-FLAG_RESTART_LSP = os.path.join(DIR_WATCHDOG_D, "_restart_lsp.flag")
-
 def create_dirs():
     os.makedirs(DIR_WATCHDOG_D, exist_ok=True)
     os.makedirs(DIR_WEIGHTS, exist_ok=True)
diff --git a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
index 54d4617f..7f975244 100644
--- a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
+++ b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
@@ -1,12 +1,9 @@
 {
     "policy": ["always_on"],
-    "interrupt_when_file_appears": "%FLAG_RESTART_LSP%",
     "command_line": [
         "refact-lsp",
         "--address-url", "http://127.0.0.1:8008",
-        "--http-port", "8001",
-        "--lsp-port", "8002",
-        "--logs-stderr"
+        "--http-port", "8001"
     ],
     "gpus": []
 }
diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
index e56e7d12..2b3887cc 100644
--- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
+++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
@@ -8,12 +8,12 @@
 import litellm
 
 from fastapi import APIRouter, Request, HTTPException, Query
-from fastapi.responses import StreamingResponse
+from fastapi.responses import Response, StreamingResponse
 
 from self_hosting_machinery import env
 from self_hosting_machinery.webgui.selfhost_model_resolve import completion_resolve_model
 from self_hosting_machinery.webgui.selfhost_model_resolve import static_resolve_model
-from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
+from self_hosting_machinery.webgui.selfhost_queue import Ticket
 from self_hosting_machinery.webgui.selfhost_webutils import log
 from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue
 from self_hosting_machinery.webgui.selfhost_model_assigner import ModelAssigner
@@ -107,7 +107,7 @@ class ChatContext(NlpSamplingParams):
     function: str = Query(default="chat", regex="^[a-zA-Z0-9_\.\-]+$")
 
 
-async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen, created_ts):
+async def _completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen, created_ts, caps_version: int):
     try:
         packets_cnt = 0
         while 1:
@@ -117,6 +117,7 @@ async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen
                 log("TIMEOUT %s" % ticket.id())
                 msg = {"status": "error", "human_readable_message": "timeout"}
             not_seen_resp = copy.deepcopy(msg)
+            not_seen_resp["caps_version"] = caps_version
             if "choices" in not_seen_resp:
                 for i in range(post.n):
                     newtext = not_seen_resp["choices"][i]["text"]
@@ -152,40 +153,6 @@ async def completion_streamer(ticket: Ticket, post: NlpCompletion, timeout, seen
         ticket.cancelled = True
 
 
-async def diff_streamer(ticket: Ticket, post: DiffCompletion, timeout, created_ts):
-    try:
-        while 1:
-            try:
-                msg = await asyncio.wait_for(ticket.streaming_queue.get(), timeout)
-            except asyncio.TimeoutError:
-                log("TIMEOUT %s" % ticket.id())
-                msg = {"status": "error", "human_readable_message": "timeout"}
-            if not post.stream:
-                if msg.get("status", "") == "in_progress":
-                    continue
-                yield json.dumps(msg)
-                break
-            tmp = json.dumps(msg)
-            yield "data: " + tmp + "\n\n"
-            log("  " + red_time(created_ts) + " stream %s <- %i bytes" % (ticket.id(), len(tmp)))
-            if msg.get("status", "") != "in_progress":
-                break
-        if post.stream:
-            yield "data: [DONE]" + "\n\n"
-        log(red_time(created_ts) + " /finished call %s" % ticket.id())
-        ticket.done()
-        # fastapi_stats.stats_accum[kt] += msg.get("generated_tokens_n", 0)
-        # fastapi_stats.stats_accum[kcomp] += 1
-        # fastapi_stats.stats_lists_accum["stat_latency_" + post.model].append(time.time() - created_ts)
-    finally:
-        if ticket.id() is not None:
-            log("   ***  CANCEL  ***  cancelling %s " % ticket.id() + red_time(created_ts))
-            # fastapi_stats.stats_accum["stat_api_cancelled"] += 1
-            # fastapi_stats.stats_accum["stat_m_" + post.model + "_cancelled"] += 1
-        ticket.cancelled = True
-        ticket.done()
-
-
 async def chat_streamer(ticket: Ticket, timeout, created_ts):
     seen: Dict[int, str] = dict()
     try:
@@ -240,7 +207,6 @@ def __init__(self,
         # API for direct FIM and Chat usage
         self.add_api_route("/v1/login", self._login, methods=["GET"])
         self.add_api_route("/v1/secret-key-activate", self._secret_key_activate, methods=["GET"])
-        self.add_api_route("/v1/contrast", self._contrast, methods=["POST"])
         self.add_api_route("/v1/chat", self._chat, methods=["POST"])
 
         # API for LSP server
@@ -275,10 +241,11 @@ async def _coding_assistant_caps(self):
                     or model_name in litellm.model_list:
                 code_chat_default_model = model_name
                 break
-        return {
+        config_mtime = self._model_assigner.config_inference_mtime()
+        data = {
             "cloud_name": "Refact Self-Hosted",
-            "endpoint_template": "v1/completions",
-            "endpoint_chat_passthrough": "v1/chat/completions",
+            "endpoint_template": "/v1/completions",
+            "endpoint_chat_passthrough": "/v1/chat/completions",
             "endpoint_style": "openai",
             "telemetry_basic_dest": "/stats/telemetry-basic",
             "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets",
@@ -291,7 +258,9 @@ async def _coding_assistant_caps(self):
                 for model in models_available
                 if model in self._model_assigner.models_db
             },
+            "caps_version": config_mtime,
         }
+        return Response(content=json.dumps(data, indent=4), media_type="application/json")
 
     async def _login(self):
         longthink_functions = dict()
@@ -348,10 +317,12 @@ async def _secret_key_activate(self):
     async def _completions(self, post: NlpCompletion, account: str = "user"):
         ticket = Ticket("comp-")
         req = post.clamp()
+        caps_version = self._model_assigner.config_inference_mtime()       # use mtime as a version, if that changes the client will know to refresh caps
         model_name, err_msg = static_resolve_model(post.model, self._inference_queue)
         if err_msg:
             log("%s model resolve \"%s\" -> error \"%s\" from %s" % (ticket.id(), post.model, err_msg, account))
-            raise HTTPException(status_code=400, detail=err_msg)
+            return Response(status_code=400, content=json.dumps({"detail": err_msg, "caps_version": caps_version}, indent=4), media_type="application/json")
+
         log("%s model resolve \"%s\" -> \"%s\" from %s" % (ticket.id(), post.model, model_name, account))
         req.update({
             "object": "text_completion_req",
@@ -367,57 +338,10 @@ async def _completions(self, post: NlpCompletion, account: str = "user"):
         await q.put(ticket)
         seen = [""] * post.n
         return StreamingResponse(
-            completion_streamer(ticket, post, self._timeout, seen, req["created"]),
+            _completion_streamer(ticket, post, self._timeout, seen, req["created"], caps_version=caps_version),
             media_type=("text/event-stream" if post.stream else "application/json"),
         )
 
-    async def _contrast(self, post: DiffCompletion, request: Request, account: str = "user"):
-        if post.function != "diff-anywhere":
-            if post.cursor_file not in post.sources:
-                raise HTTPException(status_code=400, detail="cursor_file='%s' is not in sources=%s" % (post.cursor_file, list(post.sources.keys())))
-            if post.cursor0 < 0 or post.cursor1 < 0:
-                raise HTTPException(status_code=400, detail="cursor0=%d or cursor1=%d is negative" % (post.cursor0, post.cursor1))
-            filetext = post.sources[post.cursor_file]
-            if post.cursor0 > len(filetext) or post.cursor1 > len(filetext):
-                raise HTTPException(status_code=400, detail="cursor0=%d or cursor1=%d is beyond file length=%d" % (post.cursor0, post.cursor1, len(filetext)))
-        for fn, text in post.sources.items():
-            if len(text) > 180*1024:
-                raise HTTPException(status_code=400, detail="file '%s' is too long (%d bytes)" % (fn, len(text)))
-        ticket = Ticket("comp-")
-        if post.function == "infill":
-            model_name, err_msg = completion_resolve_model(self._inference_queue)
-        else:
-            model_name, err_msg = static_resolve_model(post.model, self._inference_queue)
-        if err_msg:
-            log("%s model resolve \"%s\" func \"%s\" -> error \"%s\" from %s" % (ticket.id(), post.model, post.function, err_msg, account))
-            raise HTTPException(status_code=400, detail=err_msg)
-        log("%s model resolve \"%s\" func \"%s\" -> \"%s\" from %s" % (ticket.id(), post.model, post.function, model_name, account))
-        if post.function == "highlight":
-            post.max_tokens = 0
-        req = post.clamp()
-        req.update({
-            "object": "diff_completion_req",
-            "account": account,
-            "model": model_name,
-            "intent": post.intent,
-            "sources": post.sources,
-            "cursor_file": post.cursor_file,
-            "cursor0": post.cursor0,
-            "cursor1": post.cursor1,
-            "function": post.function,
-            "max_edits": post.max_edits,
-            "stream": post.stream,
-        })
-        post_raw = await request.json()
-        if "poi" in post_raw:
-            req["poi"] = post_raw["poi"]
-        ticket.call.update(req)
-        q = self._inference_queue.model_name_to_queue(ticket, model_name)
-        # kt, kcomp = await _model_hit(red, ticket, req, model_name, account)
-        self._id2ticket[ticket.id()] = ticket
-        await q.put(ticket)
-        return StreamingResponse(diff_streamer(ticket, post, self._timeout, req["created"]))
-
     async def _chat(self, post: ChatContext, request: Request, account: str = "user"):
         ticket = Ticket("comp-")
 
diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py b/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py
index 32efeaa8..17162835 100644
--- a/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py
+++ b/self_hosting_machinery/webgui/selfhost_fastapi_gpu.py
@@ -5,7 +5,7 @@
 
 from fastapi import APIRouter, Query, Request, Header, HTTPException
 
-from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
+from self_hosting_machinery.webgui.selfhost_queue import Ticket
 from self_hosting_machinery.webgui.selfhost_webutils import log
 from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue
 
diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py
index e0f5f56a..d5d03296 100644
--- a/self_hosting_machinery/webgui/selfhost_model_assigner.py
+++ b/self_hosting_machinery/webgui/selfhost_model_assigner.py
@@ -224,7 +224,10 @@ def model_assignment(self):
         }
         return j
 
-    @staticmethod
-    def restart_lsp():
-        with open(env.FLAG_RESTART_LSP, "w") as f:
-            f.write("")
+    def config_inference_mtime(self) -> int:
+        """Return CONFIG_INFERENCE's mtime in whole seconds, or 0 if it cannot be read."""
+        try:
+            # a missing file raises FileNotFoundError, an OSError subclass
+            return int(os.path.getmtime(env.CONFIG_INFERENCE))
+        except OSError:
+            return 0
diff --git a/self_hosting_machinery/webgui/selfhost_model_resolve.py b/self_hosting_machinery/webgui/selfhost_model_resolve.py
index 6b508cb7..d15137a9 100644
--- a/self_hosting_machinery/webgui/selfhost_model_resolve.py
+++ b/self_hosting_machinery/webgui/selfhost_model_resolve.py
@@ -49,4 +49,4 @@ def _family(model: str) -> str:
         if not model_name or _family(model_name) == _family(have_model):
             return have_model, ""
     else:
-        return "", f"model is not loaded (3)"
+        return "", f"model \"{model_name}\" is not loaded (3)"
diff --git a/self_hosting_machinery/webgui/selfhost_queue.py b/self_hosting_machinery/webgui/selfhost_queue.py
index b5620fda..36768eea 100644
--- a/self_hosting_machinery/webgui/selfhost_queue.py
+++ b/self_hosting_machinery/webgui/selfhost_queue.py
@@ -6,7 +6,25 @@
 from self_hosting_machinery import env
 from self_hosting_machinery.webgui.selfhost_webutils import log
 from fastapi import HTTPException
-from typing import Dict, List
+from typing import Dict, List, Any
+import uuid
+
+
+class Ticket:
+    """Request ticket: holds the call payload, a cancel flag and the streaming queue."""
+    def __init__(self, id_prefix):
+        self.call: Dict[str, Any] = {"id": id_prefix + uuid.uuid4().hex[:12]}
+        self.cancelled: bool = False
+        self.processed_by_infmod_guid: str = ""
+        self.streaming_queue = asyncio.queues.Queue()
+
+    def id(self):
+        """Return the ticket id, or None once done() has removed it."""
+        return self.call.get("id")
+
+    def done(self):
+        """Forget the id stored in the call payload; repeated calls are harmless."""
+        self.call.pop("id", None)
 
 
 class InferenceQueue:
diff --git a/self_hosting_machinery/webgui/selfhost_req_queue.py b/self_hosting_machinery/webgui/selfhost_req_queue.py
deleted file mode 100644
index ec43a39e..00000000
--- a/self_hosting_machinery/webgui/selfhost_req_queue.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import asyncio
-import random
-
-from self_hosting_machinery.webgui import selfhost_webutils
-
-from typing import Dict, Any
-
-
-__all__ = ["Ticket"]
-
-
-# TODO: why not uuid???
-def random_guid(n=12):
-    random_chars = "0123456789" + "ABCDEFGHIJKLNMPQRSTUVWXYZ" + "ABCDEFGHIJKLNMPQRSTUVWXYZ".lower()
-    guid = "".join([random_chars[random.randint(0, len(random_chars)-1)] for _ in range(n)])
-    return guid
-
-
-class Ticket:
-    def __init__(self, id_prefix):
-        self.call: Dict[str, Any] = dict()
-        self.call["id"] = id_prefix + random_guid()
-        self.cancelled: bool = False
-        self.processed_by_infmod_guid: str = ""
-        self.streaming_queue = asyncio.queues.Queue()
-
-    def id(self):
-        return self.call.get("id", None)
-
-    def done(self):
-        if "id" in self.call:
-            del self.call["id"]
diff --git a/self_hosting_machinery/webgui/tab_models_host.py b/self_hosting_machinery/webgui/tab_models_host.py
index ac0606b3..c1138e85 100644
--- a/self_hosting_machinery/webgui/tab_models_host.py
+++ b/self_hosting_machinery/webgui/tab_models_host.py
@@ -51,5 +51,4 @@ async def _tab_host_models_assign(self, post: TabHostModelsAssign):
             else:
                 validated["completion"] = ""
         self._model_assigner.models_to_watchdog_configs(validated)
-        self._model_assigner.restart_lsp()
         return JSONResponse("OK")
diff --git a/self_hosting_machinery/webgui/webgui.py b/self_hosting_machinery/webgui/webgui.py
index 6f15df22..68f853ed 100644
--- a/self_hosting_machinery/webgui/webgui.py
+++ b/self_hosting_machinery/webgui/webgui.py
@@ -13,7 +13,6 @@
 
 from self_hosting_machinery.webgui.selfhost_model_assigner import ModelAssigner
 from self_hosting_machinery.webgui.selfhost_plugins import PluginsRouter
-from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
 from self_hosting_machinery.webgui.selfhost_fastapi_completions import CompletionsRouter
 from self_hosting_machinery.webgui.selfhost_fastapi_gpu import GPURouter
 from self_hosting_machinery.webgui.tab_server_logs import TabServerLogRouter
@@ -21,7 +20,7 @@
 from self_hosting_machinery.webgui.tab_upload import TabUploadRouter
 from self_hosting_machinery.webgui.tab_finetune import TabFinetuneRouter
 from self_hosting_machinery.webgui.tab_models_host import TabHostRouter
-from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue
+from self_hosting_machinery.webgui.selfhost_queue import InferenceQueue, Ticket
 from self_hosting_machinery.webgui.selfhost_static import StaticRouter
 from self_hosting_machinery.webgui.tab_loras import TabLorasRouter
 from self_hosting_machinery.webgui.selfhost_statistics import TabStatisticsRouter
@@ -103,9 +102,7 @@ def handle_sigint(*args):
 
         signal.signal(signal.SIGINT, handle_sigint)
         signal.signal(signal.SIGUSR1, handle_sigint)
-
-        # NOTE: try restart LSP after server started
-        self._model_assigner.restart_lsp()
+        signal.signal(signal.SIGTERM, handle_sigint)
 
         async def init_database():
             await self._database.connect()

From e78cf082231eb7d92ffaff3a7291c9e37ef937e8 Mon Sep 17 00:00:00 2001
From: mitya <dimitry.ageev@gmail.com>
Date: Tue, 16 Jan 2024 12:03:18 +0300
Subject: [PATCH 8/8] update version to v1.3.1

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f01f4812..8e2bf504 100644
--- a/setup.py
+++ b/setup.py
@@ -93,7 +93,7 @@ def get_install_requires(packages):
 
 setup(
     name="refact-self-hosting",
-    version="1.3.0",
+    version="1.3.1",
     py_modules=list(setup_packages.keys()),
     package_data={
         name: py_package.data