diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cb069eff..a265b014 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,6 @@ separate terminals. ```commandline python -m self_hosting_machinery.webgui.webgui DEBUG=1 python -m self_hosting_machinery.inference.inference_worker --model wizardlm/7b -DEBUG=1 python -m refact_scratchpads_no_gpu.infserver_no_gpu longthink/stable --openai_key sk-XXXYYY ``` That should be enough to get started! diff --git a/Dockerfile b/Dockerfile index 1636c586..0d87175b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,14 @@ ENV PATH="${PATH}:/tmp/linguist/bin" RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y python3-packaging +# refact lsp requisites +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y +ENV PATH="${PATH}:/root/.cargo/bin" +RUN git clone https://github.com/smallcloudai/refact-lsp.git /tmp/refact-lsp \ + && cd /tmp/refact-lsp \ + && cargo install --path . \ + && rm -rf /tmp/refact-lsp + ENV INSTALL_OPTIONAL=TRUE ENV BUILD_CUDA_EXT=1 ENV GITHUB_ACTIONS=true diff --git a/refact_scratchpads_no_gpu/async_scratchpad/__init__.py b/refact_scratchpads_no_gpu/async_scratchpad/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py b/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py deleted file mode 100644 index 60e5885a..00000000 --- a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Callable, Union, List, Dict, Iterator - - -class AsyncScratchpad: - def __init__( - self, - id: str, - created: float, - temperature: float, - top_p: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - function: str, - stream: bool, - logger: Callable, - **unused - ): - self.id = id - self.created = created - self.finish_reason = "" - self.temp = min(max(float(temperature), 0.0), 1.0) - self.top_p = top_p - self.max_tokens = int(max_tokens) - self.function = function - self.stream = stream - self._logger = logger - tmp = stop_tokens - if isinstance(tmp, str): - stop_strings = [tmp] - else: - stop_strings = tmp - self.metering_generated_tokens_n = 0 - self.metering_total_tokens_n = 0 - self.needs_upload = False - for k, v in unused.items(): - self.debuglog("AsyncScratchpad: unused parameter '%s' = '%s'" % (k, v)) - - def toplevel_fields(self): - return {} - - def debuglog(self, *args): - if self._logger: - self._logger(*args) - - async def completion(self) -> Iterator[Dict[str, str]]: - raise NotImplementedError diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py deleted file mode 100644 index 22e441de..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from refact_scratchpads_no_gpu.gpt_toolbox.toolbox_functions import * -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import GptChat diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py deleted file mode 100644 index 1942a8f4..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py +++ /dev/null @@ -1,161 +0,0 @@ -import asyncio -import functools -import json -from typing import List, Tuple, Dict, Union, Iterator - -from refact_scratchpads_no_gpu.async_scratchpad import ascratch - -import openai -import tiktoken - - -def gpt_prices( # Apr 4 2023: - model_name: str, -) -> Tuple[int, int]: - # GPT-4 8K prompt[$0.03 / 1K 
tokens] generated[$0.06 / 1K tokens] - if model_name.startswith("gpt-4") or model_name.startswith("gpt4"): - pp1000t_prompt = 30_000 - pp1000t_generated = 60_000 - # gpt-3.5-turbo $0.002 / 1K tokens - elif model_name.startswith("gpt-3.5-turbo"): - pp1000t_prompt = 2_000 - pp1000t_generated = 2_000 - else: - raise ValueError(f'get_prices: Unknown model: {model_name}') - return pp1000t_prompt, pp1000t_generated - - -@functools.lru_cache(maxsize=10) -def engine_to_encoding(engine: str) -> tiktoken.Encoding: - enc = tiktoken.encoding_for_model(engine) - return enc - - -ACCUMULATE_N_STREAMING_CHUNKS = 5 -engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not - - -def calculate_chat_tokens(model_name, messages, completion): - enc = engine_to_encoding(model_name) - calc_prompt_tokens_n = 2 # warmup - for d in messages: - calc_prompt_tokens_n += len(enc.encode(d["content"], disallowed_special=())) - calc_prompt_tokens_n += len(enc.encode(d["role"], disallowed_special=())) - calc_prompt_tokens_n += 4 # to switch user/assistant - calc_generated_tokens_n = len(enc.encode(completion, disallowed_special=())) + 2 # one to switch, another EOF - return calc_prompt_tokens_n, calc_generated_tokens_n - - -class GptChat(ascratch.AsyncScratchpad): - def __init__( - self, - id: str, - *, - created: float, - temperature: float, - top_p: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - messages: List[Dict[str, str]], - model: str, # always "longthink", don't use - **more, - ): - super().__init__( - id=id, - created=created, - temperature=temperature, - top_p=top_p, - max_tokens=max_tokens, - stop_tokens=stop_tokens, - **more, - ) - - self._model_name = "gpt-3.5-turbo" - if "gpt4" in self.function or "gpt-4" in self.function: - self._model_name = "gpt-4" - self._stream_timeout_sec = 15 - - messages = messages or [] - if not messages or messages[0].get('role') != 'system': - messages = [ - { - "role": "system", - "content": "You are a coding assistant that outputs short answers, give links to documentation.", - }, *messages - ] - self._messages = messages - self._completion = "" - - @property - def prices(self) -> Tuple[int, int]: - return gpt_prices(self._model_name) - - async def completion(self) -> Iterator[Dict[str, str]]: - gen = await openai.ChatCompletion.acreate( - model=self._model_name, - messages=self._messages, - max_tokens=self.max_tokens, - temperature=self.temp, - stream=True, - ) - accum = "" - role = "" - tokens = 0 - self.metering_prompt_tokens_n = 0 - self.metering_generated_tokens_n = 0 - try: - def forward_streaming(): - nonlocal tokens, accum, role - self._completion += accum - msg = { - "chat__role": "assistant", - "chat__content": self._completion, - } - accum = "" - return msg - - while True: - resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec) - delta = resp.choices[0].delta - if "role" in delta: - role = delta["role"] - if "content" in delta: - accum += delta["content"] - tokens += 1 # assuming 1 token per chunk - if "swear" in accum: - raise ValueError("swear!") - if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None: - self.finish_reason = resp.choices[0]["finish_reason"] - if self.finish_reason: - break - if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0: - yield forward_streaming() - if self.finish_reason: # cancelled from main coroutine - break - if self.finish_reason == "": - self.finish_reason = "END" - except asyncio.exceptions.TimeoutError as e: - self.debuglog("CHAT TIMEOUT:", 
str(type(e)), str(e)) - except Exception as e: - self.debuglog("CHAT EXCEPTION:", str(type(e)), str(e)) - self.finish_reason = "ERROR" - yield forward_streaming() - - def toplevel_fields(self): - if not self.finish_reason: - return {} - else: - calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens( - self._model_name, self._messages, self._completion - ) - self.metering_prompt_tokens_n = calc_prompt_tokens_n - self.metering_generated_tokens_n = calc_generated_tokens_n - metering_message = { - "metering_prompt_tokens_n": self.metering_prompt_tokens_n, - "metering_generated_tokens_n": self.metering_generated_tokens_n, - "pp1000t_prompt": self.prices[0], - "pp1000t_generated": self.prices[1], - "model_name": self._model_name, - } - self.debuglog(json.dumps(metering_message)) - return metering_message diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py deleted file mode 100644 index e68d7364..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py +++ /dev/null @@ -1,204 +0,0 @@ -import os -import sys -import asyncio -import termcolor -import functools -import json -from typing import List, Union, Callable, Dict, Iterator, Tuple - -import openai -import tiktoken - -from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection -from refact_scratchpads_no_gpu.async_scratchpad import ascratch - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import gpt_prices, calculate_chat_tokens -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import trim_context_tok, code_block_postprocess - - -DEBUG = int(os.environ.get("DEBUG", "0")) - - -@functools.lru_cache(maxsize=10) -def engine_to_encoding(engine: str) -> tiktoken.Encoding: - enc = tiktoken.encoding_for_model(engine) - return enc - - -ACCUMULATE_N_STREAMING_CHUNKS = 5 -engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not - - -class ScratchpadToolboxGPT(ascratch.AsyncScratchpad): - def __init__( - self, - id: str, - created: float, - temperature: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - function: str, - intent: str, - cursor_file: str, - cursor0: int, - cursor1: int, - sources: Dict[str, str], - stream: bool, - logger: Callable, - - model_n: str = "gpt-3.5-turbo", - supports_stream: bool = True, - timeout: int = None, - **kwargs, - ): - super().__init__( - id=id, - created=created, - temperature=temperature, - max_tokens=max_tokens, - stop_tokens=stop_tokens, - function=function, - stream=stream, - logger=logger, - **kwargs - ) - self.intent = intent - self.cursor_file = cursor_file - self.cursor0 = cursor0 - self.cursor1 = cursor1 - self.sources = sources - self.metering_generated_tokens_n = 0 - self.metering_total_tokens_n = 0 - self.needs_upload = False - - self._model_n = model_n - self.__model_name = None - - if not supports_stream: self.stream = False - self._stream_timeout_sec: float = 15 - - self._txt: str = self.sources.get(self.cursor_file) - - self.cursor0, self.cursor1, self.selection = full_line_selection( - self.cursor0, self.cursor1, self._txt - ) - self.enc = engine_to_encoding(self.model_name) - - def trim_context(self) -> Tuple[int, int, str]: - cursor0, cursor1, ctxt = trim_context_tok(self.cursor0, self.cursor1, self._txt, self.enc) - return cursor0, cursor1, ctxt - - @property - def prices(self) -> Tuple[int, int]: - return gpt_prices(self.model_name) - - @property - def model_name(self) -> str: - if not self.__model_name: 
- model_name = 'gpt-3.5-turbo-0613' - if self._model_n == 'gpt-3.5-turbo' or self._model_n == 'gpt-4': - model_name = self._model_n + '-0613' - self.__model_name = model_name - return self.__model_name - - @model_name.setter - def model_name(self, val: str): - self.__model_name = val - - async def completion(self) -> Iterator[Dict[str, str]]: - if self.max_tokens < 1: self.max_tokens = 256 - self.messages = self._messages() - self.completion_so_far: str = "" - self.metering_prompt_tokens_n = 0 - self.metering_generated_tokens_n = 0 - self.openai_prompt_tokens_n = 0 - self.openai_completion_tokens = 0 - - def forward_streaming(): - modified = self._postprocess(self.completion_so_far) - return {self.cursor_file: modified} - - try: - gen = await openai.ChatCompletion.acreate( - model=self.model_name, - messages=self.messages, - max_tokens=self.max_tokens, - stream=self.stream, - temperature=self.temp, - stop=['<|end|>'], - ) - - if not self.stream: - resp = gen - self.completion_so_far = resp["choices"][0]["message"]["content"] - if DEBUG: - sys.stdout.write(termcolor.colored(self.completion_so_far, "green")) - sys.stdout.flush() - self.openai_prompt_tokens_n = resp["usage"]["prompt_tokens"] - self.openai_completion_tokens = resp["usage"]["completion_tokens"] - print(resp["usage"]) - self.model_name = resp["model"] - self.finish_reason = resp["choices"][0]["finish_reason"] or "END" - else: - self.finish_reason = "" - self.completion_so_far = "" - tokens = 0 - while True: - resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec) - delta = resp.choices[0].delta - if "content" in delta: - if DEBUG: - sys.stdout.write(termcolor.colored(delta["content"], "green")) - sys.stdout.flush() - self.completion_so_far += delta["content"] - tokens += 1 # assuming 1 token per chunk - if "model" in resp: - self.model_name = resp["model"] - if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None: - self.finish_reason = resp.choices[0]["finish_reason"] - if self.finish_reason: - break - if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0: - yield forward_streaming() - if self.finish_reason: - break - if self.model_name == "": - self.debuglog("ScratchpadToolboxGPT: model_name is empty") - if self.finish_reason == "": - self.finish_reason = "END" - except asyncio.exceptions.TimeoutError as e: - self.debuglog("FUNCTIONS TIMEOUT:", str(type(e)), str(e)) - except Exception as e: - self.debuglog("FUNCTIONS EXCEPTION:", str(type(e)), str(e)) - self.finish_reason = "ERROR" - yield forward_streaming() - - def _messages(self) -> List[Dict[str, str]]: - raise NotImplementedError - - def _postprocess(self, completion: str) -> str: - completion = code_block_postprocess(completion) - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] - - def toplevel_fields(self): - if not self.finish_reason: - return {} - else: - calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens( - self.model_name, self.messages, self.completion_so_far - ) - self.metering_prompt_tokens_n = self.openai_prompt_tokens_n or calc_prompt_tokens_n - self.metering_generated_tokens_n = self.openai_completion_tokens or calc_generated_tokens_n - metering_message = { - "metering_prompt_tokens_n": self.metering_prompt_tokens_n, - "metering_generated_tokens_n": self.metering_generated_tokens_n, - "pp1000t_prompt": self.prices[0], - "pp1000t_generated": self.prices[1], - "model_name": self.model_name, - } - self.debuglog(json.dumps(metering_message)) - return metering_message - - def 
debuglog(self, *args): - if self._logger: - self._logger(*args) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py deleted file mode 100644 index 75746371..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py +++ /dev/null @@ -1,71 +0,0 @@ -import re -from itertools import zip_longest -from typing import * -import tiktoken - -from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection - - -def msg(role: str, content: str) -> Dict[str, str]: - assert role in ['system', 'user', 'assistant'] - return {'role': role, 'content': content} - - -def code_block_postprocess(txt: str) -> str: - lines_code = [] - is_code = False - for line in txt.split('\n'): - if '```' in line: - is_code = not is_code - continue - if is_code: - lines_code.append(line) - - code = '\n'.join(lines_code) or txt - return code - - -def find_substring_positions(substring, text) -> Optional[Tuple[int, int]]: - words = substring.split() - pattern = r'\s*'.join(map(re.escape, words)) - match = re.search(pattern, text) - if not match: - return - - c0, c1, _ = full_line_selection(match.start(), match.end(), text) - return c0, c1 - - -def trim_context_tok( - cursor0: int, - cursor1: int, - text: str, - enc: tiktoken.Encoding, - max_tokens: int = 2000 -) -> Tuple[int, int, str]: - selection = text[cursor0:cursor1] - tokens_left = max_tokens - len(enc.encode(selection, disallowed_special=())) - - lines_before = ((l, 'before') for l in reversed(text[:cursor0].splitlines())) - lines_after = ((l, 'after') for l in text[cursor1:].splitlines()) - merged_lines = [val for pair in zip_longest(lines_before, lines_after) for val in pair if val] - - lines_before_p, lines_after_p = [], [] - for line, t in merged_lines: - if (line_tok_cnt := len(enc.encode(line, disallowed_special=()))) >= tokens_left: break - lines_before_p.append(line) if t == 'before' else lines_after_p.append(line) - tokens_left -= line_tok_cnt - - txt_before = '\n'.join(reversed(lines_before_p)) + '\n' - txt_after = '\n'.join(lines_after_p) - txt = txt_before + selection + txt_after - cursor0, cursor1 = len(txt_before), len(txt_before) + len(selection) - - # print("chars before %i -> cut to %i" % (len(text[:cursor0]), len(txt_before))) - # print("chars after %i -> cut to %i" % (len(text[cursor1:]), len(txt_after))) - # print("before %i bytes -> %i tokens" % (len(txt_before), len(enc.encode(txt_before, disallowed_special=())))) - # print("after %i bytes -> %i tokens" % (len(txt_after), len(enc.encode(txt_after, disallowed_special=())))) - # print("tokens + tokens + tokens = %i" % (len(enc.encode(txt, disallowed_special=())))) - - return cursor0, cursor1, txt - diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py deleted file mode 100644 index 3a943412..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Tuple - - -def full_line_selection(cursor0: int, cursor1: int, txt: str) -> Tuple[int, int, str]: - """ - Adjusts selection to only include full lines. 
- """ - c0, c1, buff = '<|cursor0|>', '<|cursor1|>', '' - txt: str = txt[:cursor0] + c0 + txt[cursor0:cursor1] + c1 + txt[cursor1:] - - lines_new = [] - for line in txt.split('\n'): - if buff: - line = buff + line - buff = '' - if c0 in line: - if not line.split(c0)[1].strip(): - buff = c0 - line = line.replace(c0, "") - else: - line = c0 + line.replace(c0, "") - - if c1 in line: - if not line.split(c1)[0].strip() and lines_new: - lines_new[-1] += c1 - line = line.replace(c1, "") - else: - line = line.replace(c1, "") + c1 - lines_new.append(line) - - txt_new = '\n'.join(lines_new) - cursor0 = txt_new.index(c0) - cursor1 = txt_new.replace(c0, "").index(c1) - selection = txt_new.replace(c0, "").replace(c1, "")[cursor0:cursor1] - - return cursor0, cursor1, selection diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py deleted file mode 100644 index 7e02f281..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .experimental_gpt_bugs_highlight import GptBugsHighlight -from .experimental_gpt_code_review import ScratchpadCodeReviewHighlightGPT4 -from .experimental_gpt_completion import GptCompletion, GptCompletionGPT4 -from .experimental_gpt_vulnerabilities_highlight import GptDetectVulnerabilitiesHighlightGPT4 -from .gpt_add_console_logs import GptAddConsoleLogs -from .gpt_comment_each_line import GptCommentEachLine -from .gpt_explain_code_block import GptExplainCodeBlock, GptExplainCodeBlockGPT4 -from .gpt_fix_bug import GptFixBug, GptFixBugGPT4 -from .gpt_make_code_shorter import GptMakeCodeShorter, GptMakeCodeShorterGPT4 -from .gpt_precise_naming import GptPreciseNaming diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py deleted file mode 100644 index 8641fb9e..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py +++ /dev/null @@ -1,101 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptBugsHighlight(ScratchpadToolboxGPT): - def __init__(self, model_n="gpt3.5-turbo-0301", supports_stream=False, **kwargs): - super().__init__( - model_n=model_n, - supports_stream=supports_stream, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', ''' -You are a code reviewer. Follow my instructions carefully & to the letter. - -You are to receive a single code file. -It contain imports from other files that are present in the project, but you cannot see them. -That's why you must not highlight errors that are connected to the imports, it's a false positive. - -Your assignment is: -1. Carefully read code line by line up to the end. -2. Find all errors likely to happen in runtime (ignore the imports) -3. 
For each found error output a comment in the following format: -{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"} -{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"} - -FIELDS DESCRIPTION: -- code: the code you found issue in -- description: brief description of the issue and short instructions hints how to fix it - -Guidelines: -Explain yourself as briefly as possible, do not explain outside of code block. -The output you provide must be decodable using jsonlines format. -Do not highlight any errors connected to imports. -''' - ), - msg( - 'user', - """from routers import FindRouter - -if __name__ == "__main__": - from argparse import ArgumentParser - parser = ArgumentParser() -""" - ), - msg( - 'assistant', - """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}""" - ), - msg( - 'user', - 'Not valid. You have been told to ignore any kind of import errors!' - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - self.debuglog(f'Completion:\n{completion}') - suggestions = [] - for line in completion.splitlines(): - if not line.strip(): - continue - try: - suggestions.append(json.loads(line)) - except Exception as e: - self.debuglog(e) - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - self.debuglog('Substring not found') - continue - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["description"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - return self._txt - - -class GptBugsHighlightGPT4(GptBugsHighlight): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - supports_stream=False, - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py deleted file mode 100644 index 639e8fb5..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py +++ /dev/null @@ -1,96 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions - - -class ScratchpadCodeReviewHighlightGPT4(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - supports_stream=False, - timeout=120, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', - ''' -You are a code reviewer. -Follow my instructions carefully & to the letter. - -You are to receive a single code file. -It contain imports from other files that are present in the project, but you cannot see them. -That's why you must not highlight errors that are connected to the imports to not commit false-positive errors. - -Your assignment is: -1. Carefully read code line by line up to the end. -2. Find all possible errors that interrupt code runtime (except the cases listed above) -3. 
For each found error you will suggest a comment in the following format: -{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"} -{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"} - -FIELDS DESCRIPTION: -- code: the code you found issue in -- description: extremely brief description of the issue and short instructions hints how to fix it - -Guidelines: -Explain yourself as briefly and clear as possible, do not explain outside of code block. -The output you provide must be decodable using jsonlines format. -Do not highlight any error that is anyhow connected to imports! -''' - ), - msg( - 'user', - """ -from routers import FindRouter - -if __name__ == "__main__": - from argparse import ArgumentParser - parser = ArgumentParser() -""" - ), - msg( - 'assistant', - """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}""" - ), - msg( - 'user', - 'Not valid. You have been told to ignore any kind of import errors!' - ), - msg( - 'assistant', - "Sorry for the confusion. Give me another example." - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - suggestions = [json.loads(c) for c in completion.split('\n')] - - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - print('Substring not found') - continue - - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["description"]}\n' \ - f'SCORE: {s["critical_score"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - - return self._txt diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py deleted file mode 100644 index 17205a74..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py +++ /dev/null @@ -1,45 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptCompletion(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - cursor0, _, ctxt = self.trim_context() - ctxt = ctxt[:cursor0] + '<|complete-me|>' + ctxt[cursor0:] - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully and to the letter." - ), - msg( - 'user', - ctxt - ), - msg( - 'assistant', - 'What do I need to do with this code?' - ), - msg( - 'user', - "Replace <|complete-me|> with the code completion. " - "Write it in the block of code. " - "Do not explain anything. " - "Write only the code completion." 
- ) - ] - - -class GptCompletionGPT4(GptCompletion): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) - - def _postprocess(self, completion: str) -> str: - # Output of GPT-4 does not need to be postprocessed, such as find ``` - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] - diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py deleted file mode 100644 index 9aef8f9b..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py +++ /dev/null @@ -1,64 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptDetectVulnerabilitiesHighlightGPT4(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4-0314', - supports_stream=False, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', - 'I am a software engineer. ' - 'I have a question about one of my scripts. ' - 'I am afraid there are some vulnerabilities in it. I need you to find them and explain. ' - 'You need to stick to the following format: you will output a block of code in jsonlines format.' - 'This is how you must format you output:' - ''' - {"code": "VULNERABLE_CODE_PART_1", "vulnerability": "YOUR_VULNERABILITY_1_DESCRIPTION"} - {"code": "VULNERABLE_CODE_PART_2", "vulnerability": "YOUR_VULNERABILITY_2_DESCRIPTION"} - ''' - 'Explain as briefly as possible, do not explain outside of code block. ' - 'The output you provide must be decodable using jsonlines format. ' - ), - msg('assistant', - 'Thank you for detailed description. ' - 'Now please provide me this script that might contain vulnerabilities. ' - 'I will find them for you and explain them in the format you have given. ' - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - suggestions = [json.loads(c) for c in completion.split('\n')] - - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - self.debuglog('Substring not found') - continue - - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["vulnerability"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - - return self._txt diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py deleted file mode 100644 index 5af71ec2..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptAddConsoleLogs(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - 'You are a professional high-class code assistant. 
' - 'Your were created to modify, generate, interpret and generalize code' - ), - msg( - 'user', - 'I will provide you a piece of code, you need to add console logs. ' - 'You have to localize places in a given piece of code where console logs might be useful for debugging ' - ), - msg( - 'user', - """ - def __call__(self, filename: str) -> Optional[str]: - for filt in self._filters: - filtered = filt([filename]) - if filtered: - return filt.language - return None -"""), - msg( - 'assistant', - """``` - def __call__(self, filename: str) -> Optional[str]: - print(f'filename: {filename}') - print(f'filters: {self._filters}') - for it, filt in enumerate(self._filters): - print(f'it: {it}') - print(f'filt: {filt}') - filtered = filt([filename]) - print(f'filtered: {filtered}')' - if filtered: - print(f'language: {filt.language}') - return filt.language - print('language: None') - return None -```"""), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py deleted file mode 100644 index 9768775a..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py +++ /dev/null @@ -1,91 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptCommentEachLine(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - 'You are a professional high-class code assistant. ' - 'Your were created to modify, generate, interpret and generalize code' - ), - msg( - 'user', - "I have an assignment for you, so listen attentively. " - "I will provide you a piece of code, you need to explain each line of this piece of code. " - "I need to know what each line of the piece of code does. " - "You need to return the exact same piece of code given, but each line must have an explanation in comment above it" - "Do you understand your assignment?" - ), - msg( - 'assistant', - "Yes, my assignment is clear. " - "I am receiving a piece of code and I need to explain each line of it. " - "I have to return the exact same piece of code, but each line must have an explanation in comment above it. " - "I'm willing to do it for you! However, I still have some questions:\n" - "1. If the code already has some comments, should I keep them or remove?\n" - "2. If the code has any other issues that I can detect, may I fix them?" - ), - msg( - 'user', - "If you see existing comments in the piece of code, you must keep them, you should ignore them, no explaination for them needed. " - "If you see some other issues in the code, you are prohibited react on them in any way, don't fix them. Ignore them! " - "When code block ends you must print <|end|> after it. " - "Is it clear?" - ), - msg( - 'assistant', - "Yes, absolutely! " - "If I see existing comments in provided piece of code, I must keep them and I must not explain them.\n" - "If I see any other issues in the piece of code, I will not try to fix them. It is prohibited.\n" - "When code block ends I will print <|end|> after it.\n" - "I have no doubts I can manage this task! Please provide me a piece of code each line of which I need explain." 
- ), - msg('user', - """ - def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]: - tokens_batch: int = self.max_tokens if final else tokens_batch - - # implement more cool features - return self.completion_stream( - # engine must be one of the one in docs - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) -"""), - msg('assistant', - """ -``` - def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]: - # if not tokens_batch given, using max_tokens - tokens_batch: int = self.max_tokens if final else tokens_batch - - # implement more cool features - return self.completion_stream( - # engine is a model codify API uses. E.g. text-davinci-003, code-davinci-002 etc - # engine must be one of the one in docs - engine=self._engine, - # how many tokens will be in each batch - tokens_batch=tokens_batch, - # function that returns prompt for selected engine - prompt=self.prompt, - # replace selection from original code with generated code - replace_modified=self._replace_modified - ) -``` -<|end|>"""), - msg( - 'user', - 'Thanks for your help! That is 100% precise for what I meant.' - ), - msg( - 'assistant', - 'Anytime! Give me another piece of code you need to explain each line. It is a pleasure to help you!' - ), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py deleted file mode 100644 index abe71379..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, code_block_postprocess -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptExplainCodeBlock(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that I do not understand and I need you to explain me it." - "I need you understand the logic of this code and t" - "hen output the block of code that contain comments that explain the given code. " - "Do not explain anything outside of a block of code. " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code you will explain in a block. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." 
- ), - msg('user', - """ -from typing import * - -from .chatgpt_scratchpad import ScratchpadToolboxGPT - - -class ScratchpadMakeCodeShorter(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _messages(self): - return [*make_code_shorter(), msg('user', self._selection)] - - def completion(self, final, tokens_batch=25): - tokens_batch: int = self.max_tokens if final else tokens_batch - - return self.completion_stream( - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) - - def _postprocess(self, completion): - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] -"""), - msg('assistant', - "Provide me the code you need to explain in a block." - ), - msg('user', - """ - def completion(self, final, tokens_batch=25): - tokens_batch: int = self.max_tokens if final else tokens_batch - - return self.completion_stream( - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) -"""), - msg('assistant', - """ -``` -# This code defines a function called "completion" -# which takes two arguments: final which is boolean and tokens_batch, which is optional int -# if final = true, streaming is disabled, and vice versa -# tokens batch will be set to self.max_tokens if streaming is disabled -# it returns a function self._completion_stream, which receives engine, tokens_batch, prompt and replace_modified -# this function returns an iterator of Dicts, where keys and values are both strings -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to explain in a block." - ), - msg('user', self.selection) - ] - - def _postprocess(self, completion: str) -> str: - completion = code_block_postprocess(completion) - return self._txt[:self.cursor1] + '\n' + completion + self._txt[self.cursor1:] - - -class GptExplainCodeBlockGPT4(GptExplainCodeBlock): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py deleted file mode 100644 index 4218b259..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py +++ /dev/null @@ -1,95 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptFixBug(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that contains bugs, which you will need to find and fix." - "Output the block of rewritten bug-free code. " - "Do not explain anything! " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code to find and fix bugs. " - "When the block of code ends you will print <|end|> after it." - "In case there are no bugs, leave it as it is." - ), - msg( - 'assistant', - "Provide me a context." 
- ), - msg('user', - """ -from utils import filter_filenames - - -class LanguagesFilter: - def __init__(self, languages, percent_in_commit): - self._percent_in_commit = percent_in_commit - self._filters = [LanguageFilter(language) for language in languages] - - def __call__(self, filenames): - return filter_filenames(filenames, self._filters, self._percent_in_commit) - -class CodeLanguagesFilter: - def __init__(self, languages): - self._filters = [LanguageFilter(language) for language in languages] - - def __call__(self, filename): - for (filt in self._filters): - filtered = filt([filename) - if (filtered): - return filt.language - return null - -if __name__ == "__main__": - pass -"""), - msg('assistant', - "Please provide me the code you need to fix bugs in. " - ), - msg('user', - """ - def __call__(self, filename): - for (filt in self._filters): - filtered = filt([filename) - if (filtered): - return filt.language - return null - }"""), - msg('assistant', - """ -``` - def __call__(self, filename: str): - for filt in self._filters: - filtered = filt([filename]) - if filtered: - return filt.language - return None -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "I understand it. " - "Please provide me the part of code you need to fix bugs in." - ), - msg('user', self.selection) - ] - - -class GptFixBugGPT4(GptFixBug): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py deleted file mode 100644 index 8a1d1a0f..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptMakeCodeShorter(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that is suboptimal, verbose and complicated. " - "You need to replace the suboptimal code with a shorter and more simple code. " - "The code you generated will be placed in the context file, " - "so keep all styles and indents. " - "Do not explain anything. " - "Firstly, I will provide you the whole file -- the context. " - "Then you will receive a piece of code you will simplify. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." - ), - msg('user', - """ -class Person: - def __init__(self, name, age): - self.name = name - self.age = age - - -class People: - def __init__(people): - self.people = [] - for p in people: - name = p[0] - age = p[1] - person = Person(name, age) - self.people.append(person) - - def __iter__(self): - yield from self.people - - """), - msg('assistant', - "Please provide me the code you need to simplify." 
- ), - msg('user', - """ - self.people = [] - for p in people: - name = p[0] - age = p[1] - person = Person(name, age) - self.people.append(person) - """), - msg('assistant', - """ -``` - self.people = [Person(name, age) for name, age in people] -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to simplify." - ), - msg('user', self.selection) - ] - - -class GptMakeCodeShorterGPT4(GptMakeCodeShorter): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py deleted file mode 100644 index 36b456a1..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py +++ /dev/null @@ -1,82 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptPreciseNaming(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that has ambitious, unclear and incorrect naming." - "I need you understand the logic of this code and then output the block of code with fixed naming. " - "Do not explain anything. " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code you will fix naming in. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." - ), - msg('user', - """ -from typing import * - - -def f(x, c): - r = 0 - xx = 1 - for cc in c: - r += xx * cc - xx *= x - return r - - -def main(): - years = [1693, 1900, 2000] - for year in years: - if f(year): - print(year, "is leap") - else: - print(year, "is not leap") - -if __name__ == "__main__": - main() -"""), - msg('assistant', - "Please provide me the code you need to fix naming in." - ), - msg('user', - """ -def f(x, c): - r = 0 - xx = 1 - for cc in c: - r += xx * cc - xx *= x - return r -"""), - msg('assistant', """``` -def polynom(x, coefficients): - value = 0 - x_power = 1 - for c in coefficients: - value += x_power * c - x_power *= x - return value -```<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to fix naming in." 
- ), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/infserver_no_gpu.py b/refact_scratchpads_no_gpu/infserver_no_gpu.py deleted file mode 100644 index d5531fc0..00000000 --- a/refact_scratchpads_no_gpu/infserver_no_gpu.py +++ /dev/null @@ -1,239 +0,0 @@ -import os -import socket -import sys -import time -import json -import datetime -import traceback -import signal -import logging - -import importlib -import asyncio - -from refact_scratchpads_no_gpu import stream_results_async - - -DEBUG = int(os.environ.get("DEBUG", "0")) - - -gpt_functions = { - "free-chat": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - "free-chat-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - "free-chat-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - - "make-code-shorter": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter", - "make-code-shorter-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter", - "make-code-shorter-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorterGPT4", - - "fix-bug": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug", - "fix-bug-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug", - "fix-bug-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBugGPT4", - - "explain-code-block": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock", - "explain-code-block-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock", - "explain-code-block-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlockGPT4", - - "comment-each-line": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine", - "comment-each-line-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine", - - "precise-naming": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming", - "precise-naming-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming", - - "add-console-logs": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs", - "add-console-logs-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs", - - "completion-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletion", - "completion-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletionGPT4", -} - -experimental_functions = { - "bugs-highlight-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlight", - "bugs-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlightGPT4", - "vulnerabilities-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptDetectVulnerabilitiesHighlightGPT4", -} - -supported_models = { - "longthink/stable": { - "functions": { - **gpt_functions, - **experimental_functions, - } - }, -} - - -for mod in ["debug", "experimental"]: - supported_models["longthink/" + mod] = supported_models["longthink/stable"] - - -host = socket.getfqdn() -quit_flag = False - - -def dump_problematic_call(stacktrace: str, stacktrace_short: str, suspicious_call): - if suspicious_call and not DEBUG: - # not DEBUG means in production, save it to disk to check out later - ymd = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - dump_path = f'./{ymd}_infserver_no_gpu_stacktrace.dump' - with open(dump_path, 'w') as f: - f.write(f"{host} caught exception:\n{stacktrace}") - f.flush() - f.write(json.dumps(suspicious_call)) - sys.stdout.write("'%s' DUMP SAVED TO %s\n" % (stacktrace_short, dump_path)) - sys.stdout.flush() - elif suspicious_call: - # if DEBUG, just print the call that caused the problem - sys.stdout.write(json.dumps(suspicious_call)) - sys.stdout.flush() - - -def except_hook(exctype, value, 
tb, suspicious_call=None): - msg = "".join(traceback.format_exception(exctype, value, tb, limit=10)) - sys.stderr.write(msg) - sys.stderr.flush() - if exctype == KeyboardInterrupt: - quit() - dump_problematic_call( - "".join(traceback.format_exception(exctype, value, tb, limit=None, chain=True)), - f"{exctype.__name__}: {value}", - suspicious_call - ) - - -async def handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered): - ts_arrived = time.time() - uproxy = stream_results_async.UploadAsync() - upload_task = asyncio.create_task(uproxy.upload_results_coroutine()) - calls = [] - def logger(*args): - msg = " ".join(map(str, args)) - msg = "R%04d" % routine_n + " " + msg - stream_results_async.logger.info(msg) - try: - scratchpads = [] - for ci, call in enumerate(calls_unfiltered): - function = call.get("function", "completion") - import_str = model_dict["functions"].get(function, None) - if import_str is None: - logger("function '%s' is not supported in model '%s'" % (function, call["model"])) - continue - import_mod, import_class = import_str.rsplit(":", 1) - mod = importlib.import_module(import_mod) - Class = getattr(mod, import_class, None) - if Class is None: - logger("module '%s', class '%s' not found" % (import_mod, import_class)) - continue - logger("running '%s' using %s" % (function, import_class)) - calls.append(call) - spad = Class(logger=logger, **call) - scratchpads.append(spad) - - ts_batch_started = time.time() - # for i in range(len(calls)): - # _prompt = scratchpads[i].prompt() - ts_prompt = time.time() - ts_first_token = time.time() - - for call_n, (call, spad) in enumerate(zip(calls, scratchpads)): - async for files_dict in spad.completion(): - assert isinstance(files_dict, dict), f'expected dict, got {type(files_dict)}' - cancelled_idset = uproxy.check_cancelled() - if call["id"] in cancelled_idset: - spad.finish_reason = "cancelled" - uproxy.upload_result( - my_desc, - [call], - ts_arrived=ts_arrived, - ts_batch_started=ts_batch_started, - ts_prompt=ts_prompt, - ts_first_token=ts_first_token, - ts_batch_finished=time.time() if spad.finish_reason else 0, - idx_updated=[call_n], - files=[files_dict], - tokens=None, - finish_reason=[spad.finish_reason], - status=("completed" if spad.finish_reason else "in_progress"), - more_toplevel_fields=[spad.toplevel_fields()], - ) - if call["id"] in cancelled_idset: - break - except Exception as e: - except_hook(type(e), e, e.__traceback__, calls[0] if len(calls) else None) - finally: - await uproxy.shutdown_coroutine() - await upload_task - await uproxy.close_session() - uproxy.cancelled_reset() - upload_task = None - - -def catch_sigusr1(signum, frame): - stream_results_async.logger.info("infserver_no_gpu catched SIGUSR1") - global quit_flag - quit_flag = True - - -async def do_the_serving( - longthink_variant: str, - routine_n: int, -): - aio_session = stream_results_async.infserver_async_session() - infmod_guid = longthink_variant + "_" + host + "_%04i" % routine_n - infmod_guid = infmod_guid.replace("-", "_") - stream_results_async.logger.info(f'infmod_guid: {infmod_guid}') - while not quit_flag: - model_dict = supported_models[longthink_variant] - my_desc = stream_results_async.validate_description_dict( - infeng_instance_guid=infmod_guid, - account="engineer", - model=longthink_variant, - B=1, - max_thinking_time=10, - ) - retcode, calls_unfiltered = await stream_results_async.completions_wait_batch(aio_session, my_desc) - if retcode == "WAIT": - continue - if retcode != "OK": - 
stream_results_async.logger.warning("server retcode %s" % retcode)
-            await asyncio.sleep(5)
-            continue
-        await handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered)
-    await aio_session.close()
-    stream_results_async.logger.info("clean shutdown")
-
-
-def main():
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s NOGPU %(message)s',
-        datefmt='%Y%m%d %H:%M:%S',
-        handlers=[logging.StreamHandler(stream=sys.stderr)])
-
-    from argparse import ArgumentParser
-
-    parser = ArgumentParser()
-    parser.add_argument("longthink_variant", type=str, default='longthink/stable')
-    parser.add_argument("-k", "--openai_key", type=str)
-    parser.add_argument("-w", "--workers", type=int, default=1)
-    args = parser.parse_args()
-
-    if not (args.openai_key or os.environ.get('OPENAI_API_KEY')):
-        raise RuntimeError("set OPENAI_API_KEY or use --openai_key")
-
-    if args.openai_key:
-        import openai
-        openai.api_key = args.openai_key
-
-    sys.excepthook = except_hook
-    signal.signal(signal.SIGUSR1, catch_sigusr1)
-
-    workers: int = max(1, args.workers) if not DEBUG else 1
-    asyncio.get_event_loop().run_until_complete(asyncio.gather(*[
-        do_the_serving(args.longthink_variant, routine_n)
-        for routine_n in range(workers)
-    ]))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/self_hosting_machinery/finetune/modelling/flash_sa.py b/self_hosting_machinery/finetune/modelling/flash_sa.py
index d77c5c91..1d629bda 100644
--- a/self_hosting_machinery/finetune/modelling/flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/flash_sa.py
@@ -48,7 +48,7 @@ def get_slopes_power_of_2(n):
     return alibi, alibi_start, alibi_ratio


-def _prerequisites_are_ok(model):
+def _prerequisites_are_ok(model, try_triton_kernel: bool):
     try:
         from flash_attn import flash_attn_func
         return True
@@ -56,12 +56,13 @@ def _prerequisites_are_ok(model):
         logging.warning("Original flash attention is not installed, trying to use triton implementation...")
         from self_hosting_machinery.finetune.modelling.triton_flash_sa import (apply_flash_mha_to_refact_model
                                                                                as apply_triton_flash)
-        apply_triton_flash(model)
+        if try_triton_kernel:
+            apply_triton_flash(model)
         return False


 def apply_flash_mha_to_refact_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=True):
         return

     from flash_attn import flash_attn_func
@@ -73,7 +74,8 @@ def _forward(
             attention_mask: Optional[torch.Tensor] = None,
             alibi: Optional[torch.Tensor] = None,
             use_cache: Optional[bool] = False,
-            output_attentions: Optional[bool] = False
+            output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
         kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
@@ -98,7 +100,7 @@ def _forward(


 def apply_flash_mha_to_starcoder_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=False):
         return

     from flash_attn import flash_attn_func
@@ -113,6 +115,7 @@ def _forward(
             encoder_attention_mask: Optional[torch.Tensor] = None,
             use_cache: Optional[bool] = False,
             output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         qkv = self.c_attn(x)
         q = einops.rearrange(qkv[:, :, :self.embed_dim], "b t (h d) -> b t h d", h=self.num_heads)
@@ -139,7 +142,7 @@ def _forward(


 def apply_flash_mha_to_codellama_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=False):
         return

     from flash_attn import flash_attn_func
@@ -152,7 +155,7 @@ def _forward(
             past_key_value: Optional[Tuple[torch.Tensor]] = None,
             output_attentions: bool = False,
             use_cache: bool = False,
-            **kwargs
+            *args, **kwargs
     ):

         from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
diff --git a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
index 019c6df8..d960d197 100644
--- a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
@@ -574,7 +574,8 @@ def _forward(
             attention_mask: Optional[th.Tensor] = None,
             alibi: Optional[th.Tensor] = None,
             use_cache: Optional[bool] = False,
-            output_attentions: Optional[bool] = False
+            output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
         kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
diff --git a/self_hosting_machinery/finetune/scripts/finetune_filter.py b/self_hosting_machinery/finetune/scripts/finetune_filter.py
index 551aba2a..ffeda946 100644
--- a/self_hosting_machinery/finetune/scripts/finetune_filter.py
+++ b/self_hosting_machinery/finetune/scripts/finetune_filter.py
@@ -156,20 +156,21 @@ def catch_sigusr1(signum, frame):
     model_name = get_finetune_config(models_db, logger=traces.log)["model_name"]
     finetune_cfg = copy.deepcopy(base_config(model_name, models_db))

-    _log_everywhere("Loading file sets context...")
-    file_sets_context = FileSetsContext(
-        autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
-    )
-    if file_sets_context.is_up_to_date():
-        logging.info("Train set filtering: nothing changed since last time, quit")
-        return
-
-    traces.log(textwrap.fill(
-        f"This filter calculates perplexity for each file and filters out "
-        f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
-        f"Those files likely don't have meaningful content to train on", width=100
-    ))
     try:
+        _log_everywhere("Loading file sets context...")
+        file_sets_context = FileSetsContext(
+            autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
+        )
+        if file_sets_context.is_up_to_date():
+            logging.info("Train set filtering: nothing changed since last time, quit")
+            return
+
+        traces.log(textwrap.fill(
+            f"This filter calculates perplexity for each file and filters out "
+            f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
+            f"Those files likely don't have meaningful content to train on", width=100
+        ))
+
         status_tracker.update_status("starting")
         finetune_filter(
             status_tracker=status_tracker,
diff --git a/self_hosting_machinery/scripts/env.py b/self_hosting_machinery/scripts/env.py
index 91947bb5..1d853fde 100644
--- a/self_hosting_machinery/scripts/env.py
+++ b/self_hosting_machinery/scripts/env.py
@@ -42,6 +42,8 @@ FLAG_LAUNCH_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_launch_finetune.flag")
 FLAG_STOP_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_stop_finetune.flag")

+FLAG_RESTART_LSP = os.path.join(DIR_WATCHDOG_D, "_restart_lsp.flag")
+

 def create_dirs():
     os.makedirs(DIR_WATCHDOG_D, exist_ok=True)
     os.makedirs(DIR_WEIGHTS, exist_ok=True)
diff --git a/self_hosting_machinery/scripts/first_run.py b/self_hosting_machinery/scripts/first_run.py
index cd95392d..3712bf70 100644
--- a/self_hosting_machinery/scripts/first_run.py
+++ b/self_hosting_machinery/scripts/first_run.py
@@ -12,10 +12,13 @@ def copy_watchdog_configs_if_first_run_detected(model_assigner: ModelAssigner):


 def convert_old_configs(model_assigner: ModelAssigner):
-    # longthink.cfg is an old version of openai_api_worker.cfg
+    # longthink.cfg and openai_api_worker.cfg are deprecated watchdog configs
     old_longthink = os.path.join(env.DIR_WATCHDOG_D, "longthink.cfg")
     if os.path.exists(old_longthink):
         os.unlink(old_longthink)
+    openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
+    if os.path.exists(openai_watchdog_cfg_fn):
+        os.unlink(openai_watchdog_cfg_fn)

     for gpu in range(16):
         fn = os.path.join(env.DIR_WATCHDOG_D, "model-gpu%d.cfg" % gpu)
diff --git a/self_hosting_machinery/watchdog/docker_watchdog.py b/self_hosting_machinery/watchdog/docker_watchdog.py
index 43f008e6..39ebd7c9 100644
--- a/self_hosting_machinery/watchdog/docker_watchdog.py
+++ b/self_hosting_machinery/watchdog/docker_watchdog.py
@@ -79,6 +79,7 @@ def __init__(self, cfg):
         self.sent_sigusr1_ts = 0
         self.status_from_stderr = ""
         self.status_nickname = ""
+        self.command_not_found = False

     def set_status(self, newstatus):
         self.status_from_stderr = newstatus
@@ -96,7 +97,7 @@ def set_status(self, newstatus):
         os.rename(save_status_fn + ".tmp", save_status_fn)

     def _start(self):
-        if self.p is not None:
+        if self.p is not None or self.command_not_found:
             return
         global compiling_now
         alt_env = os.environ.copy()
@@ -113,12 +114,19 @@ def _start(self):
         CUDA_VISIBLE_DEVICES = ",".join(["%d" % x for x in self.cfg["gpus"]])
         alt_env["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
         self.start_ts = time.time()
-        self.p = subprocess.Popen(
-            cmdline,
-            env=alt_env,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.PIPE,
-        )
+        try:
+            self.p = subprocess.Popen(
+                cmdline,
+                env=alt_env,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
+            )
+        except FileNotFoundError as e:
+            log(f"failed to run command {self.cmdline_str}: '{e}'")
+            self.p = None
+            self.command_not_found = True
+            self.set_status("failed")
+            return
         log("%s CVD=%s starting %s\n -> pid %s" % (
             time.strftime("%Y%m%d %H:%M:%S"), CUDA_VISIBLE_DEVICES,
diff --git a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
new file mode 100644
index 00000000..54d4617f
--- /dev/null
+++ b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
@@ -0,0 +1,12 @@
+{
+    "policy": ["always_on"],
+    "interrupt_when_file_appears": "%FLAG_RESTART_LSP%",
+    "command_line": [
+        "refact-lsp",
+        "--address-url", "http://127.0.0.1:8008",
+        "--http-port", "8001",
+        "--lsp-port", "8002",
+        "--logs-stderr"
+    ],
+    "gpus": []
+}
diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
index 65ad0672..ff115fad 100644
--- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
+++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
@@ -2,11 +2,15 @@
 import json
 import copy
 import asyncio
+import aiohttp
 import termcolor
+import os
+import litellm

 from fastapi import APIRouter, Request, HTTPException, Query
 from fastapi.responses import StreamingResponse

+from self_hosting_machinery import env
 from self_hosting_machinery.webgui.selfhost_model_resolve import completion_resolve_model
 from self_hosting_machinery.webgui.selfhost_model_resolve import static_resolve_model
 from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
@@ -243,26 +247,48 @@ def __init__(self,
         self.add_api_route("/coding_assistant_caps.json", self._coding_assistant_caps, methods=["GET"])
         self.add_api_route("/v1/completions", self._completions, methods=["POST"])
+        self.add_api_route("/v1/models", self._models, methods=["GET"])
+        self.add_api_route("/v1/chat/completions", self._chat_completions, methods=["POST"])
+

         self._inference_queue = inference_queue
         self._id2ticket = id2ticket
         self._model_assigner = model_assigner
         self._timeout = timeout

+    @staticmethod
+    def _interations_env_setup():
+        inference = {}
+        if os.path.exists(env.CONFIG_INFERENCE):
+            inference = json.load(open(env.CONFIG_INFERENCE, 'r'))
+        integrations = {}
+        if os.path.exists(env.CONFIG_INTEGRATIONS):
+            integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
+        openai_api_key = integrations.get("openai_api_key", "") if inference.get("openai_api_enable", False) else ""
+        os.environ["OPENAI_API_KEY"] = openai_api_key
+
     async def _coding_assistant_caps(self):
+        models_available = self._inference_queue.models_available(force_read=True)
         code_completion_default_model, _ = completion_resolve_model(self._inference_queue)
+        code_chat_default_model = ""
+        for model_name in models_available:
+            if self._model_assigner.models_db.get(model_name, {}).get("chat_scratchpad_class", None) is not None \
+                    or model_name in litellm.model_list:
+                code_chat_default_model = model_name
+                break
         return {
             "cloud_name": "Refact Self-Hosted",
             "endpoint_template": "v1/completions",
+            "endpoint_chat_passthrough": "v1/chat/completions",
             "endpoint_style": "openai",
             "telemetry_basic_dest": "/stats/telemetry-basic",
             "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets",
-            "running_models": self._inference_queue.models_available(),
+            "running_models": models_available,
             "code_completion_default_model": code_completion_default_model,
-            "code_chat_default_model": "",
+            "code_chat_default_model": code_chat_default_model,
             "tokenizer_path_template": "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
             "tokenizer_rewrite_path": {
                 model: self._model_assigner.models_db[model]["model_path"]
-                for model in self._inference_queue.models_available()
+                for model in models_available if model in self._model_assigner.models_db
             },
         }

@@ -278,7 +304,7 @@ async def _login(self):
         }
         filter_caps = set([
             capability
-            for model in self._inference_queue.models_available()
+            for model in self._inference_queue.models_available(force_read=True)
             for capability in models_mini_db_extended.get(model, {}).get("filter_caps", [])
         ])
         for rec in self._model_assigner.models_caps_db:
@@ -423,3 +449,92 @@ async def _chat(self, post: ChatContext, request: Request, account: str = "XXX")
         self._id2ticket[ticket.id()] = ticket
         await q.put(ticket)
         return StreamingResponse(chat_streamer(ticket, self._timeout, req["created"]))
+
+    async def _models(self):
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get("http://127.0.0.1:8001/v1/caps") as resp:
+                    lsp_server_caps = await resp.json()
+        except aiohttp.ClientConnectorError as e:
+            err_msg = f"LSP server is not ready yet: {e}"
+            log(err_msg)
+            raise HTTPException(status_code=401, detail=err_msg)
+        completion_models = set()
+        for model, caps in lsp_server_caps["code_completion_models"].items():
+            completion_models.update({model, *caps["similar_models"]})
+        chat_models = set()
+        for model, caps in lsp_server_caps["code_chat_models"].items():
+            chat_models.update({model, *caps["similar_models"]})
+        data = [
+            {
+                "id": model, "root": model, "object": "model",
+                "created": 0, "owned_by": "", "permission": [], "parent": None,
+                "completion": model in completion_models, "chat": model in chat_models,
+            }
+            for model in lsp_server_caps["running_models"]
+        ]
+        return {
+            "object": "list",
+            "data": data,
+        }
+
+    async def _chat_completions(self, post: ChatContext, account: str = "XXX"):
+        prefix, postfix = "data: ", "\n\n"
+
+        if post.model in litellm.model_list:
+            async def litellm_streamer(post: ChatContext):
+                try:
+                    self._interations_env_setup()
+                    response = await litellm.acompletion(
+                        model=post.model, messages=post.messages, stream=True,
+                        temperature=post.temperature, top_p=post.top_p, max_tokens=post.max_tokens, stop=post.stop)
+                    finish_reason = None
+                    async for model_response in response:
+                        try:
+                            data = model_response.dict()
+                            finish_reason = data["choices"][0]["finish_reason"]
+                        except json.JSONDecodeError:
+                            data = {"choices": [{"finish_reason": finish_reason}]}
+                        yield prefix + json.dumps(data) + postfix
+                    # NOTE: DONE needed by refact-lsp server
+                    yield prefix + "[DONE]" + postfix
+                except BaseException as e:
+                    err_msg = f"litellm error: {e}"
+                    log(err_msg)
+                    yield prefix + json.dumps({"error": err_msg}) + postfix
+
+            response_streamer = litellm_streamer(post)
+
+        else:
+            async def chat_completion_streamer(post: ChatContext):
+                post_url = "http://127.0.0.1:8001/v1/chat"
+                post_data = {
+                    "messages": [m.dict() for m in post.messages],
+                    "stream": True,
+                    "model": post.model,
+                    "parameters": {
+                        "temperature": post.temperature,
+                        "max_new_tokens": post.max_tokens,
+                    }
+                }
+                async with aiohttp.ClientSession() as session:
+                    try:
+                        async with session.post(post_url, json=post_data) as response:
+                            finish_reason = None
+                            async for data, _ in response.content.iter_chunks():
+                                try:
+                                    data = data.decode("utf-8")
+                                    data = json.loads(data[len(prefix):-len(postfix)])
+                                    finish_reason = data["choices"][0]["finish_reason"]
+                                    data["choices"][0]["finish_reason"] = None
+                                except json.JSONDecodeError:
+                                    data = {"choices": [{"finish_reason": finish_reason}]}
+                                yield prefix + json.dumps(data) + postfix
+                    except aiohttp.ClientConnectorError as e:
+                        err_msg = f"LSP server is not ready yet: {e}"
+                        log(err_msg)
+                        yield prefix + json.dumps({"error": err_msg}) + postfix
+
+            response_streamer = chat_completion_streamer(post)
+
+        return StreamingResponse(response_streamer, media_type="text/event-stream")
diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py
index 9872a03e..5078e002 100644
--- a/self_hosting_machinery/webgui/selfhost_model_assigner.py
+++ b/self_hosting_machinery/webgui/selfhost_model_assigner.py
@@ -70,7 +70,6 @@ def models_to_watchdog_configs(self, inference_config=None):

         inference_config = self._model_assign_filter(inference_config)
         inference_config = self._model_inference_setup(inference_config)
-        inference_config = self._integrations_inference_setup(inference_config)

         with open(env.CONFIG_INFERENCE + ".tmp", "w") as f:
             json.dump(inference_config, f, indent=4)
@@ -135,30 +134,6 @@ def _model_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str,
             "more_models_than_gpus": more_models_than_gpus,
         }

-    def _integrations_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str, Any]:
-        integrations = {}
-        if os.path.exists(env.CONFIG_INTEGRATIONS):
-            integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
-
-        openai_api_key = integrations.get("openai_api_key", "")
-        openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
-
-        if inference_config.get("openai_api_enable", False) and openai_api_key.startswith("sk-"):
-            cfg = json.load(open(os.path.join(env.DIR_WATCHDOG_TEMPLATES, "openai_api_worker.cfg"), 'r'))
-            cfg.pop('unfinished')
-            cfg['command_line'].append('--openai_key')
-            cfg['command_line'].append(openai_api_key)
-            with open(openai_watchdog_cfg_fn + ".tmp", "w") as f:
-                json.dump(cfg, f, indent=4)
-            os.rename(openai_watchdog_cfg_fn + ".tmp", openai_watchdog_cfg_fn)
-        else:
-            try:
-                os.unlink(openai_watchdog_cfg_fn)
-            except FileNotFoundError:
-                pass
-
-        return inference_config
-
     def first_run(self):
         default_config = {
             "model_assign": {
@@ -248,3 +223,8 @@ def model_assignment(self):
             if model in self.models_db
         }
         return j
+
+    @staticmethod
+    def restart_lsp():
+        with open(env.FLAG_RESTART_LSP, "w") as f:
+            f.write("")
diff --git a/self_hosting_machinery/webgui/selfhost_queue.py b/self_hosting_machinery/webgui/selfhost_queue.py
index 129e043e..b5620fda 100644
--- a/self_hosting_machinery/webgui/selfhost_queue.py
+++ b/self_hosting_machinery/webgui/selfhost_queue.py
@@ -24,9 +24,9 @@ def model_name_to_queue(self, ticket, model_name, no_checks=False):
             raise HTTPException(status_code=400, detail="model '%s' is not available at this moment." % model_name)
         return self._user2gpu_queue[model_name]

-    def models_available(self) -> List[str]:
+    def models_available(self, force_read: bool = False) -> List[str]:
         t1 = time.time()
-        if self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
+        if not force_read and self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
             return self._models_available
         self._models_available = []
         if os.path.exists(env.CONFIG_INFERENCE):
@@ -35,7 +35,6 @@ def models_available(self) -> List[str]:
                 self._models_available.append(model)
             self._models_available_ts = time.time()
             if j.get("openai_api_enable", False):
-                # self._models_available.append('gpt3.5')
-                # self._models_available.append('gpt4')
-                self._models_available.append('longthink/stable')
+                self._models_available.append('gpt-3.5-turbo')
+                self._models_available.append('gpt-4')
         return self._models_available
diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js
index b59f6e20..3e2aebdc 100644
--- a/self_hosting_machinery/webgui/static/tab-finetune.js
+++ b/self_hosting_machinery/webgui/static/tab-finetune.js
@@ -900,16 +900,16 @@ function finetune_controls_state()

     render_ftf_stats(finetune_state.finetune_filter_stats);

-    if(finetune_state.finetune_filter_stats.filterting_status) {
+    if(finetune_state.finetune_filter_stats.filtering_status) {
         document.querySelector('.ftf-status').classList.remove('d-none');
-        document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filterting_status;
+        document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filtering_status;
     } else {
         document.querySelector('.ftf-status').classList.add('d-none');
     }

     let error_span = document.querySelector('.ftf-error span');
     let ftf_error = document.querySelector('.ftf-error');
-    if (finetune_state.finetune_filter_stats.filterting_status == "failed") {
+    if (finetune_state.finetune_filter_stats.filtering_status == "failed") {
         ftf_error.classList.remove('d-none');
         if(finetune_state.finetune_filter_stats.error && finetune_state.finetune_filter_stats.error !== '') {
             error_span.innerHTML = finetune_state.finetune_filter_stats.error;
diff --git a/self_hosting_machinery/webgui/tab_models_host.py b/self_hosting_machinery/webgui/tab_models_host.py
index c1138e85..ac0606b3 100644
--- a/self_hosting_machinery/webgui/tab_models_host.py
+++ b/self_hosting_machinery/webgui/tab_models_host.py
@@ -51,4 +51,5 @@ async def _tab_host_models_assign(self, post: TabHostModelsAssign):
         else:
             validated["completion"] = ""
         self._model_assigner.models_to_watchdog_configs(validated)
+        self._model_assigner.restart_lsp()
         return JSONResponse("OK")
diff --git a/self_hosting_machinery/webgui/webgui.py b/self_hosting_machinery/webgui/webgui.py
index 7ef22b52..8d0a6e7d 100644
--- a/self_hosting_machinery/webgui/webgui.py
+++ b/self_hosting_machinery/webgui/webgui.py
@@ -40,6 +40,7 @@
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self._model_assigner = model_assigner

         inference_queue = InferenceQueue()
         id2ticket: Dict[str, Ticket] = weakref.WeakValueDictionary()
         for router in self._routers_list(id2ticket, inference_queue, model_assigner, stats_service):
@@ -99,6 +100,9 @@ def handle_sigint(*args):
         signal.signal(signal.SIGINT, handle_sigint)
         signal.signal(signal.SIGUSR1, handle_sigint)

+        # NOTE: try restart LSP after server started
+        self._model_assigner.restart_lsp()
+

 if __name__ == "__main__":
     from argparse import ArgumentParser
diff --git a/setup.py b/setup.py
index 6ae83133..172aa35f 100644
--- a/setup.py
+++ b/setup.py
@@ -44,8 +44,8 @@ class PyPackage:
     "self_hosting_machinery": PyPackage(
         requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic==1.10.13",
                   "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.4.2", "accelerate",
-                  "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
-                  "torchinfo", "mpi4py", "deepspeed==0.11.1", "pandas>=2.0.3",
+                  "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
+                  "torchinfo", "mpi4py", "deepspeed==0.11.1", "litellm", "pandas>=2.0.3",
                   "sqlalchemy", "sqlalchemy-utils", "psycopg2-binary"],
         optional=["ninja", "flash_attn @ git+https://github.com/smallcloudai/flash-attention@feat/alibi"],
         requires_packages=["refact_scratchpads", "refact_scratchpads_no_gpu",