diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cb069eff..a265b014 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -21,7 +21,6 @@ separate terminals.
```commandline
python -m self_hosting_machinery.webgui.webgui
DEBUG=1 python -m self_hosting_machinery.inference.inference_worker --model wizardlm/7b
-DEBUG=1 python -m refact_scratchpads_no_gpu.infserver_no_gpu longthink/stable --openai_key sk-XXXYYY
```
That should be enough to get started!
diff --git a/Dockerfile b/Dockerfile
index 1636c586..0d87175b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,6 +41,14 @@ ENV PATH="${PATH}:/tmp/linguist/bin"
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y python3-packaging
+# refact lsp prerequisites
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y
+ENV PATH="${PATH}:/root/.cargo/bin"
+RUN git clone https://github.com/smallcloudai/refact-lsp.git /tmp/refact-lsp \
+ && cd /tmp/refact-lsp \
+ && cargo install --path . \
+ && rm -rf /tmp/refact-lsp
+
ENV INSTALL_OPTIONAL=TRUE
ENV BUILD_CUDA_EXT=1
ENV GITHUB_ACTIONS=true
diff --git a/refact_scratchpads_no_gpu/async_scratchpad/__init__.py b/refact_scratchpads_no_gpu/async_scratchpad/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py b/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py
deleted file mode 100644
index 60e5885a..00000000
--- a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from typing import Callable, Union, List, Dict, Iterator
-
-
-class AsyncScratchpad:
- def __init__(
- self,
- id: str,
- created: float,
- temperature: float,
- top_p: float,
- max_tokens: int,
- stop_tokens: Union[str, List[str]],
- function: str,
- stream: bool,
- logger: Callable,
- **unused
- ):
- self.id = id
- self.created = created
- self.finish_reason = ""
- self.temp = min(max(float(temperature), 0.0), 1.0)
- self.top_p = top_p
- self.max_tokens = int(max_tokens)
- self.function = function
- self.stream = stream
- self._logger = logger
- tmp = stop_tokens
- if isinstance(tmp, str):
- stop_strings = [tmp]
- else:
- stop_strings = tmp
- self.metering_generated_tokens_n = 0
- self.metering_total_tokens_n = 0
- self.needs_upload = False
- for k, v in unused.items():
- self.debuglog("AsyncScratchpad: unused parameter '%s' = '%s'" % (k, v))
-
- def toplevel_fields(self):
- return {}
-
- def debuglog(self, *args):
- if self._logger:
- self._logger(*args)
-
- async def completion(self) -> Iterator[Dict[str, str]]:
- raise NotImplementedError
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py
deleted file mode 100644
index 22e441de..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from refact_scratchpads_no_gpu.gpt_toolbox.toolbox_functions import *
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import GptChat
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py
deleted file mode 100644
index 1942a8f4..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import asyncio
-import functools
-import json
-from typing import List, Tuple, Dict, Union, Iterator
-
-from refact_scratchpads_no_gpu.async_scratchpad import ascratch
-
-import openai
-import tiktoken
-
-
-def gpt_prices( # Apr 4 2023:
- model_name: str,
-) -> Tuple[int, int]:
- # GPT-4 8K prompt[$0.03 / 1K tokens] generated[$0.06 / 1K tokens]
- if model_name.startswith("gpt-4") or model_name.startswith("gpt4"):
- pp1000t_prompt = 30_000
- pp1000t_generated = 60_000
- # gpt-3.5-turbo $0.002 / 1K tokens
- elif model_name.startswith("gpt-3.5-turbo"):
- pp1000t_prompt = 2_000
- pp1000t_generated = 2_000
- else:
-        raise ValueError(f'gpt_prices: Unknown model: {model_name}')
- return pp1000t_prompt, pp1000t_generated
-
-
-@functools.lru_cache(maxsize=10)
-def engine_to_encoding(engine: str) -> tiktoken.Encoding:
- enc = tiktoken.encoding_for_model(engine)
- return enc
-
-
-ACCUMULATE_N_STREAMING_CHUNKS = 5
-engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not
-
-
-def calculate_chat_tokens(model_name, messages, completion):
- enc = engine_to_encoding(model_name)
- calc_prompt_tokens_n = 2 # warmup
- for d in messages:
- calc_prompt_tokens_n += len(enc.encode(d["content"], disallowed_special=()))
- calc_prompt_tokens_n += len(enc.encode(d["role"], disallowed_special=()))
- calc_prompt_tokens_n += 4 # to switch user/assistant
- calc_generated_tokens_n = len(enc.encode(completion, disallowed_special=())) + 2 # one to switch, another EOF
- return calc_prompt_tokens_n, calc_generated_tokens_n
-
-
-class GptChat(ascratch.AsyncScratchpad):
- def __init__(
- self,
- id: str,
- *,
- created: float,
- temperature: float,
- top_p: float,
- max_tokens: int,
- stop_tokens: Union[str, List[str]],
- messages: List[Dict[str, str]],
- model: str, # always "longthink", don't use
- **more,
- ):
- super().__init__(
- id=id,
- created=created,
- temperature=temperature,
- top_p=top_p,
- max_tokens=max_tokens,
- stop_tokens=stop_tokens,
- **more,
- )
-
- self._model_name = "gpt-3.5-turbo"
- if "gpt4" in self.function or "gpt-4" in self.function:
- self._model_name = "gpt-4"
- self._stream_timeout_sec = 15
-
- messages = messages or []
- if not messages or messages[0].get('role') != 'system':
- messages = [
- {
- "role": "system",
-                    "content": "You are a coding assistant that outputs short answers and gives links to documentation.",
- }, *messages
- ]
- self._messages = messages
- self._completion = ""
-
- @property
- def prices(self) -> Tuple[int, int]:
- return gpt_prices(self._model_name)
-
- async def completion(self) -> Iterator[Dict[str, str]]:
- gen = await openai.ChatCompletion.acreate(
- model=self._model_name,
- messages=self._messages,
- max_tokens=self.max_tokens,
- temperature=self.temp,
- stream=True,
- )
- accum = ""
- role = ""
- tokens = 0
- self.metering_prompt_tokens_n = 0
- self.metering_generated_tokens_n = 0
- try:
- def forward_streaming():
- nonlocal tokens, accum, role
- self._completion += accum
- msg = {
- "chat__role": "assistant",
- "chat__content": self._completion,
- }
- accum = ""
- return msg
-
- while True:
- resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec)
- delta = resp.choices[0].delta
- if "role" in delta:
- role = delta["role"]
- if "content" in delta:
- accum += delta["content"]
- tokens += 1 # assuming 1 token per chunk
- if "swear" in accum:
- raise ValueError("swear!")
- if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None:
- self.finish_reason = resp.choices[0]["finish_reason"]
- if self.finish_reason:
- break
- if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0:
- yield forward_streaming()
- if self.finish_reason: # cancelled from main coroutine
- break
- if self.finish_reason == "":
- self.finish_reason = "END"
- except asyncio.exceptions.TimeoutError as e:
- self.debuglog("CHAT TIMEOUT:", str(type(e)), str(e))
- except Exception as e:
- self.debuglog("CHAT EXCEPTION:", str(type(e)), str(e))
- self.finish_reason = "ERROR"
- yield forward_streaming()
-
- def toplevel_fields(self):
- if not self.finish_reason:
- return {}
- else:
- calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens(
- self._model_name, self._messages, self._completion
- )
- self.metering_prompt_tokens_n = calc_prompt_tokens_n
- self.metering_generated_tokens_n = calc_generated_tokens_n
- metering_message = {
- "metering_prompt_tokens_n": self.metering_prompt_tokens_n,
- "metering_generated_tokens_n": self.metering_generated_tokens_n,
- "pp1000t_prompt": self.prices[0],
- "pp1000t_generated": self.prices[1],
- "model_name": self._model_name,
- }
- self.debuglog(json.dumps(metering_message))
- return metering_message
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py
deleted file mode 100644
index e68d7364..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py
+++ /dev/null
@@ -1,204 +0,0 @@
-import os
-import sys
-import asyncio
-import termcolor
-import functools
-import json
-from typing import List, Union, Callable, Dict, Iterator, Tuple
-
-import openai
-import tiktoken
-
-from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection
-from refact_scratchpads_no_gpu.async_scratchpad import ascratch
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import gpt_prices, calculate_chat_tokens
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import trim_context_tok, code_block_postprocess
-
-
-DEBUG = int(os.environ.get("DEBUG", "0"))
-
-
-@functools.lru_cache(maxsize=10)
-def engine_to_encoding(engine: str) -> tiktoken.Encoding:
- enc = tiktoken.encoding_for_model(engine)
- return enc
-
-
-ACCUMULATE_N_STREAMING_CHUNKS = 5
-engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not
-
-
-class ScratchpadToolboxGPT(ascratch.AsyncScratchpad):
- def __init__(
- self,
- id: str,
- created: float,
- temperature: float,
- max_tokens: int,
- stop_tokens: Union[str, List[str]],
- function: str,
- intent: str,
- cursor_file: str,
- cursor0: int,
- cursor1: int,
- sources: Dict[str, str],
- stream: bool,
- logger: Callable,
-
- model_n: str = "gpt-3.5-turbo",
- supports_stream: bool = True,
- timeout: int = None,
- **kwargs,
- ):
- super().__init__(
- id=id,
- created=created,
- temperature=temperature,
- max_tokens=max_tokens,
- stop_tokens=stop_tokens,
- function=function,
- stream=stream,
- logger=logger,
- **kwargs
- )
- self.intent = intent
- self.cursor_file = cursor_file
- self.cursor0 = cursor0
- self.cursor1 = cursor1
- self.sources = sources
- self.metering_generated_tokens_n = 0
- self.metering_total_tokens_n = 0
- self.needs_upload = False
-
- self._model_n = model_n
- self.__model_name = None
-
- if not supports_stream: self.stream = False
- self._stream_timeout_sec: float = 15
-
- self._txt: str = self.sources.get(self.cursor_file)
-
- self.cursor0, self.cursor1, self.selection = full_line_selection(
- self.cursor0, self.cursor1, self._txt
- )
- self.enc = engine_to_encoding(self.model_name)
-
- def trim_context(self) -> Tuple[int, int, str]:
- cursor0, cursor1, ctxt = trim_context_tok(self.cursor0, self.cursor1, self._txt, self.enc)
- return cursor0, cursor1, ctxt
-
- @property
- def prices(self) -> Tuple[int, int]:
- return gpt_prices(self.model_name)
-
- @property
- def model_name(self) -> str:
- if not self.__model_name:
- model_name = 'gpt-3.5-turbo-0613'
- if self._model_n == 'gpt-3.5-turbo' or self._model_n == 'gpt-4':
- model_name = self._model_n + '-0613'
- self.__model_name = model_name
- return self.__model_name
-
- @model_name.setter
- def model_name(self, val: str):
- self.__model_name = val
-
- async def completion(self) -> Iterator[Dict[str, str]]:
- if self.max_tokens < 1: self.max_tokens = 256
- self.messages = self._messages()
- self.completion_so_far: str = ""
- self.metering_prompt_tokens_n = 0
- self.metering_generated_tokens_n = 0
- self.openai_prompt_tokens_n = 0
- self.openai_completion_tokens = 0
-
- def forward_streaming():
- modified = self._postprocess(self.completion_so_far)
- return {self.cursor_file: modified}
-
- try:
- gen = await openai.ChatCompletion.acreate(
- model=self.model_name,
- messages=self.messages,
- max_tokens=self.max_tokens,
- stream=self.stream,
- temperature=self.temp,
- stop=['<|end|>'],
- )
-
- if not self.stream:
- resp = gen
- self.completion_so_far = resp["choices"][0]["message"]["content"]
- if DEBUG:
- sys.stdout.write(termcolor.colored(self.completion_so_far, "green"))
- sys.stdout.flush()
- self.openai_prompt_tokens_n = resp["usage"]["prompt_tokens"]
- self.openai_completion_tokens = resp["usage"]["completion_tokens"]
- print(resp["usage"])
- self.model_name = resp["model"]
- self.finish_reason = resp["choices"][0]["finish_reason"] or "END"
- else:
- self.finish_reason = ""
- self.completion_so_far = ""
- tokens = 0
- while True:
- resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec)
- delta = resp.choices[0].delta
- if "content" in delta:
- if DEBUG:
- sys.stdout.write(termcolor.colored(delta["content"], "green"))
- sys.stdout.flush()
- self.completion_so_far += delta["content"]
- tokens += 1 # assuming 1 token per chunk
- if "model" in resp:
- self.model_name = resp["model"]
- if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None:
- self.finish_reason = resp.choices[0]["finish_reason"]
- if self.finish_reason:
- break
- if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0:
- yield forward_streaming()
- if self.finish_reason:
- break
- if self.model_name == "":
- self.debuglog("ScratchpadToolboxGPT: model_name is empty")
- if self.finish_reason == "":
- self.finish_reason = "END"
- except asyncio.exceptions.TimeoutError as e:
- self.debuglog("FUNCTIONS TIMEOUT:", str(type(e)), str(e))
- except Exception as e:
- self.debuglog("FUNCTIONS EXCEPTION:", str(type(e)), str(e))
- self.finish_reason = "ERROR"
- yield forward_streaming()
-
- def _messages(self) -> List[Dict[str, str]]:
- raise NotImplementedError
-
- def _postprocess(self, completion: str) -> str:
- completion = code_block_postprocess(completion)
- return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:]
-
- def toplevel_fields(self):
- if not self.finish_reason:
- return {}
- else:
- calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens(
- self.model_name, self.messages, self.completion_so_far
- )
- self.metering_prompt_tokens_n = self.openai_prompt_tokens_n or calc_prompt_tokens_n
- self.metering_generated_tokens_n = self.openai_completion_tokens or calc_generated_tokens_n
- metering_message = {
- "metering_prompt_tokens_n": self.metering_prompt_tokens_n,
- "metering_generated_tokens_n": self.metering_generated_tokens_n,
- "pp1000t_prompt": self.prices[0],
- "pp1000t_generated": self.prices[1],
- "model_name": self.model_name,
- }
- self.debuglog(json.dumps(metering_message))
- return metering_message
-
- def debuglog(self, *args):
- if self._logger:
- self._logger(*args)
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py
deleted file mode 100644
index 75746371..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import re
-from itertools import zip_longest
-from typing import *
-import tiktoken
-
-from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection
-
-
-def msg(role: str, content: str) -> Dict[str, str]:
- assert role in ['system', 'user', 'assistant']
- return {'role': role, 'content': content}
-
-
-def code_block_postprocess(txt: str) -> str:
- lines_code = []
- is_code = False
- for line in txt.split('\n'):
- if '```' in line:
- is_code = not is_code
- continue
- if is_code:
- lines_code.append(line)
-
- code = '\n'.join(lines_code) or txt
- return code
-
-
-def find_substring_positions(substring, text) -> Optional[Tuple[int, int]]:
- words = substring.split()
- pattern = r'\s*'.join(map(re.escape, words))
- match = re.search(pattern, text)
- if not match:
- return
-
- c0, c1, _ = full_line_selection(match.start(), match.end(), text)
- return c0, c1
-
-
-def trim_context_tok(
- cursor0: int,
- cursor1: int,
- text: str,
- enc: tiktoken.Encoding,
- max_tokens: int = 2000
-) -> Tuple[int, int, str]:
- selection = text[cursor0:cursor1]
- tokens_left = max_tokens - len(enc.encode(selection, disallowed_special=()))
-
- lines_before = ((l, 'before') for l in reversed(text[:cursor0].splitlines()))
- lines_after = ((l, 'after') for l in text[cursor1:].splitlines())
- merged_lines = [val for pair in zip_longest(lines_before, lines_after) for val in pair if val]
-
- lines_before_p, lines_after_p = [], []
- for line, t in merged_lines:
- if (line_tok_cnt := len(enc.encode(line, disallowed_special=()))) >= tokens_left: break
- lines_before_p.append(line) if t == 'before' else lines_after_p.append(line)
- tokens_left -= line_tok_cnt
-
- txt_before = '\n'.join(reversed(lines_before_p)) + '\n'
- txt_after = '\n'.join(lines_after_p)
- txt = txt_before + selection + txt_after
- cursor0, cursor1 = len(txt_before), len(txt_before) + len(selection)
-
- # print("chars before %i -> cut to %i" % (len(text[:cursor0]), len(txt_before)))
- # print("chars after %i -> cut to %i" % (len(text[cursor1:]), len(txt_after)))
- # print("before %i bytes -> %i tokens" % (len(txt_before), len(enc.encode(txt_before, disallowed_special=()))))
- # print("after %i bytes -> %i tokens" % (len(txt_after), len(enc.encode(txt_after, disallowed_special=()))))
- # print("tokens + tokens + tokens = %i" % (len(enc.encode(txt, disallowed_special=()))))
-
- return cursor0, cursor1, txt
-
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py
deleted file mode 100644
index 3a943412..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from typing import Tuple
-
-
-def full_line_selection(cursor0: int, cursor1: int, txt: str) -> Tuple[int, int, str]:
- """
- Adjusts selection to only include full lines.
- """
- c0, c1, buff = '<|cursor0|>', '<|cursor1|>', ''
- txt: str = txt[:cursor0] + c0 + txt[cursor0:cursor1] + c1 + txt[cursor1:]
-
- lines_new = []
- for line in txt.split('\n'):
- if buff:
- line = buff + line
- buff = ''
- if c0 in line:
- if not line.split(c0)[1].strip():
- buff = c0
- line = line.replace(c0, "")
- else:
- line = c0 + line.replace(c0, "")
-
- if c1 in line:
- if not line.split(c1)[0].strip() and lines_new:
- lines_new[-1] += c1
- line = line.replace(c1, "")
- else:
- line = line.replace(c1, "") + c1
- lines_new.append(line)
-
- txt_new = '\n'.join(lines_new)
- cursor0 = txt_new.index(c0)
- cursor1 = txt_new.replace(c0, "").index(c1)
- selection = txt_new.replace(c0, "").replace(c1, "")[cursor0:cursor1]
-
- return cursor0, cursor1, selection
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py
deleted file mode 100644
index 7e02f281..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .experimental_gpt_bugs_highlight import GptBugsHighlight
-from .experimental_gpt_code_review import ScratchpadCodeReviewHighlightGPT4
-from .experimental_gpt_completion import GptCompletion, GptCompletionGPT4
-from .experimental_gpt_vulnerabilities_highlight import GptDetectVulnerabilitiesHighlightGPT4
-from .gpt_add_console_logs import GptAddConsoleLogs
-from .gpt_comment_each_line import GptCommentEachLine
-from .gpt_explain_code_block import GptExplainCodeBlock, GptExplainCodeBlockGPT4
-from .gpt_fix_bug import GptFixBug, GptFixBugGPT4
-from .gpt_make_code_shorter import GptMakeCodeShorter, GptMakeCodeShorterGPT4
-from .gpt_precise_naming import GptPreciseNaming
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py
deleted file mode 100644
index 8641fb9e..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import json
-
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptBugsHighlight(ScratchpadToolboxGPT):
- def __init__(self, model_n="gpt3.5-turbo-0301", supports_stream=False, **kwargs):
- super().__init__(
- model_n=model_n,
- supports_stream=supports_stream,
- **kwargs
- )
-
- def _messages(self) -> List[Dict[str, str]]:
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg('user', '''
-You are a code reviewer. Follow my instructions carefully & to the letter.
-
-You are to receive a single code file.
-It contains imports from other files that are present in the project, but you cannot see them.
-That's why you must not highlight errors that are connected to the imports; those are false positives.
-
-Your assignment is:
-1. Carefully read code line by line up to the end.
-2. Find all errors likely to happen at runtime (ignore the imports)
-3. For each found error output a comment in the following format:
-{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"}
-{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"}
-
-FIELDS DESCRIPTION:
-- code: the code you found issue in
-- description: a brief description of the issue and short hints on how to fix it
-
-Guidelines:
-Explain yourself as briefly as possible, do not explain outside of code block.
-The output you provide must be decodable using jsonlines format.
-Do not highlight any errors connected to imports.
-'''
- ),
- msg(
- 'user',
- """from routers import FindRouter
-
-if __name__ == "__main__":
- from argparse import ArgumentParser
- parser = ArgumentParser()
-"""
- ),
- msg(
- 'assistant',
- """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}"""
- ),
- msg(
- 'user',
- 'Not valid. You have been told to ignore any kind of import errors!'
- ),
- msg('user', self._txt)
- ]
-
- def _postprocess(self, completion: str) -> str:
- self.debuglog(f'Completion:\n{completion}')
- suggestions = []
- for line in completion.splitlines():
- if not line.strip():
- continue
- try:
- suggestions.append(json.loads(line))
- except Exception as e:
- self.debuglog(e)
- for s in suggestions:
- code = s['code']
- indexes = find_substring_positions(code, self._txt)
- if not indexes:
- self.debuglog('Substring not found')
- continue
- s_start, s_end = indexes
- self._txt = \
- self._txt[:s_start] + \
- f'\n' \
- f'\nDESC: {s["description"]}\n' \
- f'{self._txt[s_start:s_end]}' \
- f'\n' + \
- self._txt[s_end:]
- return self._txt
-
-
-class GptBugsHighlightGPT4(GptBugsHighlight):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- supports_stream=False,
- **kwargs
- )
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py
deleted file mode 100644
index 639e8fb5..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import json
-
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions
-
-
-class ScratchpadCodeReviewHighlightGPT4(ScratchpadToolboxGPT):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- supports_stream=False,
- timeout=120,
- **kwargs
- )
-
- def _messages(self) -> List[Dict[str, str]]:
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg('user',
- '''
-You are a code reviewer.
-Follow my instructions carefully & to the letter.
-
-You are to receive a single code file.
-It contains imports from other files that are present in the project, but you cannot see them.
-That's why you must not highlight errors that are connected to the imports, so as not to produce false positives.
-
-Your assignment is:
-1. Carefully read code line by line up to the end.
-2. Find all possible errors that interrupt code runtime (except the cases listed above)
-3. For each found error you will suggest a comment in the following format:
-{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"}
-{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"}
-
-FIELDS DESCRIPTION:
-- code: the code you found issue in
-- description: an extremely brief description of the issue and short hints on how to fix it
-
-Guidelines:
-Explain yourself as briefly and clearly as possible; do not explain anything outside of the code block.
-The output you provide must be decodable using jsonlines format.
-Do not highlight any error that is in any way connected to imports!
-'''
- ),
- msg(
- 'user',
- """
-from routers import FindRouter
-
-if __name__ == "__main__":
- from argparse import ArgumentParser
- parser = ArgumentParser()
-"""
- ),
- msg(
- 'assistant',
- """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}"""
- ),
- msg(
- 'user',
- 'Not valid. You have been told to ignore any kind of import errors!'
- ),
- msg(
- 'assistant',
- "Sorry for the confusion. Give me another example."
- ),
- msg('user', self._txt)
- ]
-
- def _postprocess(self, completion: str) -> str:
- suggestions = [json.loads(c) for c in completion.split('\n')]
-
- for s in suggestions:
- code = s['code']
- indexes = find_substring_positions(code, self._txt)
- if not indexes:
- print('Substring not found')
- continue
-
- s_start, s_end = indexes
- self._txt = \
- self._txt[:s_start] + \
- f'\n' \
- f'\nDESC: {s["description"]}\n' \
- f'SCORE: {s["critical_score"]}\n' \
- f'{self._txt[s_start:s_end]}' \
- f'\n' + \
- self._txt[s_end:]
-
- return self._txt
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py
deleted file mode 100644
index 17205a74..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptCompletion(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- cursor0, _, ctxt = self.trim_context()
- ctxt = ctxt[:cursor0] + '<|complete-me|>' + ctxt[cursor0:]
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully and to the letter."
- ),
- msg(
- 'user',
- ctxt
- ),
- msg(
- 'assistant',
- 'What do I need to do with this code?'
- ),
- msg(
- 'user',
- "Replace <|complete-me|> with the code completion. "
- "Write it in the block of code. "
- "Do not explain anything. "
- "Write only the code completion."
- )
- ]
-
-
-class GptCompletionGPT4(GptCompletion):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- **kwargs
- )
-
- def _postprocess(self, completion: str) -> str:
-        # GPT-4 output does not need postprocessing, such as extracting a ``` code block
- return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:]
-
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py
deleted file mode 100644
index 9aef8f9b..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import json
-
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptDetectVulnerabilitiesHighlightGPT4(ScratchpadToolboxGPT):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4-0314',
- supports_stream=False,
- **kwargs
- )
-
- def _messages(self) -> List[Dict[str, str]]:
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg('user',
- 'I am a software engineer. '
- 'I have a question about one of my scripts. '
- 'I am afraid there are some vulnerabilities in it. I need you to find them and explain. '
-            'You need to stick to the following format: you will output a block of code in jsonlines format. '
-            'This is how you must format your output:'
- '''
- {"code": "VULNERABLE_CODE_PART_1", "vulnerability": "YOUR_VULNERABILITY_1_DESCRIPTION"}
- {"code": "VULNERABLE_CODE_PART_2", "vulnerability": "YOUR_VULNERABILITY_2_DESCRIPTION"}
- '''
- 'Explain as briefly as possible, do not explain outside of code block. '
- 'The output you provide must be decodable using jsonlines format. '
- ),
- msg('assistant',
-            'Thank you for the detailed description. '
-            'Now please provide me with the script that might contain vulnerabilities. '
- 'I will find them for you and explain them in the format you have given. '
- ),
- msg('user', self._txt)
- ]
-
- def _postprocess(self, completion: str) -> str:
- suggestions = [json.loads(c) for c in completion.split('\n')]
-
- for s in suggestions:
- code = s['code']
- indexes = find_substring_positions(code, self._txt)
- if not indexes:
- self.debuglog('Substring not found')
- continue
-
- s_start, s_end = indexes
- self._txt = \
- self._txt[:s_start] + \
- f'\n' \
- f'\nDESC: {s["vulnerability"]}\n' \
- f'{self._txt[s_start:s_end]}' \
- f'\n' + \
- self._txt[s_end:]
-
- return self._txt
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py
deleted file mode 100644
index 5af71ec2..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptAddConsoleLogs(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- return [
- msg(
- 'system',
- 'You are a professional high-class code assistant. '
-                'You were created to modify, generate, interpret and generalize code'
- ),
- msg(
- 'user',
-                'I will provide you with a piece of code, and you need to add console logs. '
-                'You have to identify places in the given piece of code where console logs might be useful for debugging. '
- ),
- msg(
- 'user',
- """
- def __call__(self, filename: str) -> Optional[str]:
- for filt in self._filters:
- filtered = filt([filename])
- if filtered:
- return filt.language
- return None
-"""),
- msg(
- 'assistant',
- """```
- def __call__(self, filename: str) -> Optional[str]:
- print(f'filename: {filename}')
- print(f'filters: {self._filters}')
- for it, filt in enumerate(self._filters):
- print(f'it: {it}')
- print(f'filt: {filt}')
- filtered = filt([filename])
-            print(f'filtered: {filtered}')
- if filtered:
- print(f'language: {filt.language}')
- return filt.language
- print('language: None')
- return None
-```"""),
- msg('user', self.selection)
- ]
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py
deleted file mode 100644
index 9768775a..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptCommentEachLine(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- return [
- msg(
- 'system',
- 'You are a professional high-class code assistant. '
-            'You were created to modify, generate, interpret and generalize code'
- ),
- msg(
- 'user',
- "I have an assignment for you, so listen attentively. "
- "I will provide you a piece of code, you need to explain each line of this piece of code. "
- "I need to know what each line of the piece of code does. "
-            "You need to return the exact same piece of code given, but each line must have an explanation in a comment above it. "
- "Do you understand your assignment?"
- ),
- msg(
- 'assistant',
- "Yes, my assignment is clear. "
- "I am receiving a piece of code and I need to explain each line of it. "
- "I have to return the exact same piece of code, but each line must have an explanation in comment above it. "
- "I'm willing to do it for you! However, I still have some questions:\n"
- "1. If the code already has some comments, should I keep them or remove?\n"
- "2. If the code has any other issues that I can detect, may I fix them?"
- ),
- msg(
- 'user',
-            "If you see existing comments in the piece of code, you must keep them and ignore them; no explanation for them is needed. "
-            "If you see any other issues in the code, you are prohibited from reacting to them in any way; don't fix them. Ignore them! "
-            "When the code block ends you must print <|end|> after it. "
- "Is it clear?"
- ),
- msg(
- 'assistant',
- "Yes, absolutely! "
- "If I see existing comments in provided piece of code, I must keep them and I must not explain them.\n"
- "If I see any other issues in the piece of code, I will not try to fix them. It is prohibited.\n"
- "When code block ends I will print <|end|> after it.\n"
-            "I have no doubts I can manage this task! Please provide me a piece of code, each line of which I need to explain."
- ),
- msg('user',
- """
- def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]:
- tokens_batch: int = self.max_tokens if final else tokens_batch
-
- # implement more cool features
- return self.completion_stream(
- # engine must be one of the one in docs
- engine=self._engine,
- tokens_batch=tokens_batch,
- prompt=self.prompt,
- replace_modified=self._replace_modified
- )
-"""),
- msg('assistant',
- """
-```
- def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]:
- # if not tokens_batch given, using max_tokens
- tokens_batch: int = self.max_tokens if final else tokens_batch
-
- # implement more cool features
- return self.completion_stream(
- # engine is a model codify API uses. E.g. text-davinci-003, code-davinci-002 etc
- # engine must be one of the one in docs
- engine=self._engine,
- # how many tokens will be in each batch
- tokens_batch=tokens_batch,
- # function that returns prompt for selected engine
- prompt=self.prompt,
- # replace selection from original code with generated code
- replace_modified=self._replace_modified
- )
-```
-<|end|>"""),
- msg(
- 'user',
- 'Thanks for your help! That is 100% precise for what I meant.'
- ),
- msg(
- 'assistant',
- 'Anytime! Give me another piece of code you need to explain each line. It is a pleasure to help you!'
- ),
- msg('user', self.selection)
- ]
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py
deleted file mode 100644
index abe71379..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, code_block_postprocess
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptExplainCodeBlock(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- _, _, ctxt = self.trim_context()
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg(
- 'user',
-            "I will provide you the code that I do not understand and I need you to explain it to me. "
-            "I need you to understand the logic of this code and then "
-            "output a block of code that contains comments that explain the given code. "
- "Do not explain anything outside of a block of code. "
- "Firstly, I will provide you the context, which is the whole file. "
- "Then I will provide you the specific piece of code you will explain in a block. "
- "When the block of code ends you will print <|end|> after it."
- ),
- msg(
- 'assistant',
- "Provide me a context."
- ),
- msg('user',
- """
-from typing import *
-
-from .chatgpt_scratchpad import ScratchpadToolboxGPT
-
-
-class ScratchpadMakeCodeShorter(ScratchpadToolboxGPT):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
-
- def _messages(self):
- return [*make_code_shorter(), msg('user', self._selection)]
-
- def completion(self, final, tokens_batch=25):
- tokens_batch: int = self.max_tokens if final else tokens_batch
-
- return self.completion_stream(
- engine=self._engine,
- tokens_batch=tokens_batch,
- prompt=self.prompt,
- replace_modified=self._replace_modified
- )
-
- def _postprocess(self, completion):
- return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:]
-"""),
- msg('assistant',
- "Provide me the code you need to explain in a block."
- ),
- msg('user',
- """
- def completion(self, final, tokens_batch=25):
- tokens_batch: int = self.max_tokens if final else tokens_batch
-
- return self.completion_stream(
- engine=self._engine,
- tokens_batch=tokens_batch,
- prompt=self.prompt,
- replace_modified=self._replace_modified
- )
-"""),
- msg('assistant',
- """
-```
-# This code defines a function called "completion"
-# which takes two arguments: final which is boolean and tokens_batch, which is optional int
-# if final = true, streaming is disabled, and vice versa
-# tokens batch will be set to self.max_tokens if streaming is disabled
-# it returns a function self._completion_stream, which receives engine, tokens_batch, prompt and replace_modified
-# this function returns an iterator of Dicts, where keys and values are both strings
-```
-<|end|>"""),
- msg('user', ctxt),
- msg('assistant',
- "Thanks for giving me the context. "
- "Please provide me the part of code you need to explain in a block."
- ),
- msg('user', self.selection)
- ]
-
- def _postprocess(self, completion: str) -> str:
- completion = code_block_postprocess(completion)
- return self._txt[:self.cursor1] + '\n' + completion + self._txt[self.cursor1:]
-
-
-class GptExplainCodeBlockGPT4(GptExplainCodeBlock):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- **kwargs
- )
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py
deleted file mode 100644
index 4218b259..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptFixBug(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- _, _, ctxt = self.trim_context()
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg(
- 'user',
- "I will provide you the code that contains bugs, which you will need to find and fix."
- "Output the block of rewritten bug-free code. "
- "Do not explain anything! "
- "Firstly, I will provide you the context, which is the whole file. "
- "Then I will provide you the specific piece of code to find and fix bugs. "
- "When the block of code ends you will print <|end|> after it."
- "In case there are no bugs, leave it as it is."
- ),
- msg(
- 'assistant',
- "Provide me a context."
- ),
- msg('user',
- """
-from utils import filter_filenames
-
-
-class LanguagesFilter:
- def __init__(self, languages, percent_in_commit):
- self._percent_in_commit = percent_in_commit
- self._filters = [LanguageFilter(language) for language in languages]
-
- def __call__(self, filenames):
- return filter_filenames(filenames, self._filters, self._percent_in_commit)
-
-class CodeLanguagesFilter:
- def __init__(self, languages):
- self._filters = [LanguageFilter(language) for language in languages]
-
- def __call__(self, filename):
- for (filt in self._filters):
- filtered = filt([filename)
- if (filtered):
- return filt.language
- return null
-
-if __name__ == "__main__":
- pass
-"""),
- msg('assistant',
- "Please provide me the code you need to fix bugs in. "
- ),
- msg('user',
- """
- def __call__(self, filename):
- for (filt in self._filters):
- filtered = filt([filename)
- if (filtered):
- return filt.language
- return null
- }"""),
- msg('assistant',
- """
-```
- def __call__(self, filename: str):
- for filt in self._filters:
- filtered = filt([filename])
- if filtered:
- return filt.language
- return None
-```
-<|end|>"""),
- msg('user', ctxt),
- msg('assistant',
- "Thanks for giving me the context. "
- "I understand it. "
- "Please provide me the part of code you need to fix bugs in."
- ),
- msg('user', self.selection)
- ]
-
-
-class GptFixBugGPT4(GptFixBug):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- **kwargs
- )
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py
deleted file mode 100644
index 8a1d1a0f..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py
+++ /dev/null
@@ -1,84 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptMakeCodeShorter(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- _, _, ctxt = self.trim_context()
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg(
- 'user',
- "I will provide you the code that is suboptimal, verbose and complicated. "
-            "You need to replace the suboptimal code with shorter and simpler code. "
-            "The code you generate will be placed in the context file, "
-            "so keep the existing style and indentation. "
- "Do not explain anything. "
- "Firstly, I will provide you the whole file -- the context. "
- "Then you will receive a piece of code you will simplify. "
- "When the block of code ends you will print <|end|> after it."
- ),
- msg(
- 'assistant',
- "Provide me a context."
- ),
- msg('user',
- """
-class Person:
- def __init__(self, name, age):
- self.name = name
- self.age = age
-
-
-class People:
- def __init__(people):
- self.people = []
- for p in people:
- name = p[0]
- age = p[1]
- person = Person(name, age)
- self.people.append(person)
-
- def __iter__(self):
- yield from self.people
-
- """),
- msg('assistant',
- "Please provide me the code you need to simplify."
- ),
- msg('user',
- """
- self.people = []
- for p in people:
- name = p[0]
- age = p[1]
- person = Person(name, age)
- self.people.append(person)
- """),
- msg('assistant',
- """
-```
- self.people = [Person(name, age) for name, age in people]
-```
-<|end|>"""),
- msg('user', ctxt),
- msg('assistant',
- "Thanks for giving me the context. "
- "Please provide me the part of code you need to simplify."
- ),
- msg('user', self.selection)
- ]
-
-
-class GptMakeCodeShorterGPT4(GptMakeCodeShorter):
- def __init__(self, **kwargs):
- super().__init__(
- model_n='gpt-4',
- **kwargs
- )
diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py
deleted file mode 100644
index 36b456a1..00000000
--- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from typing import Dict, List
-
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg
-from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT
-
-
-class GptPreciseNaming(ScratchpadToolboxGPT):
- def _messages(self) -> List[Dict[str, str]]:
- _, _, ctxt = self.trim_context()
- return [
- msg(
- 'system',
- "You are an AI programming assistant.\n"
- "Follow the user's requirements carefully & to the letter."
- ),
- msg(
- 'user',
-            "I will provide you the code that has ambiguous, unclear and incorrect naming. "
-            "I need you to understand the logic of this code and then output the block of code with fixed naming. "
- "Do not explain anything. "
- "Firstly, I will provide you the context, which is the whole file. "
- "Then I will provide you the specific piece of code you will fix naming in. "
- "When the block of code ends you will print <|end|> after it."
- ),
- msg(
- 'assistant',
- "Provide me a context."
- ),
- msg('user',
- """
-from typing import *
-
-
-def f(x, c):
- r = 0
- xx = 1
- for cc in c:
- r += xx * cc
- xx *= x
- return r
-
-
-def main():
- years = [1693, 1900, 2000]
- for year in years:
- if f(year):
- print(year, "is leap")
- else:
- print(year, "is not leap")
-
-if __name__ == "__main__":
- main()
-"""),
- msg('assistant',
- "Please provide me the code you need to fix naming in."
- ),
- msg('user',
- """
-def f(x, c):
- r = 0
- xx = 1
- for cc in c:
- r += xx * cc
- xx *= x
- return r
-"""),
- msg('assistant', """```
-def polynom(x, coefficients):
- value = 0
- x_power = 1
- for c in coefficients:
- value += x_power * c
- x_power *= x
- return value
-```<|end|>"""),
- msg('user', ctxt),
- msg('assistant',
- "Thanks for giving me the context. "
- "Please provide me the part of code you need to fix naming in."
- ),
- msg('user', self.selection)
- ]
diff --git a/refact_scratchpads_no_gpu/infserver_no_gpu.py b/refact_scratchpads_no_gpu/infserver_no_gpu.py
deleted file mode 100644
index d5531fc0..00000000
--- a/refact_scratchpads_no_gpu/infserver_no_gpu.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import os
-import socket
-import sys
-import time
-import json
-import datetime
-import traceback
-import signal
-import logging
-
-import importlib
-import asyncio
-
-from refact_scratchpads_no_gpu import stream_results_async
-
-
-DEBUG = int(os.environ.get("DEBUG", "0"))
-
-
-gpt_functions = {
- "free-chat": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat",
- "free-chat-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat",
- "free-chat-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat",
-
- "make-code-shorter": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter",
- "make-code-shorter-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter",
- "make-code-shorter-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorterGPT4",
-
- "fix-bug": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug",
- "fix-bug-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug",
- "fix-bug-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBugGPT4",
-
- "explain-code-block": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock",
- "explain-code-block-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock",
- "explain-code-block-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlockGPT4",
-
- "comment-each-line": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine",
- "comment-each-line-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine",
-
- "precise-naming": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming",
- "precise-naming-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming",
-
- "add-console-logs": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs",
- "add-console-logs-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs",
-
- "completion-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletion",
- "completion-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletionGPT4",
-}
-
-experimental_functions = {
- "bugs-highlight-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlight",
- "bugs-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlightGPT4",
- "vulnerabilities-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptDetectVulnerabilitiesHighlightGPT4",
-}
-
-supported_models = {
- "longthink/stable": {
- "functions": {
- **gpt_functions,
- **experimental_functions,
- }
- },
-}
-
-
-for mod in ["debug", "experimental"]:
- supported_models["longthink/" + mod] = supported_models["longthink/stable"]
-
-
-host = socket.getfqdn()
-quit_flag = False
-
-
-def dump_problematic_call(stacktrace: str, stacktrace_short: str, suspicious_call):
- if suspicious_call and not DEBUG:
- # not DEBUG means in production, save it to disk to check out later
- ymd = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
- dump_path = f'./{ymd}_infserver_no_gpu_stacktrace.dump'
- with open(dump_path, 'w') as f:
- f.write(f"{host} caught exception:\n{stacktrace}")
- f.flush()
- f.write(json.dumps(suspicious_call))
- sys.stdout.write("'%s' DUMP SAVED TO %s\n" % (stacktrace_short, dump_path))
- sys.stdout.flush()
- elif suspicious_call:
- # if DEBUG, just print the call that caused the problem
- sys.stdout.write(json.dumps(suspicious_call))
- sys.stdout.flush()
-
-
-def except_hook(exctype, value, tb, suspicious_call=None):
- msg = "".join(traceback.format_exception(exctype, value, tb, limit=10))
- sys.stderr.write(msg)
- sys.stderr.flush()
- if exctype == KeyboardInterrupt:
- quit()
- dump_problematic_call(
- "".join(traceback.format_exception(exctype, value, tb, limit=None, chain=True)),
- f"{exctype.__name__}: {value}",
- suspicious_call
- )
-
-
-async def handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered):
- ts_arrived = time.time()
- uproxy = stream_results_async.UploadAsync()
- upload_task = asyncio.create_task(uproxy.upload_results_coroutine())
- calls = []
- def logger(*args):
- msg = " ".join(map(str, args))
- msg = "R%04d" % routine_n + " " + msg
- stream_results_async.logger.info(msg)
- try:
- scratchpads = []
- for ci, call in enumerate(calls_unfiltered):
- function = call.get("function", "completion")
- import_str = model_dict["functions"].get(function, None)
- if import_str is None:
- logger("function '%s' is not supported in model '%s'" % (function, call["model"]))
- continue
- import_mod, import_class = import_str.rsplit(":", 1)
- mod = importlib.import_module(import_mod)
- Class = getattr(mod, import_class, None)
- if Class is None:
- logger("module '%s', class '%s' not found" % (import_mod, import_class))
- continue
- logger("running '%s' using %s" % (function, import_class))
- calls.append(call)
- spad = Class(logger=logger, **call)
- scratchpads.append(spad)
-
- ts_batch_started = time.time()
- # for i in range(len(calls)):
- # _prompt = scratchpads[i].prompt()
- ts_prompt = time.time()
- ts_first_token = time.time()
-
- for call_n, (call, spad) in enumerate(zip(calls, scratchpads)):
- async for files_dict in spad.completion():
- assert isinstance(files_dict, dict), f'expected dict, got {type(files_dict)}'
- cancelled_idset = uproxy.check_cancelled()
- if call["id"] in cancelled_idset:
- spad.finish_reason = "cancelled"
- uproxy.upload_result(
- my_desc,
- [call],
- ts_arrived=ts_arrived,
- ts_batch_started=ts_batch_started,
- ts_prompt=ts_prompt,
- ts_first_token=ts_first_token,
- ts_batch_finished=time.time() if spad.finish_reason else 0,
- idx_updated=[call_n],
- files=[files_dict],
- tokens=None,
- finish_reason=[spad.finish_reason],
- status=("completed" if spad.finish_reason else "in_progress"),
- more_toplevel_fields=[spad.toplevel_fields()],
- )
- if call["id"] in cancelled_idset:
- break
- except Exception as e:
- except_hook(type(e), e, e.__traceback__, calls[0] if len(calls) else None)
- finally:
- await uproxy.shutdown_coroutine()
- await upload_task
- await uproxy.close_session()
- uproxy.cancelled_reset()
- upload_task = None
-
-
-def catch_sigusr1(signum, frame):
-    stream_results_async.logger.info("infserver_no_gpu caught SIGUSR1")
- global quit_flag
- quit_flag = True
-
-
-async def do_the_serving(
- longthink_variant: str,
- routine_n: int,
-):
- aio_session = stream_results_async.infserver_async_session()
- infmod_guid = longthink_variant + "_" + host + "_%04i" % routine_n
- infmod_guid = infmod_guid.replace("-", "_")
- stream_results_async.logger.info(f'infmod_guid: {infmod_guid}')
- while not quit_flag:
- model_dict = supported_models[longthink_variant]
- my_desc = stream_results_async.validate_description_dict(
- infeng_instance_guid=infmod_guid,
- account="engineer",
- model=longthink_variant,
- B=1,
- max_thinking_time=10,
- )
- retcode, calls_unfiltered = await stream_results_async.completions_wait_batch(aio_session, my_desc)
- if retcode == "WAIT":
- continue
- if retcode != "OK":
- stream_results_async.logger.warning("server retcode %s" % retcode)
- await asyncio.sleep(5)
- continue
- await handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered)
- await aio_session.close()
- stream_results_async.logger.info("clean shutdown")
-
-
-def main():
- logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s NOGPU %(message)s',
- datefmt='%Y%m%d %H:%M:%S',
- handlers=[logging.StreamHandler(stream=sys.stderr)])
-
- from argparse import ArgumentParser
-
- parser = ArgumentParser()
- parser.add_argument("longthink_variant", type=str, default='longthink/stable')
- parser.add_argument("-k", "--openai_key", type=str)
- parser.add_argument("-w", "--workers", type=int, default=1)
- args = parser.parse_args()
-
- if not (args.openai_key or os.environ.get('OPENAI_API_KEY')):
- raise RuntimeError("set OPENAI_API_KEY or use --openai_key")
-
- if args.openai_key:
- import openai
- openai.api_key = args.openai_key
-
- sys.excepthook = except_hook
- signal.signal(signal.SIGUSR1, catch_sigusr1)
-
- workers: int = max(1, args.workers) if not DEBUG else 1
- asyncio.get_event_loop().run_until_complete(asyncio.gather(*[
- do_the_serving(args.longthink_variant, routine_n)
- for routine_n in range(workers)
- ]))
-
-
-if __name__ == "__main__":
- main()
diff --git a/self_hosting_machinery/finetune/modelling/flash_sa.py b/self_hosting_machinery/finetune/modelling/flash_sa.py
index d77c5c91..1d629bda 100644
--- a/self_hosting_machinery/finetune/modelling/flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/flash_sa.py
@@ -48,7 +48,7 @@ def get_slopes_power_of_2(n):
return alibi, alibi_start, alibi_ratio
-def _prerequisites_are_ok(model):
+def _prerequisites_are_ok(model, try_triton_kernel: bool):
try:
from flash_attn import flash_attn_func
return True
@@ -56,12 +56,13 @@ def _prerequisites_are_ok(model):
logging.warning("Original flash attention is not installed, trying to use triton implementation...")
from self_hosting_machinery.finetune.modelling.triton_flash_sa import (apply_flash_mha_to_refact_model
as apply_triton_flash)
- apply_triton_flash(model)
+ if try_triton_kernel:
+ apply_triton_flash(model)
return False
def apply_flash_mha_to_refact_model(model):
- if not _prerequisites_are_ok(model):
+ if not _prerequisites_are_ok(model, try_triton_kernel=True):
return
from flash_attn import flash_attn_func
@@ -73,7 +74,8 @@ def _forward(
attention_mask: Optional[torch.Tensor] = None,
alibi: Optional[torch.Tensor] = None,
use_cache: Optional[bool] = False,
- output_attentions: Optional[bool] = False
+ output_attentions: Optional[bool] = False,
+ *args, **kwargs
):
q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
@@ -98,7 +100,7 @@ def _forward(
def apply_flash_mha_to_starcoder_model(model):
- if not _prerequisites_are_ok(model):
+ if not _prerequisites_are_ok(model, try_triton_kernel=False):
return
from flash_attn import flash_attn_func
@@ -113,6 +115,7 @@ def _forward(
encoder_attention_mask: Optional[torch.Tensor] = None,
use_cache: Optional[bool] = False,
output_attentions: Optional[bool] = False,
+ *args, **kwargs
):
qkv = self.c_attn(x)
q = einops.rearrange(qkv[:, :, :self.embed_dim], "b t (h d) -> b t h d", h=self.num_heads)
@@ -139,7 +142,7 @@ def _forward(
def apply_flash_mha_to_codellama_model(model):
- if not _prerequisites_are_ok(model):
+ if not _prerequisites_are_ok(model, try_triton_kernel=False):
return
from flash_attn import flash_attn_func
@@ -152,7 +155,7 @@ def _forward(
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
- **kwargs
+ *args, **kwargs
):
from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
diff --git a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
index 019c6df8..d960d197 100644
--- a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
@@ -574,7 +574,8 @@ def _forward(
attention_mask: Optional[th.Tensor] = None,
alibi: Optional[th.Tensor] = None,
use_cache: Optional[bool] = False,
- output_attentions: Optional[bool] = False
+ output_attentions: Optional[bool] = False,
+ *args, **kwargs
):
q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
diff --git a/self_hosting_machinery/finetune/scripts/finetune_filter.py b/self_hosting_machinery/finetune/scripts/finetune_filter.py
index 551aba2a..ffeda946 100644
--- a/self_hosting_machinery/finetune/scripts/finetune_filter.py
+++ b/self_hosting_machinery/finetune/scripts/finetune_filter.py
@@ -156,20 +156,21 @@ def catch_sigusr1(signum, frame):
model_name = get_finetune_config(models_db, logger=traces.log)["model_name"]
finetune_cfg = copy.deepcopy(base_config(model_name, models_db))
- _log_everywhere("Loading file sets context...")
- file_sets_context = FileSetsContext(
- autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
- )
- if file_sets_context.is_up_to_date():
- logging.info("Train set filtering: nothing changed since last time, quit")
- return
-
- traces.log(textwrap.fill(
- f"This filter calculates perplexity for each file and filters out "
- f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
- f"Those files likely don't have meaningful content to train on", width=100
- ))
try:
+ _log_everywhere("Loading file sets context...")
+ file_sets_context = FileSetsContext(
+ autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
+ )
+ if file_sets_context.is_up_to_date():
+ logging.info("Train set filtering: nothing changed since last time, quit")
+ return
+
+ traces.log(textwrap.fill(
+ f"This filter calculates perplexity for each file and filters out "
+ f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
+ f"Those files likely don't have meaningful content to train on", width=100
+ ))
+
status_tracker.update_status("starting")
finetune_filter(
status_tracker=status_tracker,
diff --git a/self_hosting_machinery/scripts/env.py b/self_hosting_machinery/scripts/env.py
index 91947bb5..1d853fde 100644
--- a/self_hosting_machinery/scripts/env.py
+++ b/self_hosting_machinery/scripts/env.py
@@ -42,6 +42,8 @@
FLAG_LAUNCH_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_launch_finetune.flag")
FLAG_STOP_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_stop_finetune.flag")
+FLAG_RESTART_LSP = os.path.join(DIR_WATCHDOG_D, "_restart_lsp.flag")
+
def create_dirs():
os.makedirs(DIR_WATCHDOG_D, exist_ok=True)
os.makedirs(DIR_WEIGHTS, exist_ok=True)
diff --git a/self_hosting_machinery/scripts/first_run.py b/self_hosting_machinery/scripts/first_run.py
index cd95392d..3712bf70 100644
--- a/self_hosting_machinery/scripts/first_run.py
+++ b/self_hosting_machinery/scripts/first_run.py
@@ -12,10 +12,13 @@ def copy_watchdog_configs_if_first_run_detected(model_assigner: ModelAssigner):
def convert_old_configs(model_assigner: ModelAssigner):
- # longthink.cfg is an old version of openai_api_worker.cfg
+ # longthink.cfg and openai_api_worker.cfg are deprecated watchdog configs
old_longthink = os.path.join(env.DIR_WATCHDOG_D, "longthink.cfg")
if os.path.exists(old_longthink):
os.unlink(old_longthink)
+ openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
+ if os.path.exists(openai_watchdog_cfg_fn):
+ os.unlink(openai_watchdog_cfg_fn)
for gpu in range(16):
fn = os.path.join(env.DIR_WATCHDOG_D, "model-gpu%d.cfg" % gpu)
diff --git a/self_hosting_machinery/watchdog/docker_watchdog.py b/self_hosting_machinery/watchdog/docker_watchdog.py
index 43f008e6..39ebd7c9 100644
--- a/self_hosting_machinery/watchdog/docker_watchdog.py
+++ b/self_hosting_machinery/watchdog/docker_watchdog.py
@@ -79,6 +79,7 @@ def __init__(self, cfg):
self.sent_sigusr1_ts = 0
self.status_from_stderr = ""
self.status_nickname = ""
+ self.command_not_found = False
def set_status(self, newstatus):
self.status_from_stderr = newstatus
@@ -96,7 +97,7 @@ def set_status(self, newstatus):
os.rename(save_status_fn + ".tmp", save_status_fn)
def _start(self):
- if self.p is not None:
+ if self.p is not None or self.command_not_found:
return
global compiling_now
alt_env = os.environ.copy()
@@ -113,12 +114,19 @@ def _start(self):
CUDA_VISIBLE_DEVICES = ",".join(["%d" % x for x in self.cfg["gpus"]])
alt_env["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
self.start_ts = time.time()
- self.p = subprocess.Popen(
- cmdline,
- env=alt_env,
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- )
+ try:
+ self.p = subprocess.Popen(
+ cmdline,
+ env=alt_env,
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.PIPE,
+ )
+ except FileNotFoundError as e:
+ log(f"failed to run command {self.cmdline_str}: '{e}'")
+ self.p = None
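+            # remember that the binary is missing so _start() does not keep retrying it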
+ self.command_not_found = True
+ self.set_status("failed")
+ return
log("%s CVD=%s starting %s\n -> pid %s" % (
time.strftime("%Y%m%d %H:%M:%S"),
CUDA_VISIBLE_DEVICES,
diff --git a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
new file mode 100644
index 00000000..54d4617f
--- /dev/null
+++ b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
@@ -0,0 +1,12 @@
+{
+ "policy": ["always_on"],
+ "interrupt_when_file_appears": "%FLAG_RESTART_LSP%",
+ "command_line": [
+ "refact-lsp",
+ "--address-url", "http://127.0.0.1:8008",
+ "--http-port", "8001",
+ "--lsp-port", "8002",
+ "--logs-stderr"
+ ],
+ "gpus": []
+}
diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
index 65ad0672..ff115fad 100644
--- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
+++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
@@ -2,11 +2,15 @@
import json
import copy
import asyncio
+import aiohttp
import termcolor
+import os
+import litellm
from fastapi import APIRouter, Request, HTTPException, Query
from fastapi.responses import StreamingResponse
+from self_hosting_machinery import env
from self_hosting_machinery.webgui.selfhost_model_resolve import completion_resolve_model
from self_hosting_machinery.webgui.selfhost_model_resolve import static_resolve_model
from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
@@ -243,26 +247,48 @@ def __init__(self,
self.add_api_route("/coding_assistant_caps.json", self._coding_assistant_caps, methods=["GET"])
self.add_api_route("/v1/completions", self._completions, methods=["POST"])
+ self.add_api_route("/v1/models", self._models, methods=["GET"])
+ self.add_api_route("/v1/chat/completions", self._chat_completions, methods=["POST"])
+
self._inference_queue = inference_queue
self._id2ticket = id2ticket
self._model_assigner = model_assigner
self._timeout = timeout
+ @staticmethod
+    def _integrations_env_setup():
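+        # export the OpenAI key from the integrations config via the environment so litellm
+        # can pick it up; the key is cleared when openai_api_enable is off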
+ inference = {}
+ if os.path.exists(env.CONFIG_INFERENCE):
+ inference = json.load(open(env.CONFIG_INFERENCE, 'r'))
+ integrations = {}
+ if os.path.exists(env.CONFIG_INTEGRATIONS):
+ integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
+ openai_api_key = integrations.get("openai_api_key", "") if inference.get("openai_api_enable", False) else ""
+ os.environ["OPENAI_API_KEY"] = openai_api_key
+
async def _coding_assistant_caps(self):
+ models_available = self._inference_queue.models_available(force_read=True)
code_completion_default_model, _ = completion_resolve_model(self._inference_queue)
+ code_chat_default_model = ""
+ for model_name in models_available:
+ if self._model_assigner.models_db.get(model_name, {}).get("chat_scratchpad_class", None) is not None \
+ or model_name in litellm.model_list:
+ code_chat_default_model = model_name
+ break
return {
"cloud_name": "Refact Self-Hosted",
"endpoint_template": "v1/completions",
+ "endpoint_chat_passthrough": "v1/chat/completions",
"endpoint_style": "openai",
"telemetry_basic_dest": "/stats/telemetry-basic",
"telemetry_corrected_snippets_dest": "/stats/telemetry-snippets",
- "running_models": self._inference_queue.models_available(),
+ "running_models": models_available,
"code_completion_default_model": code_completion_default_model,
- "code_chat_default_model": "",
+ "code_chat_default_model": code_chat_default_model,
"tokenizer_path_template": "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
"tokenizer_rewrite_path": {
model: self._model_assigner.models_db[model]["model_path"]
- for model in self._inference_queue.models_available()
+ for model in models_available
if model in self._model_assigner.models_db
},
}
@@ -278,7 +304,7 @@ async def _login(self):
}
filter_caps = set([
capability
- for model in self._inference_queue.models_available()
+ for model in self._inference_queue.models_available(force_read=True)
for capability in models_mini_db_extended.get(model, {}).get("filter_caps", [])
])
for rec in self._model_assigner.models_caps_db:
@@ -423,3 +449,92 @@ async def _chat(self, post: ChatContext, request: Request, account: str = "XXX")
self._id2ticket[ticket.id()] = ticket
await q.put(ticket)
return StreamingResponse(chat_streamer(ticket, self._timeout, req["created"]))
+
+ async def _models(self):
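+        # the model list is assembled from the refact-lsp /v1/caps response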
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get("http://127.0.0.1:8001/v1/caps") as resp:
+ lsp_server_caps = await resp.json()
+ except aiohttp.ClientConnectorError as e:
+ err_msg = f"LSP server is not ready yet: {e}"
+ log(err_msg)
+ raise HTTPException(status_code=401, detail=err_msg)
+ completion_models = set()
+ for model, caps in lsp_server_caps["code_completion_models"].items():
+ completion_models.update({model, *caps["similar_models"]})
+ chat_models = set()
+ for model, caps in lsp_server_caps["code_chat_models"].items():
+ chat_models.update({model, *caps["similar_models"]})
+ data = [
+ {
+ "id": model, "root": model, "object": "model",
+ "created": 0, "owned_by": "", "permission": [], "parent": None,
+ "completion": model in completion_models, "chat": model in chat_models,
+ }
+ for model in lsp_server_caps["running_models"]
+ ]
+ return {
+ "object": "list",
+ "data": data,
+ }
+
+ async def _chat_completions(self, post: ChatContext, account: str = "XXX"):
+ prefix, postfix = "data: ", "\n\n"
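+        # SSE framing: every chunk is emitted as "data: {json}\n\n"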
+
+ if post.model in litellm.model_list:
+ async def litellm_streamer(post: ChatContext):
+ try:
+                self._integrations_env_setup()
+ response = await litellm.acompletion(
+ model=post.model, messages=post.messages, stream=True,
+ temperature=post.temperature, top_p=post.top_p, max_tokens=post.max_tokens, stop=post.stop)
+ finish_reason = None
+ async for model_response in response:
+ try:
+ data = model_response.dict()
+ finish_reason = data["choices"][0]["finish_reason"]
+ except json.JSONDecodeError:
+ data = {"choices": [{"finish_reason": finish_reason}]}
+ yield prefix + json.dumps(data) + postfix
+                # NOTE: the "[DONE]" sentinel is required by the refact-lsp server
+ yield prefix + "[DONE]" + postfix
+ except BaseException as e:
+ err_msg = f"litellm error: {e}"
+ log(err_msg)
+ yield prefix + json.dumps({"error": err_msg}) + postfix
+
+ response_streamer = litellm_streamer(post)
+
+ else:
+ async def chat_completion_streamer(post: ChatContext):
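+            # proxy the chat request to the local refact-lsp HTTP server (port 8001, see watchdog.d/lsp.cfg)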
+ post_url = "http://127.0.0.1:8001/v1/chat"
+ post_data = {
+ "messages": [m.dict() for m in post.messages],
+ "stream": True,
+ "model": post.model,
+ "parameters": {
+ "temperature": post.temperature,
+ "max_new_tokens": post.max_tokens,
+ }
+ }
+ async with aiohttp.ClientSession() as session:
+ try:
+ async with session.post(post_url, json=post_data) as response:
+ finish_reason = None
+ async for data, _ in response.content.iter_chunks():
+ try:
+ data = data.decode("utf-8")
+ data = json.loads(data[len(prefix):-len(postfix)])
+ finish_reason = data["choices"][0]["finish_reason"]
+ data["choices"][0]["finish_reason"] = None
+ except json.JSONDecodeError:
+ data = {"choices": [{"finish_reason": finish_reason}]}
+ yield prefix + json.dumps(data) + postfix
+ except aiohttp.ClientConnectorError as e:
+ err_msg = f"LSP server is not ready yet: {e}"
+ log(err_msg)
+ yield prefix + json.dumps({"error": err_msg}) + postfix
+
+ response_streamer = chat_completion_streamer(post)
+
+ return StreamingResponse(response_streamer, media_type="text/event-stream")
diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py
index 9872a03e..5078e002 100644
--- a/self_hosting_machinery/webgui/selfhost_model_assigner.py
+++ b/self_hosting_machinery/webgui/selfhost_model_assigner.py
@@ -70,7 +70,6 @@ def models_to_watchdog_configs(self, inference_config=None):
inference_config = self._model_assign_filter(inference_config)
inference_config = self._model_inference_setup(inference_config)
- inference_config = self._integrations_inference_setup(inference_config)
with open(env.CONFIG_INFERENCE + ".tmp", "w") as f:
json.dump(inference_config, f, indent=4)
@@ -135,30 +134,6 @@ def _model_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str,
"more_models_than_gpus": more_models_than_gpus,
}
- def _integrations_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str, Any]:
- integrations = {}
- if os.path.exists(env.CONFIG_INTEGRATIONS):
- integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
-
- openai_api_key = integrations.get("openai_api_key", "")
- openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
-
- if inference_config.get("openai_api_enable", False) and openai_api_key.startswith("sk-"):
- cfg = json.load(open(os.path.join(env.DIR_WATCHDOG_TEMPLATES, "openai_api_worker.cfg"), 'r'))
- cfg.pop('unfinished')
- cfg['command_line'].append('--openai_key')
- cfg['command_line'].append(openai_api_key)
- with open(openai_watchdog_cfg_fn + ".tmp", "w") as f:
- json.dump(cfg, f, indent=4)
- os.rename(openai_watchdog_cfg_fn + ".tmp", openai_watchdog_cfg_fn)
- else:
- try:
- os.unlink(openai_watchdog_cfg_fn)
- except FileNotFoundError:
- pass
-
- return inference_config
-
def first_run(self):
default_config = {
"model_assign": {
@@ -248,3 +223,8 @@ def model_assignment(self):
if model in self.models_db
}
return j
+
+ @staticmethod
+ def restart_lsp():
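+        # touching this flag file makes the watchdog restart refact-lsp
+        # (interrupt_when_file_appears in watchdog.d/lsp.cfg)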
+ with open(env.FLAG_RESTART_LSP, "w") as f:
+ f.write("")
diff --git a/self_hosting_machinery/webgui/selfhost_queue.py b/self_hosting_machinery/webgui/selfhost_queue.py
index 129e043e..b5620fda 100644
--- a/self_hosting_machinery/webgui/selfhost_queue.py
+++ b/self_hosting_machinery/webgui/selfhost_queue.py
@@ -24,9 +24,9 @@ def model_name_to_queue(self, ticket, model_name, no_checks=False):
raise HTTPException(status_code=400, detail="model '%s' is not available at this moment." % model_name)
return self._user2gpu_queue[model_name]
- def models_available(self) -> List[str]:
+ def models_available(self, force_read: bool = False) -> List[str]:
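+        # force_read bypasses the short-lived cache and re-reads the inference config from disk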
t1 = time.time()
- if self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
+ if not force_read and self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
return self._models_available
self._models_available = []
if os.path.exists(env.CONFIG_INFERENCE):
@@ -35,7 +35,6 @@ def models_available(self) -> List[str]:
self._models_available.append(model)
self._models_available_ts = time.time()
if j.get("openai_api_enable", False):
- # self._models_available.append('gpt3.5')
- # self._models_available.append('gpt4')
- self._models_available.append('longthink/stable')
+ self._models_available.append('gpt-3.5-turbo')
+ self._models_available.append('gpt-4')
return self._models_available
diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js
index b59f6e20..3e2aebdc 100644
--- a/self_hosting_machinery/webgui/static/tab-finetune.js
+++ b/self_hosting_machinery/webgui/static/tab-finetune.js
@@ -900,16 +900,16 @@ function finetune_controls_state()
render_ftf_stats(finetune_state.finetune_filter_stats);
- if(finetune_state.finetune_filter_stats.filterting_status) {
+ if(finetune_state.finetune_filter_stats.filtering_status) {
document.querySelector('.ftf-status').classList.remove('d-none');
- document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filterting_status;
+ document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filtering_status;
} else {
document.querySelector('.ftf-status').classList.add('d-none');
}
let error_span = document.querySelector('.ftf-error span');
let ftf_error = document.querySelector('.ftf-error');
- if (finetune_state.finetune_filter_stats.filterting_status == "failed") {
+ if (finetune_state.finetune_filter_stats.filtering_status == "failed") {
ftf_error.classList.remove('d-none');
if(finetune_state.finetune_filter_stats.error && finetune_state.finetune_filter_stats.error !== '') {
error_span.innerHTML = finetune_state.finetune_filter_stats.error;
diff --git a/self_hosting_machinery/webgui/tab_models_host.py b/self_hosting_machinery/webgui/tab_models_host.py
index c1138e85..ac0606b3 100644
--- a/self_hosting_machinery/webgui/tab_models_host.py
+++ b/self_hosting_machinery/webgui/tab_models_host.py
@@ -51,4 +51,5 @@ async def _tab_host_models_assign(self, post: TabHostModelsAssign):
else:
validated["completion"] = ""
self._model_assigner.models_to_watchdog_configs(validated)
+ self._model_assigner.restart_lsp()
return JSONResponse("OK")
diff --git a/self_hosting_machinery/webgui/webgui.py b/self_hosting_machinery/webgui/webgui.py
index 7ef22b52..8d0a6e7d 100644
--- a/self_hosting_machinery/webgui/webgui.py
+++ b/self_hosting_machinery/webgui/webgui.py
@@ -40,6 +40,7 @@ def __init__(self,
*args, **kwargs):
super().__init__(*args, **kwargs)
+ self._model_assigner = model_assigner
inference_queue = InferenceQueue()
id2ticket: Dict[str, Ticket] = weakref.WeakValueDictionary()
for router in self._routers_list(id2ticket, inference_queue, model_assigner, stats_service):
@@ -99,6 +100,9 @@ def handle_sigint(*args):
signal.signal(signal.SIGINT, handle_sigint)
signal.signal(signal.SIGUSR1, handle_sigint)
+    # NOTE: try restarting the LSP after the server has started
+ self._model_assigner.restart_lsp()
+
if __name__ == "__main__":
from argparse import ArgumentParser
diff --git a/setup.py b/setup.py
index 6ae83133..172aa35f 100644
--- a/setup.py
+++ b/setup.py
@@ -44,8 +44,8 @@ class PyPackage:
"self_hosting_machinery": PyPackage(
requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic==1.10.13",
"starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.4.2", "accelerate",
- "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
- "torchinfo", "mpi4py", "deepspeed==0.11.1", "pandas>=2.0.3",
+                     "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
+ "torchinfo", "mpi4py", "deepspeed==0.11.1", "litellm", "pandas>=2.0.3",
"sqlalchemy", "sqlalchemy-utils", "psycopg2-binary"],
optional=["ninja", "flash_attn @ git+https://github.com/smallcloudai/flash-attention@feat/alibi"],
requires_packages=["refact_scratchpads", "refact_scratchpads_no_gpu",