diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cb069eff..a265b014 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,6 @@ separate terminals. ```commandline python -m self_hosting_machinery.webgui.webgui DEBUG=1 python -m self_hosting_machinery.inference.inference_worker --model wizardlm/7b -DEBUG=1 python -m refact_scratchpads_no_gpu.infserver_no_gpu longthink/stable --openai_key sk-XXXYYY ``` That should be enough to get started! diff --git a/Dockerfile b/Dockerfile index 1636c586..0d87175b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,14 @@ ENV PATH="${PATH}:/tmp/linguist/bin" RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y python3-packaging +# refact lsp requisites +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y +ENV PATH="${PATH}:/root/.cargo/bin" +RUN git clone https://github.com/smallcloudai/refact-lsp.git /tmp/refact-lsp \ + && cd /tmp/refact-lsp \ + && cargo install --path . \ + && rm -rf /tmp/refact-lsp + ENV INSTALL_OPTIONAL=TRUE ENV BUILD_CUDA_EXT=1 ENV GITHUB_ACTIONS=true diff --git a/refact_scratchpads_no_gpu/async_scratchpad/__init__.py b/refact_scratchpads_no_gpu/async_scratchpad/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py b/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py deleted file mode 100644 index 60e5885a..00000000 --- a/refact_scratchpads_no_gpu/async_scratchpad/ascratch.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Callable, Union, List, Dict, Iterator - - -class AsyncScratchpad: - def __init__( - self, - id: str, - created: float, - temperature: float, - top_p: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - function: str, - stream: bool, - logger: Callable, - **unused - ): - self.id = id - self.created = created - self.finish_reason = "" - self.temp = min(max(float(temperature), 0.0), 1.0) - self.top_p = top_p - self.max_tokens = int(max_tokens) - self.function = function - self.stream = stream - self._logger = logger - tmp = stop_tokens - if isinstance(tmp, str): - stop_strings = [tmp] - else: - stop_strings = tmp - self.metering_generated_tokens_n = 0 - self.metering_total_tokens_n = 0 - self.needs_upload = False - for k, v in unused.items(): - self.debuglog("AsyncScratchpad: unused parameter '%s' = '%s'" % (k, v)) - - def toplevel_fields(self): - return {} - - def debuglog(self, *args): - if self._logger: - self._logger(*args) - - async def completion(self) -> Iterator[Dict[str, str]]: - raise NotImplementedError diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py deleted file mode 100644 index 22e441de..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from refact_scratchpads_no_gpu.gpt_toolbox.toolbox_functions import * -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import GptChat diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py deleted file mode 100644 index 1942a8f4..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_chat_spad.py +++ /dev/null @@ -1,161 +0,0 @@ -import asyncio -import functools -import json -from typing import List, Tuple, Dict, Union, Iterator - -from refact_scratchpads_no_gpu.async_scratchpad import ascratch - -import openai -import tiktoken - - -def gpt_prices( # Apr 4 2023: - model_name: str, -) -> Tuple[int, int]: - # GPT-4 8K prompt[$0.03 / 1K 
tokens] generated[$0.06 / 1K tokens] - if model_name.startswith("gpt-4") or model_name.startswith("gpt4"): - pp1000t_prompt = 30_000 - pp1000t_generated = 60_000 - # gpt-3.5-turbo $0.002 / 1K tokens - elif model_name.startswith("gpt-3.5-turbo"): - pp1000t_prompt = 2_000 - pp1000t_generated = 2_000 - else: - raise ValueError(f'get_prices: Unknown model: {model_name}') - return pp1000t_prompt, pp1000t_generated - - -@functools.lru_cache(maxsize=10) -def engine_to_encoding(engine: str) -> tiktoken.Encoding: - enc = tiktoken.encoding_for_model(engine) - return enc - - -ACCUMULATE_N_STREAMING_CHUNKS = 5 -engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not - - -def calculate_chat_tokens(model_name, messages, completion): - enc = engine_to_encoding(model_name) - calc_prompt_tokens_n = 2 # warmup - for d in messages: - calc_prompt_tokens_n += len(enc.encode(d["content"], disallowed_special=())) - calc_prompt_tokens_n += len(enc.encode(d["role"], disallowed_special=())) - calc_prompt_tokens_n += 4 # to switch user/assistant - calc_generated_tokens_n = len(enc.encode(completion, disallowed_special=())) + 2 # one to switch, another EOF - return calc_prompt_tokens_n, calc_generated_tokens_n - - -class GptChat(ascratch.AsyncScratchpad): - def __init__( - self, - id: str, - *, - created: float, - temperature: float, - top_p: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - messages: List[Dict[str, str]], - model: str, # always "longthink", don't use - **more, - ): - super().__init__( - id=id, - created=created, - temperature=temperature, - top_p=top_p, - max_tokens=max_tokens, - stop_tokens=stop_tokens, - **more, - ) - - self._model_name = "gpt-3.5-turbo" - if "gpt4" in self.function or "gpt-4" in self.function: - self._model_name = "gpt-4" - self._stream_timeout_sec = 15 - - messages = messages or [] - if not messages or messages[0].get('role') != 'system': - messages = [ - { - "role": "system", - "content": "You are a coding assistant that outputs short answers, give links to documentation.", - }, *messages - ] - self._messages = messages - self._completion = "" - - @property - def prices(self) -> Tuple[int, int]: - return gpt_prices(self._model_name) - - async def completion(self) -> Iterator[Dict[str, str]]: - gen = await openai.ChatCompletion.acreate( - model=self._model_name, - messages=self._messages, - max_tokens=self.max_tokens, - temperature=self.temp, - stream=True, - ) - accum = "" - role = "" - tokens = 0 - self.metering_prompt_tokens_n = 0 - self.metering_generated_tokens_n = 0 - try: - def forward_streaming(): - nonlocal tokens, accum, role - self._completion += accum - msg = { - "chat__role": "assistant", - "chat__content": self._completion, - } - accum = "" - return msg - - while True: - resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec) - delta = resp.choices[0].delta - if "role" in delta: - role = delta["role"] - if "content" in delta: - accum += delta["content"] - tokens += 1 # assuming 1 token per chunk - if "swear" in accum: - raise ValueError("swear!") - if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None: - self.finish_reason = resp.choices[0]["finish_reason"] - if self.finish_reason: - break - if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0: - yield forward_streaming() - if self.finish_reason: # cancelled from main coroutine - break - if self.finish_reason == "": - self.finish_reason = "END" - except asyncio.exceptions.TimeoutError as e: - self.debuglog("CHAT TIMEOUT:", 
str(type(e)), str(e)) - except Exception as e: - self.debuglog("CHAT EXCEPTION:", str(type(e)), str(e)) - self.finish_reason = "ERROR" - yield forward_streaming() - - def toplevel_fields(self): - if not self.finish_reason: - return {} - else: - calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens( - self._model_name, self._messages, self._completion - ) - self.metering_prompt_tokens_n = calc_prompt_tokens_n - self.metering_generated_tokens_n = calc_generated_tokens_n - metering_message = { - "metering_prompt_tokens_n": self.metering_prompt_tokens_n, - "metering_generated_tokens_n": self.metering_generated_tokens_n, - "pp1000t_prompt": self.prices[0], - "pp1000t_generated": self.prices[1], - "model_name": self._model_name, - } - self.debuglog(json.dumps(metering_message)) - return metering_message diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py deleted file mode 100644 index e68d7364..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_toolbox_spad.py +++ /dev/null @@ -1,204 +0,0 @@ -import os -import sys -import asyncio -import termcolor -import functools -import json -from typing import List, Union, Callable, Dict, Iterator, Tuple - -import openai -import tiktoken - -from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection -from refact_scratchpads_no_gpu.async_scratchpad import ascratch - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_chat_spad import gpt_prices, calculate_chat_tokens -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import trim_context_tok, code_block_postprocess - - -DEBUG = int(os.environ.get("DEBUG", "0")) - - -@functools.lru_cache(maxsize=10) -def engine_to_encoding(engine: str) -> tiktoken.Encoding: - enc = tiktoken.encoding_for_model(engine) - return enc - - -ACCUMULATE_N_STREAMING_CHUNKS = 5 -engine_to_encoding("text-davinci-003") # this immediately tests if tiktoken works or not - - -class ScratchpadToolboxGPT(ascratch.AsyncScratchpad): - def __init__( - self, - id: str, - created: float, - temperature: float, - max_tokens: int, - stop_tokens: Union[str, List[str]], - function: str, - intent: str, - cursor_file: str, - cursor0: int, - cursor1: int, - sources: Dict[str, str], - stream: bool, - logger: Callable, - - model_n: str = "gpt-3.5-turbo", - supports_stream: bool = True, - timeout: int = None, - **kwargs, - ): - super().__init__( - id=id, - created=created, - temperature=temperature, - max_tokens=max_tokens, - stop_tokens=stop_tokens, - function=function, - stream=stream, - logger=logger, - **kwargs - ) - self.intent = intent - self.cursor_file = cursor_file - self.cursor0 = cursor0 - self.cursor1 = cursor1 - self.sources = sources - self.metering_generated_tokens_n = 0 - self.metering_total_tokens_n = 0 - self.needs_upload = False - - self._model_n = model_n - self.__model_name = None - - if not supports_stream: self.stream = False - self._stream_timeout_sec: float = 15 - - self._txt: str = self.sources.get(self.cursor_file) - - self.cursor0, self.cursor1, self.selection = full_line_selection( - self.cursor0, self.cursor1, self._txt - ) - self.enc = engine_to_encoding(self.model_name) - - def trim_context(self) -> Tuple[int, int, str]: - cursor0, cursor1, ctxt = trim_context_tok(self.cursor0, self.cursor1, self._txt, self.enc) - return cursor0, cursor1, ctxt - - @property - def prices(self) -> Tuple[int, int]: - return gpt_prices(self.model_name) - - @property - def model_name(self) -> str: - if not self.__model_name: 
- model_name = 'gpt-3.5-turbo-0613' - if self._model_n == 'gpt-3.5-turbo' or self._model_n == 'gpt-4': - model_name = self._model_n + '-0613' - self.__model_name = model_name - return self.__model_name - - @model_name.setter - def model_name(self, val: str): - self.__model_name = val - - async def completion(self) -> Iterator[Dict[str, str]]: - if self.max_tokens < 1: self.max_tokens = 256 - self.messages = self._messages() - self.completion_so_far: str = "" - self.metering_prompt_tokens_n = 0 - self.metering_generated_tokens_n = 0 - self.openai_prompt_tokens_n = 0 - self.openai_completion_tokens = 0 - - def forward_streaming(): - modified = self._postprocess(self.completion_so_far) - return {self.cursor_file: modified} - - try: - gen = await openai.ChatCompletion.acreate( - model=self.model_name, - messages=self.messages, - max_tokens=self.max_tokens, - stream=self.stream, - temperature=self.temp, - stop=['<|end|>'], - ) - - if not self.stream: - resp = gen - self.completion_so_far = resp["choices"][0]["message"]["content"] - if DEBUG: - sys.stdout.write(termcolor.colored(self.completion_so_far, "green")) - sys.stdout.flush() - self.openai_prompt_tokens_n = resp["usage"]["prompt_tokens"] - self.openai_completion_tokens = resp["usage"]["completion_tokens"] - print(resp["usage"]) - self.model_name = resp["model"] - self.finish_reason = resp["choices"][0]["finish_reason"] or "END" - else: - self.finish_reason = "" - self.completion_so_far = "" - tokens = 0 - while True: - resp = await asyncio.wait_for(gen.__anext__(), self._stream_timeout_sec) - delta = resp.choices[0].delta - if "content" in delta: - if DEBUG: - sys.stdout.write(termcolor.colored(delta["content"], "green")) - sys.stdout.flush() - self.completion_so_far += delta["content"] - tokens += 1 # assuming 1 token per chunk - if "model" in resp: - self.model_name = resp["model"] - if "finish_reason" in resp.choices[0] and resp.choices[0]["finish_reason"] is not None: - self.finish_reason = resp.choices[0]["finish_reason"] - if self.finish_reason: - break - if tokens % ACCUMULATE_N_STREAMING_CHUNKS == 0: - yield forward_streaming() - if self.finish_reason: - break - if self.model_name == "": - self.debuglog("ScratchpadToolboxGPT: model_name is empty") - if self.finish_reason == "": - self.finish_reason = "END" - except asyncio.exceptions.TimeoutError as e: - self.debuglog("FUNCTIONS TIMEOUT:", str(type(e)), str(e)) - except Exception as e: - self.debuglog("FUNCTIONS EXCEPTION:", str(type(e)), str(e)) - self.finish_reason = "ERROR" - yield forward_streaming() - - def _messages(self) -> List[Dict[str, str]]: - raise NotImplementedError - - def _postprocess(self, completion: str) -> str: - completion = code_block_postprocess(completion) - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] - - def toplevel_fields(self): - if not self.finish_reason: - return {} - else: - calc_prompt_tokens_n, calc_generated_tokens_n = calculate_chat_tokens( - self.model_name, self.messages, self.completion_so_far - ) - self.metering_prompt_tokens_n = self.openai_prompt_tokens_n or calc_prompt_tokens_n - self.metering_generated_tokens_n = self.openai_completion_tokens or calc_generated_tokens_n - metering_message = { - "metering_prompt_tokens_n": self.metering_prompt_tokens_n, - "metering_generated_tokens_n": self.metering_generated_tokens_n, - "pp1000t_prompt": self.prices[0], - "pp1000t_generated": self.prices[1], - "model_name": self.model_name, - } - self.debuglog(json.dumps(metering_message)) - return metering_message - - def 
debuglog(self, *args): - if self._logger: - self._logger(*args) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py deleted file mode 100644 index 75746371..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/gpt_utils.py +++ /dev/null @@ -1,71 +0,0 @@ -import re -from itertools import zip_longest -from typing import * -import tiktoken - -from refact_scratchpads_no_gpu.gpt_toolbox.scratchpad_utils import full_line_selection - - -def msg(role: str, content: str) -> Dict[str, str]: - assert role in ['system', 'user', 'assistant'] - return {'role': role, 'content': content} - - -def code_block_postprocess(txt: str) -> str: - lines_code = [] - is_code = False - for line in txt.split('\n'): - if '```' in line: - is_code = not is_code - continue - if is_code: - lines_code.append(line) - - code = '\n'.join(lines_code) or txt - return code - - -def find_substring_positions(substring, text) -> Optional[Tuple[int, int]]: - words = substring.split() - pattern = r'\s*'.join(map(re.escape, words)) - match = re.search(pattern, text) - if not match: - return - - c0, c1, _ = full_line_selection(match.start(), match.end(), text) - return c0, c1 - - -def trim_context_tok( - cursor0: int, - cursor1: int, - text: str, - enc: tiktoken.Encoding, - max_tokens: int = 2000 -) -> Tuple[int, int, str]: - selection = text[cursor0:cursor1] - tokens_left = max_tokens - len(enc.encode(selection, disallowed_special=())) - - lines_before = ((l, 'before') for l in reversed(text[:cursor0].splitlines())) - lines_after = ((l, 'after') for l in text[cursor1:].splitlines()) - merged_lines = [val for pair in zip_longest(lines_before, lines_after) for val in pair if val] - - lines_before_p, lines_after_p = [], [] - for line, t in merged_lines: - if (line_tok_cnt := len(enc.encode(line, disallowed_special=()))) >= tokens_left: break - lines_before_p.append(line) if t == 'before' else lines_after_p.append(line) - tokens_left -= line_tok_cnt - - txt_before = '\n'.join(reversed(lines_before_p)) + '\n' - txt_after = '\n'.join(lines_after_p) - txt = txt_before + selection + txt_after - cursor0, cursor1 = len(txt_before), len(txt_before) + len(selection) - - # print("chars before %i -> cut to %i" % (len(text[:cursor0]), len(txt_before))) - # print("chars after %i -> cut to %i" % (len(text[cursor1:]), len(txt_after))) - # print("before %i bytes -> %i tokens" % (len(txt_before), len(enc.encode(txt_before, disallowed_special=())))) - # print("after %i bytes -> %i tokens" % (len(txt_after), len(enc.encode(txt_after, disallowed_special=())))) - # print("tokens + tokens + tokens = %i" % (len(enc.encode(txt, disallowed_special=())))) - - return cursor0, cursor1, txt - diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py b/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py deleted file mode 100644 index 3a943412..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/scratchpad_utils.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Tuple - - -def full_line_selection(cursor0: int, cursor1: int, txt: str) -> Tuple[int, int, str]: - """ - Adjusts selection to only include full lines. 
- """ - c0, c1, buff = '<|cursor0|>', '<|cursor1|>', '' - txt: str = txt[:cursor0] + c0 + txt[cursor0:cursor1] + c1 + txt[cursor1:] - - lines_new = [] - for line in txt.split('\n'): - if buff: - line = buff + line - buff = '' - if c0 in line: - if not line.split(c0)[1].strip(): - buff = c0 - line = line.replace(c0, "") - else: - line = c0 + line.replace(c0, "") - - if c1 in line: - if not line.split(c1)[0].strip() and lines_new: - lines_new[-1] += c1 - line = line.replace(c1, "") - else: - line = line.replace(c1, "") + c1 - lines_new.append(line) - - txt_new = '\n'.join(lines_new) - cursor0 = txt_new.index(c0) - cursor1 = txt_new.replace(c0, "").index(c1) - selection = txt_new.replace(c0, "").replace(c1, "")[cursor0:cursor1] - - return cursor0, cursor1, selection diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py deleted file mode 100644 index 7e02f281..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .experimental_gpt_bugs_highlight import GptBugsHighlight -from .experimental_gpt_code_review import ScratchpadCodeReviewHighlightGPT4 -from .experimental_gpt_completion import GptCompletion, GptCompletionGPT4 -from .experimental_gpt_vulnerabilities_highlight import GptDetectVulnerabilitiesHighlightGPT4 -from .gpt_add_console_logs import GptAddConsoleLogs -from .gpt_comment_each_line import GptCommentEachLine -from .gpt_explain_code_block import GptExplainCodeBlock, GptExplainCodeBlockGPT4 -from .gpt_fix_bug import GptFixBug, GptFixBugGPT4 -from .gpt_make_code_shorter import GptMakeCodeShorter, GptMakeCodeShorterGPT4 -from .gpt_precise_naming import GptPreciseNaming diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py deleted file mode 100644 index 8641fb9e..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_bugs_highlight.py +++ /dev/null @@ -1,101 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptBugsHighlight(ScratchpadToolboxGPT): - def __init__(self, model_n="gpt3.5-turbo-0301", supports_stream=False, **kwargs): - super().__init__( - model_n=model_n, - supports_stream=supports_stream, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', ''' -You are a code reviewer. Follow my instructions carefully & to the letter. - -You are to receive a single code file. -It contain imports from other files that are present in the project, but you cannot see them. -That's why you must not highlight errors that are connected to the imports, it's a false positive. - -Your assignment is: -1. Carefully read code line by line up to the end. -2. Find all errors likely to happen in runtime (ignore the imports) -3. 
For each found error output a comment in the following format: -{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"} -{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"} - -FIELDS DESCRIPTION: -- code: the code you found issue in -- description: brief description of the issue and short instructions hints how to fix it - -Guidelines: -Explain yourself as briefly as possible, do not explain outside of code block. -The output you provide must be decodable using jsonlines format. -Do not highlight any errors connected to imports. -''' - ), - msg( - 'user', - """from routers import FindRouter - -if __name__ == "__main__": - from argparse import ArgumentParser - parser = ArgumentParser() -""" - ), - msg( - 'assistant', - """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}""" - ), - msg( - 'user', - 'Not valid. You have been told to ignore any kind of import errors!' - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - self.debuglog(f'Completion:\n{completion}') - suggestions = [] - for line in completion.splitlines(): - if not line.strip(): - continue - try: - suggestions.append(json.loads(line)) - except Exception as e: - self.debuglog(e) - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - self.debuglog('Substring not found') - continue - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["description"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - return self._txt - - -class GptBugsHighlightGPT4(GptBugsHighlight): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - supports_stream=False, - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py deleted file mode 100644 index 639e8fb5..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_code_review.py +++ /dev/null @@ -1,96 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions - - -class ScratchpadCodeReviewHighlightGPT4(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - supports_stream=False, - timeout=120, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', - ''' -You are a code reviewer. -Follow my instructions carefully & to the letter. - -You are to receive a single code file. -It contain imports from other files that are present in the project, but you cannot see them. -That's why you must not highlight errors that are connected to the imports to not commit false-positive errors. - -Your assignment is: -1. Carefully read code line by line up to the end. -2. Find all possible errors that interrupt code runtime (except the cases listed above) -3. 
For each found error you will suggest a comment in the following format: -{"code": " def _messages(self) -> list[dict[str, str]]:", "description": "errors in type annotations"} -{"code": "for call, idx in enumerate(calls_unfiltered):", "description": "Invalid variable assignment"} - -FIELDS DESCRIPTION: -- code: the code you found issue in -- description: extremely brief description of the issue and short instructions hints how to fix it - -Guidelines: -Explain yourself as briefly and clear as possible, do not explain outside of code block. -The output you provide must be decodable using jsonlines format. -Do not highlight any error that is anyhow connected to imports! -''' - ), - msg( - 'user', - """ -from routers import FindRouter - -if __name__ == "__main__": - from argparse import ArgumentParser - parser = ArgumentParser() -""" - ), - msg( - 'assistant', - """{"code": "from routers import FindRouter", "description": "ModuleNotFoundError: no module named routers"}""" - ), - msg( - 'user', - 'Not valid. You have been told to ignore any kind of import errors!' - ), - msg( - 'assistant', - "Sorry for the confusion. Give me another example." - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - suggestions = [json.loads(c) for c in completion.split('\n')] - - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - print('Substring not found') - continue - - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["description"]}\n' \ - f'SCORE: {s["critical_score"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - - return self._txt diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py deleted file mode 100644 index 17205a74..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_completion.py +++ /dev/null @@ -1,45 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptCompletion(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - cursor0, _, ctxt = self.trim_context() - ctxt = ctxt[:cursor0] + '<|complete-me|>' + ctxt[cursor0:] - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully and to the letter." - ), - msg( - 'user', - ctxt - ), - msg( - 'assistant', - 'What do I need to do with this code?' - ), - msg( - 'user', - "Replace <|complete-me|> with the code completion. " - "Write it in the block of code. " - "Do not explain anything. " - "Write only the code completion." 
- ) - ] - - -class GptCompletionGPT4(GptCompletion): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) - - def _postprocess(self, completion: str) -> str: - # Output of GPT-4 does not need to be postprocessed, such as find ``` - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] - diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py deleted file mode 100644 index 9aef8f9b..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/experimental_gpt_vulnerabilities_highlight.py +++ /dev/null @@ -1,64 +0,0 @@ -import json - -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, find_substring_positions -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptDetectVulnerabilitiesHighlightGPT4(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4-0314', - supports_stream=False, - **kwargs - ) - - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg('user', - 'I am a software engineer. ' - 'I have a question about one of my scripts. ' - 'I am afraid there are some vulnerabilities in it. I need you to find them and explain. ' - 'You need to stick to the following format: you will output a block of code in jsonlines format.' - 'This is how you must format you output:' - ''' - {"code": "VULNERABLE_CODE_PART_1", "vulnerability": "YOUR_VULNERABILITY_1_DESCRIPTION"} - {"code": "VULNERABLE_CODE_PART_2", "vulnerability": "YOUR_VULNERABILITY_2_DESCRIPTION"} - ''' - 'Explain as briefly as possible, do not explain outside of code block. ' - 'The output you provide must be decodable using jsonlines format. ' - ), - msg('assistant', - 'Thank you for detailed description. ' - 'Now please provide me this script that might contain vulnerabilities. ' - 'I will find them for you and explain them in the format you have given. ' - ), - msg('user', self._txt) - ] - - def _postprocess(self, completion: str) -> str: - suggestions = [json.loads(c) for c in completion.split('\n')] - - for s in suggestions: - code = s['code'] - indexes = find_substring_positions(code, self._txt) - if not indexes: - self.debuglog('Substring not found') - continue - - s_start, s_end = indexes - self._txt = \ - self._txt[:s_start] + \ - f'\n' \ - f'\nDESC: {s["vulnerability"]}\n' \ - f'{self._txt[s_start:s_end]}' \ - f'\n' + \ - self._txt[s_end:] - - return self._txt diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py deleted file mode 100644 index 5af71ec2..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_add_console_logs.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptAddConsoleLogs(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - 'You are a professional high-class code assistant. 
' - 'Your were created to modify, generate, interpret and generalize code' - ), - msg( - 'user', - 'I will provide you a piece of code, you need to add console logs. ' - 'You have to localize places in a given piece of code where console logs might be useful for debugging ' - ), - msg( - 'user', - """ - def __call__(self, filename: str) -> Optional[str]: - for filt in self._filters: - filtered = filt([filename]) - if filtered: - return filt.language - return None -"""), - msg( - 'assistant', - """``` - def __call__(self, filename: str) -> Optional[str]: - print(f'filename: {filename}') - print(f'filters: {self._filters}') - for it, filt in enumerate(self._filters): - print(f'it: {it}') - print(f'filt: {filt}') - filtered = filt([filename]) - print(f'filtered: {filtered}')' - if filtered: - print(f'language: {filt.language}') - return filt.language - print('language: None') - return None -```"""), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py deleted file mode 100644 index 9768775a..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_comment_each_line.py +++ /dev/null @@ -1,91 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptCommentEachLine(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - return [ - msg( - 'system', - 'You are a professional high-class code assistant. ' - 'Your were created to modify, generate, interpret and generalize code' - ), - msg( - 'user', - "I have an assignment for you, so listen attentively. " - "I will provide you a piece of code, you need to explain each line of this piece of code. " - "I need to know what each line of the piece of code does. " - "You need to return the exact same piece of code given, but each line must have an explanation in comment above it" - "Do you understand your assignment?" - ), - msg( - 'assistant', - "Yes, my assignment is clear. " - "I am receiving a piece of code and I need to explain each line of it. " - "I have to return the exact same piece of code, but each line must have an explanation in comment above it. " - "I'm willing to do it for you! However, I still have some questions:\n" - "1. If the code already has some comments, should I keep them or remove?\n" - "2. If the code has any other issues that I can detect, may I fix them?" - ), - msg( - 'user', - "If you see existing comments in the piece of code, you must keep them, you should ignore them, no explaination for them needed. " - "If you see some other issues in the code, you are prohibited react on them in any way, don't fix them. Ignore them! " - "When code block ends you must print <|end|> after it. " - "Is it clear?" - ), - msg( - 'assistant', - "Yes, absolutely! " - "If I see existing comments in provided piece of code, I must keep them and I must not explain them.\n" - "If I see any other issues in the piece of code, I will not try to fix them. It is prohibited.\n" - "When code block ends I will print <|end|> after it.\n" - "I have no doubts I can manage this task! Please provide me a piece of code each line of which I need explain." 
- ), - msg('user', - """ - def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]: - tokens_batch: int = self.max_tokens if final else tokens_batch - - # implement more cool features - return self.completion_stream( - # engine must be one of the one in docs - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) -"""), - msg('assistant', - """ -``` - def completion(self, final: bool, tokens_batch: Optional[int] = 25) -> Iterator[Dict[str, str]]: - # if not tokens_batch given, using max_tokens - tokens_batch: int = self.max_tokens if final else tokens_batch - - # implement more cool features - return self.completion_stream( - # engine is a model codify API uses. E.g. text-davinci-003, code-davinci-002 etc - # engine must be one of the one in docs - engine=self._engine, - # how many tokens will be in each batch - tokens_batch=tokens_batch, - # function that returns prompt for selected engine - prompt=self.prompt, - # replace selection from original code with generated code - replace_modified=self._replace_modified - ) -``` -<|end|>"""), - msg( - 'user', - 'Thanks for your help! That is 100% precise for what I meant.' - ), - msg( - 'assistant', - 'Anytime! Give me another piece of code you need to explain each line. It is a pleasure to help you!' - ), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py deleted file mode 100644 index abe71379..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_explain_code_block.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg, code_block_postprocess -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptExplainCodeBlock(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that I do not understand and I need you to explain me it." - "I need you understand the logic of this code and t" - "hen output the block of code that contain comments that explain the given code. " - "Do not explain anything outside of a block of code. " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code you will explain in a block. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." 
- ), - msg('user', - """ -from typing import * - -from .chatgpt_scratchpad import ScratchpadToolboxGPT - - -class ScratchpadMakeCodeShorter(ScratchpadToolboxGPT): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _messages(self): - return [*make_code_shorter(), msg('user', self._selection)] - - def completion(self, final, tokens_batch=25): - tokens_batch: int = self.max_tokens if final else tokens_batch - - return self.completion_stream( - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) - - def _postprocess(self, completion): - return self._txt[:self.cursor0] + completion + self._txt[self.cursor1:] -"""), - msg('assistant', - "Provide me the code you need to explain in a block." - ), - msg('user', - """ - def completion(self, final, tokens_batch=25): - tokens_batch: int = self.max_tokens if final else tokens_batch - - return self.completion_stream( - engine=self._engine, - tokens_batch=tokens_batch, - prompt=self.prompt, - replace_modified=self._replace_modified - ) -"""), - msg('assistant', - """ -``` -# This code defines a function called "completion" -# which takes two arguments: final which is boolean and tokens_batch, which is optional int -# if final = true, streaming is disabled, and vice versa -# tokens batch will be set to self.max_tokens if streaming is disabled -# it returns a function self._completion_stream, which receives engine, tokens_batch, prompt and replace_modified -# this function returns an iterator of Dicts, where keys and values are both strings -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to explain in a block." - ), - msg('user', self.selection) - ] - - def _postprocess(self, completion: str) -> str: - completion = code_block_postprocess(completion) - return self._txt[:self.cursor1] + '\n' + completion + self._txt[self.cursor1:] - - -class GptExplainCodeBlockGPT4(GptExplainCodeBlock): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py deleted file mode 100644 index 4218b259..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_fix_bug.py +++ /dev/null @@ -1,95 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptFixBug(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that contains bugs, which you will need to find and fix." - "Output the block of rewritten bug-free code. " - "Do not explain anything! " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code to find and fix bugs. " - "When the block of code ends you will print <|end|> after it." - "In case there are no bugs, leave it as it is." - ), - msg( - 'assistant', - "Provide me a context." 
- ), - msg('user', - """ -from utils import filter_filenames - - -class LanguagesFilter: - def __init__(self, languages, percent_in_commit): - self._percent_in_commit = percent_in_commit - self._filters = [LanguageFilter(language) for language in languages] - - def __call__(self, filenames): - return filter_filenames(filenames, self._filters, self._percent_in_commit) - -class CodeLanguagesFilter: - def __init__(self, languages): - self._filters = [LanguageFilter(language) for language in languages] - - def __call__(self, filename): - for (filt in self._filters): - filtered = filt([filename) - if (filtered): - return filt.language - return null - -if __name__ == "__main__": - pass -"""), - msg('assistant', - "Please provide me the code you need to fix bugs in. " - ), - msg('user', - """ - def __call__(self, filename): - for (filt in self._filters): - filtered = filt([filename) - if (filtered): - return filt.language - return null - }"""), - msg('assistant', - """ -``` - def __call__(self, filename: str): - for filt in self._filters: - filtered = filt([filename]) - if filtered: - return filt.language - return None -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "I understand it. " - "Please provide me the part of code you need to fix bugs in." - ), - msg('user', self.selection) - ] - - -class GptFixBugGPT4(GptFixBug): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py deleted file mode 100644 index 8a1d1a0f..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_make_code_shorter.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptMakeCodeShorter(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that is suboptimal, verbose and complicated. " - "You need to replace the suboptimal code with a shorter and more simple code. " - "The code you generated will be placed in the context file, " - "so keep all styles and indents. " - "Do not explain anything. " - "Firstly, I will provide you the whole file -- the context. " - "Then you will receive a piece of code you will simplify. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." - ), - msg('user', - """ -class Person: - def __init__(self, name, age): - self.name = name - self.age = age - - -class People: - def __init__(people): - self.people = [] - for p in people: - name = p[0] - age = p[1] - person = Person(name, age) - self.people.append(person) - - def __iter__(self): - yield from self.people - - """), - msg('assistant', - "Please provide me the code you need to simplify." 
- ), - msg('user', - """ - self.people = [] - for p in people: - name = p[0] - age = p[1] - person = Person(name, age) - self.people.append(person) - """), - msg('assistant', - """ -``` - self.people = [Person(name, age) for name, age in people] -``` -<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to simplify." - ), - msg('user', self.selection) - ] - - -class GptMakeCodeShorterGPT4(GptMakeCodeShorter): - def __init__(self, **kwargs): - super().__init__( - model_n='gpt-4', - **kwargs - ) diff --git a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py b/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py deleted file mode 100644 index 36b456a1..00000000 --- a/refact_scratchpads_no_gpu/gpt_toolbox/toolbox_functions/gpt_precise_naming.py +++ /dev/null @@ -1,82 +0,0 @@ -from typing import Dict, List - -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_utils import msg -from refact_scratchpads_no_gpu.gpt_toolbox.gpt_toolbox_spad import ScratchpadToolboxGPT - - -class GptPreciseNaming(ScratchpadToolboxGPT): - def _messages(self) -> List[Dict[str, str]]: - _, _, ctxt = self.trim_context() - return [ - msg( - 'system', - "You are an AI programming assistant.\n" - "Follow the user's requirements carefully & to the letter." - ), - msg( - 'user', - "I will provide you the code that has ambitious, unclear and incorrect naming." - "I need you understand the logic of this code and then output the block of code with fixed naming. " - "Do not explain anything. " - "Firstly, I will provide you the context, which is the whole file. " - "Then I will provide you the specific piece of code you will fix naming in. " - "When the block of code ends you will print <|end|> after it." - ), - msg( - 'assistant', - "Provide me a context." - ), - msg('user', - """ -from typing import * - - -def f(x, c): - r = 0 - xx = 1 - for cc in c: - r += xx * cc - xx *= x - return r - - -def main(): - years = [1693, 1900, 2000] - for year in years: - if f(year): - print(year, "is leap") - else: - print(year, "is not leap") - -if __name__ == "__main__": - main() -"""), - msg('assistant', - "Please provide me the code you need to fix naming in." - ), - msg('user', - """ -def f(x, c): - r = 0 - xx = 1 - for cc in c: - r += xx * cc - xx *= x - return r -"""), - msg('assistant', """``` -def polynom(x, coefficients): - value = 0 - x_power = 1 - for c in coefficients: - value += x_power * c - x_power *= x - return value -```<|end|>"""), - msg('user', ctxt), - msg('assistant', - "Thanks for giving me the context. " - "Please provide me the part of code you need to fix naming in." 
- ), - msg('user', self.selection) - ] diff --git a/refact_scratchpads_no_gpu/infserver_no_gpu.py b/refact_scratchpads_no_gpu/infserver_no_gpu.py deleted file mode 100644 index d5531fc0..00000000 --- a/refact_scratchpads_no_gpu/infserver_no_gpu.py +++ /dev/null @@ -1,239 +0,0 @@ -import os -import socket -import sys -import time -import json -import datetime -import traceback -import signal -import logging - -import importlib -import asyncio - -from refact_scratchpads_no_gpu import stream_results_async - - -DEBUG = int(os.environ.get("DEBUG", "0")) - - -gpt_functions = { - "free-chat": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - "free-chat-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - "free-chat-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptChat", - - "make-code-shorter": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter", - "make-code-shorter-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorter", - "make-code-shorter-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptMakeCodeShorterGPT4", - - "fix-bug": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug", - "fix-bug-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBug", - "fix-bug-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptFixBugGPT4", - - "explain-code-block": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock", - "explain-code-block-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlock", - "explain-code-block-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptExplainCodeBlockGPT4", - - "comment-each-line": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine", - "comment-each-line-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCommentEachLine", - - "precise-naming": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming", - "precise-naming-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptPreciseNaming", - - "add-console-logs": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs", - "add-console-logs-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptAddConsoleLogs", - - "completion-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletion", - "completion-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptCompletionGPT4", -} - -experimental_functions = { - "bugs-highlight-gpt3.5": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlight", - "bugs-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptBugsHighlightGPT4", - "vulnerabilities-highlight-gpt4": "refact_scratchpads_no_gpu.gpt_toolbox:GptDetectVulnerabilitiesHighlightGPT4", -} - -supported_models = { - "longthink/stable": { - "functions": { - **gpt_functions, - **experimental_functions, - } - }, -} - - -for mod in ["debug", "experimental"]: - supported_models["longthink/" + mod] = supported_models["longthink/stable"] - - -host = socket.getfqdn() -quit_flag = False - - -def dump_problematic_call(stacktrace: str, stacktrace_short: str, suspicious_call): - if suspicious_call and not DEBUG: - # not DEBUG means in production, save it to disk to check out later - ymd = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - dump_path = f'./{ymd}_infserver_no_gpu_stacktrace.dump' - with open(dump_path, 'w') as f: - f.write(f"{host} caught exception:\n{stacktrace}") - f.flush() - f.write(json.dumps(suspicious_call)) - sys.stdout.write("'%s' DUMP SAVED TO %s\n" % (stacktrace_short, dump_path)) - sys.stdout.flush() - elif suspicious_call: - # if DEBUG, just print the call that caused the problem - sys.stdout.write(json.dumps(suspicious_call)) - sys.stdout.flush() - - -def except_hook(exctype, value, 
tb, suspicious_call=None): - msg = "".join(traceback.format_exception(exctype, value, tb, limit=10)) - sys.stderr.write(msg) - sys.stderr.flush() - if exctype == KeyboardInterrupt: - quit() - dump_problematic_call( - "".join(traceback.format_exception(exctype, value, tb, limit=None, chain=True)), - f"{exctype.__name__}: {value}", - suspicious_call - ) - - -async def handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered): - ts_arrived = time.time() - uproxy = stream_results_async.UploadAsync() - upload_task = asyncio.create_task(uproxy.upload_results_coroutine()) - calls = [] - def logger(*args): - msg = " ".join(map(str, args)) - msg = "R%04d" % routine_n + " " + msg - stream_results_async.logger.info(msg) - try: - scratchpads = [] - for ci, call in enumerate(calls_unfiltered): - function = call.get("function", "completion") - import_str = model_dict["functions"].get(function, None) - if import_str is None: - logger("function '%s' is not supported in model '%s'" % (function, call["model"])) - continue - import_mod, import_class = import_str.rsplit(":", 1) - mod = importlib.import_module(import_mod) - Class = getattr(mod, import_class, None) - if Class is None: - logger("module '%s', class '%s' not found" % (import_mod, import_class)) - continue - logger("running '%s' using %s" % (function, import_class)) - calls.append(call) - spad = Class(logger=logger, **call) - scratchpads.append(spad) - - ts_batch_started = time.time() - # for i in range(len(calls)): - # _prompt = scratchpads[i].prompt() - ts_prompt = time.time() - ts_first_token = time.time() - - for call_n, (call, spad) in enumerate(zip(calls, scratchpads)): - async for files_dict in spad.completion(): - assert isinstance(files_dict, dict), f'expected dict, got {type(files_dict)}' - cancelled_idset = uproxy.check_cancelled() - if call["id"] in cancelled_idset: - spad.finish_reason = "cancelled" - uproxy.upload_result( - my_desc, - [call], - ts_arrived=ts_arrived, - ts_batch_started=ts_batch_started, - ts_prompt=ts_prompt, - ts_first_token=ts_first_token, - ts_batch_finished=time.time() if spad.finish_reason else 0, - idx_updated=[call_n], - files=[files_dict], - tokens=None, - finish_reason=[spad.finish_reason], - status=("completed" if spad.finish_reason else "in_progress"), - more_toplevel_fields=[spad.toplevel_fields()], - ) - if call["id"] in cancelled_idset: - break - except Exception as e: - except_hook(type(e), e, e.__traceback__, calls[0] if len(calls) else None) - finally: - await uproxy.shutdown_coroutine() - await upload_task - await uproxy.close_session() - uproxy.cancelled_reset() - upload_task = None - - -def catch_sigusr1(signum, frame): - stream_results_async.logger.info("infserver_no_gpu catched SIGUSR1") - global quit_flag - quit_flag = True - - -async def do_the_serving( - longthink_variant: str, - routine_n: int, -): - aio_session = stream_results_async.infserver_async_session() - infmod_guid = longthink_variant + "_" + host + "_%04i" % routine_n - infmod_guid = infmod_guid.replace("-", "_") - stream_results_async.logger.info(f'infmod_guid: {infmod_guid}') - while not quit_flag: - model_dict = supported_models[longthink_variant] - my_desc = stream_results_async.validate_description_dict( - infeng_instance_guid=infmod_guid, - account="engineer", - model=longthink_variant, - B=1, - max_thinking_time=10, - ) - retcode, calls_unfiltered = await stream_results_async.completions_wait_batch(aio_session, my_desc) - if retcode == "WAIT": - continue - if retcode != "OK": - 
stream_results_async.logger.warning("server retcode %s" % retcode)
-            await asyncio.sleep(5)
-            continue
-        await handle_single_batch(routine_n, my_desc, model_dict, calls_unfiltered)
-    await aio_session.close()
-    stream_results_async.logger.info("clean shutdown")
-
-
-def main():
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s NOGPU %(message)s',
-        datefmt='%Y%m%d %H:%M:%S',
-        handlers=[logging.StreamHandler(stream=sys.stderr)])
-
-    from argparse import ArgumentParser
-
-    parser = ArgumentParser()
-    parser.add_argument("longthink_variant", type=str, default='longthink/stable')
-    parser.add_argument("-k", "--openai_key", type=str)
-    parser.add_argument("-w", "--workers", type=int, default=1)
-    args = parser.parse_args()
-
-    if not (args.openai_key or os.environ.get('OPENAI_API_KEY')):
-        raise RuntimeError("set OPENAI_API_KEY or use --openai_key")
-
-    if args.openai_key:
-        import openai
-        openai.api_key = args.openai_key
-
-    sys.excepthook = except_hook
-    signal.signal(signal.SIGUSR1, catch_sigusr1)
-
-    workers: int = max(1, args.workers) if not DEBUG else 1
-    asyncio.get_event_loop().run_until_complete(asyncio.gather(*[
-        do_the_serving(args.longthink_variant, routine_n)
-        for routine_n in range(workers)
-    ]))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/self_hosting_machinery/finetune/modelling/flash_sa.py b/self_hosting_machinery/finetune/modelling/flash_sa.py
index d77c5c91..1d629bda 100644
--- a/self_hosting_machinery/finetune/modelling/flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/flash_sa.py
@@ -48,7 +48,7 @@ def get_slopes_power_of_2(n):
     return alibi, alibi_start, alibi_ratio


-def _prerequisites_are_ok(model):
+def _prerequisites_are_ok(model, try_triton_kernel: bool):
     try:
         from flash_attn import flash_attn_func
         return True
@@ -56,12 +56,13 @@ def _prerequisites_are_ok(model):
         logging.warning("Original flash attention is not installed, trying to use triton implementation...")
         from self_hosting_machinery.finetune.modelling.triton_flash_sa import (apply_flash_mha_to_refact_model
                                                                                as apply_triton_flash)
-        apply_triton_flash(model)
+        if try_triton_kernel:
+            apply_triton_flash(model)
         return False


 def apply_flash_mha_to_refact_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=True):
         return

     from flash_attn import flash_attn_func
@@ -73,7 +74,8 @@ def _forward(
             attention_mask: Optional[torch.Tensor] = None,
             alibi: Optional[torch.Tensor] = None,
             use_cache: Optional[bool] = False,
-            output_attentions: Optional[bool] = False
+            output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
         kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
@@ -98,7 +100,7 @@ def _forward(


 def apply_flash_mha_to_starcoder_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=False):
         return

     from flash_attn import flash_attn_func
@@ -113,6 +115,7 @@ def _forward(
             encoder_attention_mask: Optional[torch.Tensor] = None,
             use_cache: Optional[bool] = False,
             output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         qkv = self.c_attn(x)
         q = einops.rearrange(qkv[:, :, :self.embed_dim], "b t (h d) -> b t h d", h=self.num_heads)
@@ -139,7 +142,7 @@ def _forward(


 def apply_flash_mha_to_codellama_model(model):
-    if not _prerequisites_are_ok(model):
+    if not _prerequisites_are_ok(model, try_triton_kernel=False):
         return

     from flash_attn import flash_attn_func
@@ -152,7 +155,7 @@ def _forward(
             past_key_value: Optional[Tuple[torch.Tensor]] = None,
             output_attentions: bool = False,
             use_cache: bool = False,
-            **kwargs
+            *args, **kwargs
     ):

         from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
diff --git a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
index 019c6df8..d960d197 100644
--- a/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
+++ b/self_hosting_machinery/finetune/modelling/triton_flash_sa.py
@@ -574,7 +574,8 @@ def _forward(
             attention_mask: Optional[th.Tensor] = None,
             alibi: Optional[th.Tensor] = None,
             use_cache: Optional[bool] = False,
-            output_attentions: Optional[bool] = False
+            output_attentions: Optional[bool] = False,
+            *args, **kwargs
     ):
         q = einops.rearrange(self.q(x), "b t (h d) -> b t h d", h=self.num_heads)
         kv = einops.rearrange(self.kv(x), "b t (h d) -> b t h d", h=2)
diff --git a/self_hosting_machinery/finetune/scripts/finetune_filter.py b/self_hosting_machinery/finetune/scripts/finetune_filter.py
index 551aba2a..ffeda946 100644
--- a/self_hosting_machinery/finetune/scripts/finetune_filter.py
+++ b/self_hosting_machinery/finetune/scripts/finetune_filter.py
@@ -156,20 +156,21 @@ def catch_sigusr1(signum, frame):
     model_name = get_finetune_config(models_db, logger=traces.log)["model_name"]
     finetune_cfg = copy.deepcopy(base_config(model_name, models_db))

-    _log_everywhere("Loading file sets context...")
-    file_sets_context = FileSetsContext(
-        autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
-    )
-    if file_sets_context.is_up_to_date():
-        logging.info("Train set filtering: nothing changed since last time, quit")
-        return
-
-    traces.log(textwrap.fill(
-        f"This filter calculates perplexity for each file and filters out "
-        f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
-        f"Those files likely don't have meaningful content to train on", width=100
-    ))
     try:
+        _log_everywhere("Loading file sets context...")
+        file_sets_context = FileSetsContext(
+            autoselect_test_files_num=finetune_filter_cfg.get("autoselect_test_files_num", 3)
+        )
+        if file_sets_context.is_up_to_date():
+            logging.info("Train set filtering: nothing changed since last time, quit")
+            return
+
+        traces.log(textwrap.fill(
+            f"This filter calculates perplexity for each file and filters out "
+            f"files with perplexity larger than {finetune_filter_cfg['filter_loss_threshold']:.3f}.\n"
+            f"Those files likely don't have meaningful content to train on", width=100
+        ))
+
         status_tracker.update_status("starting")
         finetune_filter(
             status_tracker=status_tracker,
diff --git a/self_hosting_machinery/scripts/env.py b/self_hosting_machinery/scripts/env.py
index 91947bb5..1d853fde 100644
--- a/self_hosting_machinery/scripts/env.py
+++ b/self_hosting_machinery/scripts/env.py
@@ -42,6 +42,8 @@ FLAG_LAUNCH_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_launch_finetune.flag")
 FLAG_STOP_FINETUNE = os.path.join(DIR_WATCHDOG_D, "_stop_finetune.flag")

+FLAG_RESTART_LSP = os.path.join(DIR_WATCHDOG_D, "_restart_lsp.flag")
+

 def create_dirs():
     os.makedirs(DIR_WATCHDOG_D, exist_ok=True)
     os.makedirs(DIR_WEIGHTS, exist_ok=True)
diff --git a/self_hosting_machinery/scripts/first_run.py b/self_hosting_machinery/scripts/first_run.py
index cd95392d..3712bf70 100644
--- a/self_hosting_machinery/scripts/first_run.py
+++ b/self_hosting_machinery/scripts/first_run.py
@@ -12,10 +12,13 @@ def copy_watchdog_configs_if_first_run_detected(model_assigner: ModelAssigner):


 def convert_old_configs(model_assigner: ModelAssigner):
-    # longthink.cfg is an old version of openai_api_worker.cfg
+    # longthink.cfg and openai_api_worker.cfg are deprecated watchdog configs
     old_longthink = os.path.join(env.DIR_WATCHDOG_D, "longthink.cfg")
     if os.path.exists(old_longthink):
         os.unlink(old_longthink)
+    openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
+    if os.path.exists(openai_watchdog_cfg_fn):
+        os.unlink(openai_watchdog_cfg_fn)

     for gpu in range(16):
         fn = os.path.join(env.DIR_WATCHDOG_D, "model-gpu%d.cfg" % gpu)
diff --git a/self_hosting_machinery/watchdog/docker_watchdog.py b/self_hosting_machinery/watchdog/docker_watchdog.py
index 43f008e6..39ebd7c9 100644
--- a/self_hosting_machinery/watchdog/docker_watchdog.py
+++ b/self_hosting_machinery/watchdog/docker_watchdog.py
@@ -79,6 +79,7 @@ def __init__(self, cfg):
         self.sent_sigusr1_ts = 0
         self.status_from_stderr = ""
         self.status_nickname = ""
+        self.command_not_found = False

     def set_status(self, newstatus):
         self.status_from_stderr = newstatus
@@ -96,7 +97,7 @@ def set_status(self, newstatus):
         os.rename(save_status_fn + ".tmp", save_status_fn)

     def _start(self):
-        if self.p is not None:
+        if self.p is not None or self.command_not_found:
             return
         global compiling_now
         alt_env = os.environ.copy()
@@ -113,12 +114,19 @@ def _start(self):
         CUDA_VISIBLE_DEVICES = ",".join(["%d" % x for x in self.cfg["gpus"]])
         alt_env["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
         self.start_ts = time.time()
-        self.p = subprocess.Popen(
-            cmdline,
-            env=alt_env,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.PIPE,
-        )
+        try:
+            self.p = subprocess.Popen(
+                cmdline,
+                env=alt_env,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
+            )
+        except FileNotFoundError as e:
+            log(f"failed to run command {self.cmdline_str}: '{e}'")
+            self.p = None
+            self.command_not_found = True
+            self.set_status("failed")
+            return
         log("%s CVD=%s starting %s\n -> pid %s" % (
             time.strftime("%Y%m%d %H:%M:%S"), CUDA_VISIBLE_DEVICES,
diff --git a/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
new file mode 100644
index 00000000..54d4617f
--- /dev/null
+++ b/self_hosting_machinery/watchdog/watchdog.d/lsp.cfg
@@ -0,0 +1,12 @@
+{
+    "policy": ["always_on"],
+    "interrupt_when_file_appears": "%FLAG_RESTART_LSP%",
+    "command_line": [
+        "refact-lsp",
+        "--address-url", "http://127.0.0.1:8008",
+        "--http-port", "8001",
+        "--lsp-port", "8002",
+        "--logs-stderr"
+    ],
+    "gpus": []
+}
diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
index 65ad0672..ff115fad 100644
--- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
+++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py
@@ -2,11 +2,15 @@
 import json
 import copy
 import asyncio
+import aiohttp
 import termcolor
+import os
+import litellm

 from fastapi import APIRouter, Request, HTTPException, Query
 from fastapi.responses import StreamingResponse

+from self_hosting_machinery import env
 from self_hosting_machinery.webgui.selfhost_model_resolve import completion_resolve_model
 from self_hosting_machinery.webgui.selfhost_model_resolve import static_resolve_model
 from self_hosting_machinery.webgui.selfhost_req_queue import Ticket
@@ -243,26 +247,48 @@ def __init__(self,
         self.add_api_route("/coding_assistant_caps.json", self._coding_assistant_caps, methods=["GET"])
         self.add_api_route("/v1/completions", self._completions, methods=["POST"])
+        self.add_api_route("/v1/models", self._models, methods=["GET"])
+        self.add_api_route("/v1/chat/completions", self._chat_completions, methods=["POST"])
+

         self._inference_queue = inference_queue
         self._id2ticket = id2ticket
         self._model_assigner = model_assigner
         self._timeout = timeout

+    @staticmethod
+    def _interations_env_setup():
+        inference = {}
+        if os.path.exists(env.CONFIG_INFERENCE):
+            inference = json.load(open(env.CONFIG_INFERENCE, 'r'))
+        integrations = {}
+        if os.path.exists(env.CONFIG_INTEGRATIONS):
+            integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
+        openai_api_key = integrations.get("openai_api_key", "") if inference.get("openai_api_enable", False) else ""
+        os.environ["OPENAI_API_KEY"] = openai_api_key
+
     async def _coding_assistant_caps(self):
+        models_available = self._inference_queue.models_available(force_read=True)
         code_completion_default_model, _ = completion_resolve_model(self._inference_queue)
+        code_chat_default_model = ""
+        for model_name in models_available:
+            if self._model_assigner.models_db.get(model_name, {}).get("chat_scratchpad_class", None) is not None \
+                    or model_name in litellm.model_list:
+                code_chat_default_model = model_name
+                break
         return {
             "cloud_name": "Refact Self-Hosted",
             "endpoint_template": "v1/completions",
+            "endpoint_chat_passthrough": "v1/chat/completions",
             "endpoint_style": "openai",
             "telemetry_basic_dest": "/stats/telemetry-basic",
             "telemetry_corrected_snippets_dest": "/stats/telemetry-snippets",
-            "running_models": self._inference_queue.models_available(),
+            "running_models": models_available,
             "code_completion_default_model": code_completion_default_model,
-            "code_chat_default_model": "",
+            "code_chat_default_model": code_chat_default_model,
             "tokenizer_path_template": "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
             "tokenizer_rewrite_path": {
                 model: self._model_assigner.models_db[model]["model_path"]
-                for model in self._inference_queue.models_available()
+                for model in models_available if model in self._model_assigner.models_db
             },
         }

@@ -278,7 +304,7 @@ async def _login(self):
         }
         filter_caps = set([
             capability
-            for model in self._inference_queue.models_available()
+            for model in self._inference_queue.models_available(force_read=True)
             for capability in models_mini_db_extended.get(model, {}).get("filter_caps", [])
         ])
         for rec in self._model_assigner.models_caps_db:
@@ -423,3 +449,92 @@ async def _chat(self, post: ChatContext, request: Request, account: str = "XXX")
         self._id2ticket[ticket.id()] = ticket
         await q.put(ticket)
         return StreamingResponse(chat_streamer(ticket, self._timeout, req["created"]))
+
+    async def _models(self):
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get("http://127.0.0.1:8001/v1/caps") as resp:
+                    lsp_server_caps = await resp.json()
+        except aiohttp.ClientConnectorError as e:
+            err_msg = f"LSP server is not ready yet: {e}"
+            log(err_msg)
+            raise HTTPException(status_code=401, detail=err_msg)
+        completion_models = set()
+        for model, caps in lsp_server_caps["code_completion_models"].items():
+            completion_models.update({model, *caps["similar_models"]})
+        chat_models = set()
+        for model, caps in lsp_server_caps["code_chat_models"].items():
+            chat_models.update({model, *caps["similar_models"]})
+        data = [
+            {
+                "id": model, "root": model, "object": "model",
+                "created": 0, "owned_by": "", "permission": [], "parent": None,
+                "completion": model in completion_models, "chat": model in chat_models,
+            }
+            for model in lsp_server_caps["running_models"]
+        ]
+        return {
+            "object": "list",
+            "data": data,
+        }
+
+    async def _chat_completions(self, post: ChatContext, account: str = "XXX"):
+        prefix, postfix = "data: ", "\n\n"
+
+        if post.model in litellm.model_list:
+            async def litellm_streamer(post: ChatContext):
+                try:
+                    self._interations_env_setup()
+                    response = await litellm.acompletion(
+                        model=post.model, messages=post.messages, stream=True,
+                        temperature=post.temperature, top_p=post.top_p, max_tokens=post.max_tokens, stop=post.stop)
+                    finish_reason = None
+                    async for model_response in response:
+                        try:
+                            data = model_response.dict()
+                            finish_reason = data["choices"][0]["finish_reason"]
+                        except json.JSONDecodeError:
+                            data = {"choices": [{"finish_reason": finish_reason}]}
+                        yield prefix + json.dumps(data) + postfix
+                    # NOTE: DONE needed by refact-lsp server
+                    yield prefix + "[DONE]" + postfix
+                except BaseException as e:
+                    err_msg = f"litellm error: {e}"
+                    log(err_msg)
+                    yield prefix + json.dumps({"error": err_msg}) + postfix
+
+            response_streamer = litellm_streamer(post)
+
+        else:
+            async def chat_completion_streamer(post: ChatContext):
+                post_url = "http://127.0.0.1:8001/v1/chat"
+                post_data = {
+                    "messages": [m.dict() for m in post.messages],
+                    "stream": True,
+                    "model": post.model,
+                    "parameters": {
+                        "temperature": post.temperature,
+                        "max_new_tokens": post.max_tokens,
+                    }
+                }
+                async with aiohttp.ClientSession() as session:
+                    try:
+                        async with session.post(post_url, json=post_data) as response:
+                            finish_reason = None
+                            async for data, _ in response.content.iter_chunks():
+                                try:
+                                    data = data.decode("utf-8")
+                                    data = json.loads(data[len(prefix):-len(postfix)])
+                                    finish_reason = data["choices"][0]["finish_reason"]
+                                    data["choices"][0]["finish_reason"] = None
+                                except json.JSONDecodeError:
+                                    data = {"choices": [{"finish_reason": finish_reason}]}
+                                yield prefix + json.dumps(data) + postfix
+                    except aiohttp.ClientConnectorError as e:
+                        err_msg = f"LSP server is not ready yet: {e}"
+                        log(err_msg)
+                        yield prefix + json.dumps({"error": err_msg}) + postfix
+
+            response_streamer = chat_completion_streamer(post)
+
+        return StreamingResponse(response_streamer, media_type="text/event-stream")
diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py
index 9872a03e..5078e002 100644
--- a/self_hosting_machinery/webgui/selfhost_model_assigner.py
+++ b/self_hosting_machinery/webgui/selfhost_model_assigner.py
@@ -70,7 +70,6 @@ def models_to_watchdog_configs(self, inference_config=None):

         inference_config = self._model_assign_filter(inference_config)
         inference_config = self._model_inference_setup(inference_config)
-        inference_config = self._integrations_inference_setup(inference_config)

         with open(env.CONFIG_INFERENCE + ".tmp", "w") as f:
             json.dump(inference_config, f, indent=4)
@@ -135,30 +134,6 @@ def _model_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str,
             "more_models_than_gpus": more_models_than_gpus,
         }

-    def _integrations_inference_setup(self, inference_config: Dict[str, Any]) -> Dict[str, Any]:
-        integrations = {}
-        if os.path.exists(env.CONFIG_INTEGRATIONS):
-            integrations = json.load(open(env.CONFIG_INTEGRATIONS, 'r'))
-
-        openai_api_key = integrations.get("openai_api_key", "")
-        openai_watchdog_cfg_fn = os.path.join(env.DIR_WATCHDOG_D, "openai_api_worker.cfg")
-
-        if inference_config.get("openai_api_enable", False) and openai_api_key.startswith("sk-"):
-            cfg = json.load(open(os.path.join(env.DIR_WATCHDOG_TEMPLATES, "openai_api_worker.cfg"), 'r'))
-            cfg.pop('unfinished')
-            cfg['command_line'].append('--openai_key')
-            cfg['command_line'].append(openai_api_key)
-            with open(openai_watchdog_cfg_fn + ".tmp", "w") as f:
-                json.dump(cfg, f, indent=4)
-            os.rename(openai_watchdog_cfg_fn + ".tmp", openai_watchdog_cfg_fn)
-        else:
-            try:
-                os.unlink(openai_watchdog_cfg_fn)
-            except FileNotFoundError:
-                pass
-
-        return inference_config
-
     def first_run(self):
         default_config = {
             "model_assign": {
@@ -248,3 +223,8 @@ def model_assignment(self):
             if model in self.models_db
         }
         return j
+
+    @staticmethod
+    def restart_lsp():
+        with open(env.FLAG_RESTART_LSP, "w") as f:
+            f.write("")
diff --git a/self_hosting_machinery/webgui/selfhost_queue.py b/self_hosting_machinery/webgui/selfhost_queue.py
index 129e043e..b5620fda 100644
--- a/self_hosting_machinery/webgui/selfhost_queue.py
+++ b/self_hosting_machinery/webgui/selfhost_queue.py
@@ -24,9 +24,9 @@ def model_name_to_queue(self, ticket, model_name, no_checks=False):
             raise HTTPException(status_code=400, detail="model '%s' is not available at this moment." % model_name)
         return self._user2gpu_queue[model_name]

-    def models_available(self) -> List[str]:
+    def models_available(self, force_read: bool = False) -> List[str]:
         t1 = time.time()
-        if self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
+        if not force_read and self._models_available_ts + self.CACHE_MODELS_AVAILABLE > t1:
             return self._models_available
         self._models_available = []
         if os.path.exists(env.CONFIG_INFERENCE):
@@ -35,7 +35,6 @@ def models_available(self) -> List[str]:
                 self._models_available.append(model)
             self._models_available_ts = time.time()
             if j.get("openai_api_enable", False):
-                # self._models_available.append('gpt3.5')
-                # self._models_available.append('gpt4')
-                self._models_available.append('longthink/stable')
+                self._models_available.append('gpt-3.5-turbo')
+                self._models_available.append('gpt-4')
         return self._models_available
diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js
index b59f6e20..3e2aebdc 100644
--- a/self_hosting_machinery/webgui/static/tab-finetune.js
+++ b/self_hosting_machinery/webgui/static/tab-finetune.js
@@ -900,16 +900,16 @@ function finetune_controls_state()

     render_ftf_stats(finetune_state.finetune_filter_stats);

-    if(finetune_state.finetune_filter_stats.filterting_status) {
+    if(finetune_state.finetune_filter_stats.filtering_status) {
         document.querySelector('.ftf-status').classList.remove('d-none');
-        document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filterting_status;
+        document.querySelector('.ftf-status span').innerHTML = finetune_state.finetune_filter_stats.filtering_status;
     } else {
         document.querySelector('.ftf-status').classList.add('d-none');
     }

     let error_span = document.querySelector('.ftf-error span');
     let ftf_error = document.querySelector('.ftf-error');
-    if (finetune_state.finetune_filter_stats.filterting_status == "failed") {
+    if (finetune_state.finetune_filter_stats.filtering_status == "failed") {
         ftf_error.classList.remove('d-none');
         if(finetune_state.finetune_filter_stats.error && finetune_state.finetune_filter_stats.error !== '') {
             error_span.innerHTML = finetune_state.finetune_filter_stats.error;
diff --git a/self_hosting_machinery/webgui/tab_models_host.py b/self_hosting_machinery/webgui/tab_models_host.py
index c1138e85..ac0606b3 100644
--- a/self_hosting_machinery/webgui/tab_models_host.py
+++ b/self_hosting_machinery/webgui/tab_models_host.py
@@ -51,4 +51,5 @@ async def _tab_host_models_assign(self, post: TabHostModelsAssign):
         else:
             validated["completion"] = ""
         self._model_assigner.models_to_watchdog_configs(validated)
+        self._model_assigner.restart_lsp()
         return JSONResponse("OK")
diff --git a/self_hosting_machinery/webgui/webgui.py b/self_hosting_machinery/webgui/webgui.py
index 7ef22b52..8d0a6e7d 100644
--- a/self_hosting_machinery/webgui/webgui.py
+++ b/self_hosting_machinery/webgui/webgui.py
@@ -40,6 +40,7 @@
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self._model_assigner = model_assigner

         inference_queue = InferenceQueue()
         id2ticket: Dict[str, Ticket] = weakref.WeakValueDictionary()
         for router in self._routers_list(id2ticket, inference_queue, model_assigner, stats_service):
@@ -99,6 +100,9 @@ def handle_sigint(*args):
         signal.signal(signal.SIGINT, handle_sigint)
         signal.signal(signal.SIGUSR1, handle_sigint)

+        # NOTE: try restart LSP after server started
+        self._model_assigner.restart_lsp()
+

 if __name__ == "__main__":
     from argparse import ArgumentParser
diff --git a/setup.py b/setup.py
index 6ae83133..172aa35f 100644
--- a/setup.py
+++ b/setup.py
@@ -44,8 +44,8 @@ class PyPackage:
     "self_hosting_machinery": PyPackage(
         requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic==1.10.13",
                   "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.4.2", "accelerate",
-                  "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
-                  "torchinfo", "mpi4py", "deepspeed==0.11.1", "pandas>=2.0.3",
+                  "termcolor", "torch", "transformers==4.34.0", "bitsandbytes", "safetensors", "peft", "triton",
+                  "torchinfo", "mpi4py", "deepspeed==0.11.1", "litellm", "pandas>=2.0.3",
                   "sqlalchemy", "sqlalchemy-utils", "psycopg2-binary"],
         optional=["ninja", "flash_attn @ git+https://github.com/smallcloudai/flash-attention@feat/alibi"],
         requires_packages=["refact_scratchpads", "refact_scratchpads_no_gpu",