Commit
Add --experimental-use-full-context to work better with prompt caching
Cerlancism committed Oct 19, 2024
1 parent 4b8406c commit 8630e68
Showing 3 changed files with 44 additions and 16 deletions.
.vscode/launch.json (1 addition, 1 deletion)
@@ -22,7 +22,7 @@
  ],
  "program": "${workspaceFolder}/cli/translator.mjs",
  // "args": ["--plain-text", "你好"]
- "args": ["--stream", "--to", "Chinese", "--input", "test/data/test_ja_small.srt", "-t", "0", "--batch-sizes", "[2, 3]", "--history-prompt-length", "3"],
+ "args": ["--stream", "--to", "Chinese", "--input", "test/data/test_ja.srt", "-t", "0", "--batch-sizes", "[5]", "--history-prompt-length", "1", "--experimental-use-full-context"],
  // "args": ["-i", "./test/data/test_cn_multiline.srt", "--stream", "--batch-sizes", "[5, 10]"],
  // "args": ["-i", "./test/data/test_cn_multiline.srt", "--stream", "--experimental-structured-mode", "--experimental-max_token", "4096", "--experimental-input-multiplier", "2", "--batch-sizes", "[5, 10]", "--experimental-fallback-model", "gpt-3.5-turbo-0613"],
  // "args": ["-i", "./test/data/test_cn_multiline.srt", "--experimental-structured-mode", "object", "--experimental-max_token", "4096", "--experimental-input-multiplier", "2", "--batch-sizes", "[5, 10]"],
cli/translator.mjs (3 additions, 0 deletions)
@@ -52,10 +52,12 @@ export function createInstance(args)
  .option("-f, --file <file>", "Deprecated: alias for -i, --input")
  .option("-s, --system-instruction <instruction>", "Override the prompt system instruction template `Translate ${from} to ${to}` with this plain text")
  .option("-p, --plain-text <text>", "Only translate this input plain text")

  .option("--experimental-max_token <value>", "", parseInt, 0)
  .option("--experimental-input-multiplier <value>", "", parseInt, 0)
  .option("--experimental-fallback-model <value>", "Model to be used for refusal fallback")
  .addOption(new Option("--experimental-structured-mode [mode]", "Enable structured response formats as outlined by https://openai.com/index/introducing-structured-outputs-in-the-api/").choices(["array", "object"]))
+ .option("--experimental-use-full-context", "Use the full history, chunked by historyPromptLength, to work better with prompt caching.")

  .option("--initial-prompts <prompts>", "Initiation prompt messages before the translation request messages in JSON Array", JSON.parse, DefaultOptions.initialPrompts)
  .option("--no-use-moderator", "Don't use the OpenAI Moderation tool")
@@ -107,6 +109,7 @@ export function createInstance(args)
  ...(opts.experimentalMax_token && { max_token: opts.experimentalMax_token }),
  ...(opts.experimentalInputMultiplier && { inputMultiplier: opts.experimentalInputMultiplier }),
  ...(opts.experimentalFallbackModel && { fallbackModel: opts.experimentalFallbackModel }),
+ ...(opts.experimentalUseFullContext && { useFullContext: opts.experimentalUseFullContext }),
  ...(opts.logLevel && { logLevel: opts.logLevel })
  };

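A note on the option mapping above: the conditional-spread idiom only injects a key into the translator options when the corresponding CLI flag is set, because spreading false into an object literal is a no-op. A minimal sketch of the behavior (the makeOptions helper is illustrative, not part of the codebase):

// Hypothetical helper mirroring the conditional-spread pattern in createInstance.
const makeOptions = (experimentalUseFullContext) => ({
    logLevel: "info",
    ...(experimentalUseFullContext && { useFullContext: experimentalUseFullContext })
});

makeOptions(true);  // { logLevel: "info", useFullContext: true }
makeOptions(false); // { logLevel: "info" }; the key is absent, so DefaultOptions.useFullContext (false) applies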
src/translator.mjs (40 additions, 15 deletions)
@@ -33,6 +33,8 @@ import { TranslationOutput } from './translatorOutput.mjs';
  * Enforce one to one line quantity input output matching
  * @property {number} historyPromptLength `10` \
  * Length of the prompt history to be retained and passed over to the next translation request in order to maintain some context.
+ * @property {boolean} useFullContext
+ * Use the full history, chunked by historyPromptLength, to work better with prompt caching.
  * @property {number[]} batchSizes `[10, 100]` \
  * The number of lines to include in each translation prompt, provided that they are estimated to within the token limit.
  * In case of mismatched output line quantities, this number will be decreased step-by-step according to the values in the array, ultimately reaching one.
@@ -55,6 +57,7 @@ export const DefaultOptions = {
      prefixNumber: true,
      lineMatching: true,
      historyPromptLength: 10,
+     useFullContext: false,
      batchSizes: [10, 100],
      structuredMode: false,
      max_token: 0,
@@ -465,44 +468,66 @@ export class Translator
  {
      if (this.workingProgress.length === 0 || this.options.historyPromptLength === 0)
      {
-         return
+         return;
      }

+     let sliced;
+     if (this.options.useFullContext)
+     {
+         // Use the entire workingProgress if useFullContext is true
+         sliced = this.workingProgress;
+     } else
+     {
+         // Otherwise, slice based on historyPromptLength
+         sliced = this.workingProgress.slice(-this.options.historyPromptLength);
+     }
-     const sliced = this.workingProgress.slice(-this.options.historyPromptLength)
-     const offset = this.workingProgress.length - this.options.historyPromptLength
+     const offset = this.workingProgress.length - sliced.length;

      /**
       * @param {string} text
       * @param {number} index
       */
      const checkFlaggedMapper = (text, index) =>
      {
-         const id = index + (offset < 0 ? 0 : offset)
+         const id = index + (offset < 0 ? 0 : offset);
          if (this.moderatorFlags.has(id))
          {
              // log.warn("[Translator]", "Prompt Flagged", id, text)
-             return this.preprocessLine("-", id, 0)
+             return this.preprocessLine("-", id, 0);
          }
-         return text
-     }
+         return text;
+     };

-     const checkedSource = sliced.map((x, i) => checkFlaggedMapper(x.source, i))
-     const checkedTransform = sliced.map((x, i) => checkFlaggedMapper(x.transform, i))
-     this.promptContext = this.getContext(checkedSource, checkedTransform)
+     const checkedSource = sliced.map((x, i) => checkFlaggedMapper(x.source, i));
+     const checkedTransform = sliced.map((x, i) => checkFlaggedMapper(x.transform, i));
+     this.promptContext = this.getContext(checkedSource, checkedTransform);
  }


  /**
   * @param {string[]} sourceLines
   * @param {string[]} transformLines
   */
  getContext(sourceLines, transformLines)
  {
-     return /** @type {import('openai').OpenAI.Chat.ChatCompletionMessage[]}*/ ([
-         { role: "user", content: this.getContextLines(sourceLines, "user") },
-         { role: "assistant", content: this.getContextLines(transformLines, "assistant") }
-     ])
+     const chunks = [];
+     const chunkSize = this.options.historyPromptLength;
+     for (let i = 0; i < sourceLines.length; i += chunkSize)
+     {
+         const sourceChunk = sourceLines.slice(i, i + chunkSize);
+         const transformChunk = transformLines.slice(i, i + chunkSize);
+         chunks.push({
+             role: "user",
+             content: this.getContextLines(sourceChunk, "user")
+         });
+         chunks.push({
+             role: "assistant",
+             content: this.getContextLines(transformChunk, "assistant")
+         });
+     }
+     return /** @type {import('openai').OpenAI.Chat.ChatCompletionMessage[]}*/ (chunks);
  }


  /**
   * @param {string[]} lines
   * @param {"user" | "assistant" } role
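To see why the rewritten getContext works better with prompt caching, here is a minimal sketch of the history it now builds, with getContextLines simplified to a plain join (an assumption for illustration; the real method formats lines per role):

// Sketch: four completed lines chunked with historyPromptLength = 2.
// Simplified stand-in for getContext in src/translator.mjs, not the exact code.
const historyPromptLength = 2;
const sourceLines = ["source 1", "source 2", "source 3", "source 4"];
const transformLines = ["target 1", "target 2", "target 3", "target 4"];

const chunks = [];
for (let i = 0; i < sourceLines.length; i += historyPromptLength)
{
    // Each chunk becomes a user/assistant message pair that never changes again.
    chunks.push({ role: "user", content: sourceLines.slice(i, i + historyPromptLength).join("\n") });
    chunks.push({ role: "assistant", content: transformLines.slice(i, i + historyPromptLength).join("\n") });
}

console.log(chunks);
// [
//   { role: "user", content: "source 1\nsource 2" },
//   { role: "assistant", content: "target 1\ntarget 2" },
//   { role: "user", content: "source 3\nsource 4" },
//   { role: "assistant", content: "target 3\ntarget 4" }
// ]

Because completed chunks are never rewritten as translation advances, consecutive requests share a byte-identical message prefix, which is what provider-side prompt caching keys on; the previous sliding-window slice rewrote that prefix on every batch. This also explains the offset change above: offset is now derived from sliced.length instead of historyPromptLength, so the IDs checked against moderatorFlags stay correct in both modes (with the full history, sliced.length equals workingProgress.length and offset is 0).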
