diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx
index 941a1efc6a..7caed9168d 100644
--- a/docs/src/components/BuiltinTools.mdx
+++ b/docs/src/components/BuiltinTools.mdx
@@ -48,7 +48,7 @@ import { LinkCard } from '@astrojs/starlight/components';
-
+
diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
index 58d3dfd4ae..cad4f2d84b 100644
--- a/docs/src/content/docs/reference/scripts/system.mdx
+++ b/docs/src/content/docs/reference/scripts/system.mdx
@@ -3566,7 +3566,7 @@ defTool(
Video manipulation tools
- tool `video_probe`: Probe a video file and returns the metadata information
-- tool `video_extract_audio`: Extract audio from a video file into a .wav file. Returns the audio filename.
+- tool `video_extract_audio`: Extract audio from a video file into an audio file. Returns the audio filename.
- tool `video_extract_frames`: Extract frames from a video file
`````js wrap title="system.video"
@@ -3582,14 +3582,16 @@ defTool(
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
},
required: ["filename"],
},
async (args) => {
const { context, filename } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`probing ${filename}`)
const info = await ffmpeg.probe(filename)
return YAML.stringify(info)
@@ -3598,20 +3600,22 @@ defTool(
defTool(
"video_extract_audio",
- "Extract audio from a video file into a .wav file. Returns the audio filename.",
+ "Extract audio from a video file into an audio file. Returns the audio filename.",
{
type: "object",
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
},
required: ["filename"],
},
async (args) => {
const { context, filename } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`extracting audio from ${filename}`)
const audioFile = await ffmpeg.extractAudio(filename)
return audioFile
@@ -3626,7 +3630,7 @@ defTool(
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
keyframes: {
type: "boolean",
@@ -3655,7 +3659,9 @@ defTool(
},
async (args) => {
const { context, filename, transcription, ...options } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`extracting frames from ${filename}`)
if (transcription) {
diff --git a/docs/src/content/docs/reference/scripts/videos.mdx b/docs/src/content/docs/reference/scripts/videos.mdx
index 8e3254f8b2..4bd07c6226 100644
--- a/docs/src/content/docs/reference/scripts/videos.mdx
+++ b/docs/src/content/docs/reference/scripts/videos.mdx
@@ -28,13 +28,13 @@ or configure the `FFMPEG_PATH` / `FFPROBE_PATH` environment variables to point t
-As mentionned above, multi-modal LLMs typically support images as a sequence
+As mentioned above, multi-modal LLMs typically support images as a sequence
of frames (or screenshots).
-The `ffmpeg.extractFrames` will render frames from a video file or url
+The `ffmpeg.extractFrames` will render frames from a video file
and return them as an array of file paths. You can use the result with `defImages` directly.
- by default, extract keyframes (intra-frames)
```js
-const frames = await ffmpeg.extractFrames("path_url_to_video")
+const frames = await ffmpeg.extractFrames("path_to_video")
def("FRAMES", frames)
```
@@ -66,11 +66,11 @@ const transcript = await transcribe("...", { sceneThreshold: 0.3 })
## Extracting audio
-The `ffmpeg.extractAudio` will extract the audio from a video file or url
+The `ffmpeg.extractAudio` will extract the audio from a video file
as a `.wav` file.
```js
-const audio = await ffmpeg.extractAudio("path_url_to_video")
+const audio = await ffmpeg.extractAudio("path_to_video")
```
The conversion to audio happens automatically
@@ -78,10 +78,10 @@ for videos when using [transcribe](/genaiscript/reference/scripts/transcription)
## Probing videos
-You can extract metadata from a video file or url using `ffmpeg.probe`.
+You can extract metadata from a video file using `ffmpeg.probe`.
```js
-const info = await ffmpeg.probe("path_url_to_video")
+const info = await ffmpeg.probe("path_to_video")
const { duration } = info.streams[0]
console.log(`video duration: ${duration} seconds`)
```
@@ -92,7 +92,7 @@ You can further customize the `ffmpeg` configuration
by passing `outputOptions`.
```js 'outputOptions: "-b:a 16k",'
-const audio = await ffmpeg.extractAudio("path_url_to_video", {
+const audio = await ffmpeg.extractAudio("path_to_video", {
outputOptions: "-b:a 16k",
})
```
diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts
index e90f6d1439..4ba2f5d88a 100644
--- a/packages/core/src/fetch.ts
+++ b/packages/core/src/fetch.ts
@@ -9,12 +9,15 @@ import {
import { errorMessage } from "./error"
import { logVerbose, toStringList } from "./util"
import { CancellationOptions, CancellationToken } from "./cancellation"
-import { readText } from "./fs"
import { resolveHttpProxyAgent } from "./proxy"
import { host } from "./host"
import { renderWithPrecision } from "./precision"
import crossFetch from "cross-fetch"
import prettyBytes from "pretty-bytes"
+import { fileTypeFromBuffer } from "file-type"
+import { isBinaryMimeType } from "./binary"
+import { toBase64 } from "./base64"
+import { deleteUndefinedValues } from "./cleaners"
export type FetchType = (
input: string | URL | globalThis.Request,
@@ -137,7 +140,7 @@ export async function fetchText(
const url = urlOrFile.filename
let ok = false
let status = 404
- let text: string
+ let bytes: Uint8Array
if (/^https?:\/\//i.test(url)) {
const f = await createFetch({
retries,
@@ -149,25 +152,39 @@ export async function fetchText(
const resp = await f(url, rest)
ok = resp.ok
status = resp.status
- if (ok) text = await resp.text()
+ if (ok) bytes = new Uint8Array(await resp.arrayBuffer())
} else {
try {
- text = await readText("workspace://" + url)
- ok = true
+ bytes = await host.readFile(url)
} catch (e) {
logVerbose(e)
ok = false
status = 404
}
}
- const file: WorkspaceFile = {
- filename: urlOrFile.filename,
- content: text,
+
+    let content: string // stays undefined when no payload could be obtained
+    let encoding: "base64" // only set when the payload is binary
+    const mime = bytes ? await fileTypeFromBuffer(bytes) : undefined // bytes is unset after a failed request/read
+    const type: string = mime?.mime // sniffed MIME type, undefined when no signature matches
+    if (bytes && isBinaryMimeType(type)) {
+        encoding = "base64"
+        content = toBase64(bytes)
+    } else if (bytes) {
+        content = host.createUTF8Decoder().decode(bytes)
     }
+    ok = !!bytes // never report success for a failed request or an unreadable file
+ const file: WorkspaceFile = deleteUndefinedValues({
+ filename: urlOrFile.filename,
+ encoding,
+ type,
+ content,
+ })
return {
ok,
status,
- text,
+ text: content,
+ bytes,
file,
}
}
diff --git a/packages/core/src/filesystem.ts b/packages/core/src/filesystem.ts
index 3e7aa0223e..b4c5f22139 100644
--- a/packages/core/src/filesystem.ts
+++ b/packages/core/src/filesystem.ts
@@ -1,3 +1,4 @@
+import { stat } from "fs/promises"
import { JSONLineCache } from "./cache"
import { DOT_ENV_REGEX } from "./constants"
import { CSVTryParse } from "./csv"
@@ -111,6 +112,17 @@ export function createFileSystem(): Omit {
const res = JSONLineCache.byName(name)
return res
},
+        // Best-effort metadata lookup: resolves to undefined instead of throwing.
+        stat: async (filename: string) => {
+            try {
+                const { size, mode } = await stat(filename)
+                // expose only the size and mode fields of the fs stats
+                return { size, mode }
+            } catch {
+                // any failure (e.g. missing file) is reported as undefined
+                return undefined
+            }
+        },
} satisfies Omit
;(fs as any).readFile = readText
return Object.freeze(fs)
diff --git a/packages/core/src/genaisrc/system.video.genai.js b/packages/core/src/genaisrc/system.video.genai.js
index 6743b68562..1a7b02472d 100644
--- a/packages/core/src/genaisrc/system.video.genai.js
+++ b/packages/core/src/genaisrc/system.video.genai.js
@@ -10,14 +10,16 @@ defTool(
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
},
required: ["filename"],
},
async (args) => {
const { context, filename } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`probing ${filename}`)
const info = await ffmpeg.probe(filename)
return YAML.stringify(info)
@@ -26,20 +28,22 @@ defTool(
defTool(
"video_extract_audio",
- "Extract audio from a video file into a .wav file. Returns the audio filename.",
+ "Extract audio from a video file into an audio file. Returns the audio filename.",
{
type: "object",
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
},
required: ["filename"],
},
async (args) => {
const { context, filename } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`extracting audio from ${filename}`)
const audioFile = await ffmpeg.extractAudio(filename)
return audioFile
@@ -54,7 +58,7 @@ defTool(
properties: {
filename: {
type: "string",
- description: "The video filename or URL to probe",
+ description: "The video filename to probe",
},
keyframes: {
type: "boolean",
@@ -83,7 +87,9 @@ defTool(
},
async (args) => {
const { context, filename, transcription, ...options } = args
- if (!filename) return "No filename or url provided"
+ if (!filename) return "No filename provided"
+ if (!(await workspace.stat(filename)))
+ return `File ${filename} does not exist.`
context.log(`extracting frames from ${filename}`)
if (transcription) {
diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts
index b8fcbff173..24d187f776 100644
--- a/packages/core/src/promptcontext.ts
+++ b/packages/core/src/promptcontext.ts
@@ -74,6 +74,7 @@ export async function createPromptContext(
})
return res
},
+ stat: (filename) => runtimeHost.workspace.stat(filename),
grep: async (
query,
grepOptions: string | WorkspaceGrepOptions,
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index beb5eb12d3..f4ffd5c3de 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -885,6 +885,14 @@ interface FindFilesOptions {
readText?: boolean
}
+/** File metadata; the shape produced by the workspace `stat` implementation. */
+interface FileStats {
+ /** Size of the file in bytes */
+ size: number
+ /** File mode; the Node.js implementation forwards `fs.Stats.mode` (presumably file type and permission bits — confirm) */
+ mode: number
+}
+
interface WorkspaceFileSystem {
/**
* Searches for files using the glob pattern and returns a list of files.
@@ -911,6 +919,12 @@ interface WorkspaceFileSystem {
options?: Omit
): Promise
+    /**
+     * Reads metadata information about the file. Returns undefined if the file does not exist.
+     * @param filename path of the file to inspect
+     */
+    stat(filename: string): Promise<FileStats | undefined>
+
/**
* Reads the content of a file as text
* @param path