Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved PDF rendering reliability #1075

Merged
merged 4 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@
"devDependencies": {
"@astrojs/check": "^0.9.4",
"@astrojs/starlight": "^0.31.1",
"astro": "^5.1.10",
"astro": "^5.2.3",
"astro-embed": "^0.9.0",
"rehype-mermaid": "^3.0.0",
"starlight-blog": "^0.16.1",
"starlight-links-validator": "^0.14.1",
"starlight-package-managers": "^0.9.0",
"starlight-links-validator": "^0.14.2",
"starlight-package-managers": "^0.9.1",
"typescript": "5.7.3",
"zx": "^8.3.0"
"zx": "^8.3.1"
}
}
220 changes: 113 additions & 107 deletions docs/yarn.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,6 @@
"npm-check-updates": "^17.1.14",
"npm-run-all": "^4.1.5",
"prettier": "^3.4.2",
"zx": "^8.3.0"
"zx": "^8.3.1"
}
}
14 changes: 7 additions & 7 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@
"@octokit/rest": "^21.0.2",
"@stoplight/json-schema-generator": "^1.0.2",
"dockerode": "^4.0.4",
"es-toolkit": "^1.31.0",
"es-toolkit": "^1.32.0",
"fluent-ffmpeg": "^2.1.3",
"gpt-tokenizer": "^2.8.1",
"html-to-text": "^9.0.5",
"jimp": "^1.6.0",
"mammoth": "^1.9.0",
"mathjs": "^14.1.0",
"mathjs": "^14.2.0",
"tabletojson": "^4.1.5",
"tsx": "^4.19.2",
"turndown": "^7.2.0",
Expand All @@ -80,7 +80,7 @@
"optionalDependencies": {
"@huggingface/transformers": "^3.3.2",
"@lvce-editor/ripgrep": "^1.5.0",
"pdfjs-dist": "4.9.155",
"pdfjs-dist": "4.10.38",
"playwright": "^1.50.0",
"skia-canvas": "^2.0.2",
"tree-sitter-wasms": "^0.1.11",
Expand All @@ -97,27 +97,27 @@
"@types/dockerode": "^3.3.32",
"@types/fs-extra": "^11.0.4",
"@types/memorystream": "^0.3.4",
"@types/node": "^22.12.0",
"@types/node": "^22.13.0",
"@types/papaparse": "^5.3.15",
"@types/prompts": "^2.4.9",
"@types/replace-ext": "^2.0.2",
"@types/ws": "^8.5.13",
"commander": "^12.1.0",
"diff": "^7.0.0",
"dotenv": "^16.4.7",
"es-toolkit": "^1.31.0",
"es-toolkit": "^1.32.0",
"esbuild": "^0.24.2",
"execa": "^9.5.2",
"fs-extra": "^11.3.0",
"glob": "^11.0.1",
"memorystream": "^0.3.1",
"node-sarif-builder": "^3.2.0",
"octokit": "^4.0.3",
"openai": "^4.80.1",
"openai": "^4.82.0",
"pretty-bytes": "^6.1.1",
"replace-ext": "^2.0.0",
"ws": "^8.18.0",
"zx": "^8.3.0"
"zx": "^8.3.1"
},
"scripts": {
"compile:runtime:declarations": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node --module esnext --emitDeclarationOnly",
Expand Down
17 changes: 8 additions & 9 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
},
"optionalDependencies": {
"@lvce-editor/ripgrep": "^1.5.0",
"pdfjs-dist": "4.9.155",
"pdfjs-dist": "4.10.38",
"skia-canvas": "^2.0.2",
"web-tree-sitter": "0.22.2"
},
Expand All @@ -34,13 +34,12 @@
"@tidyjs/tidy": "^2.5.2",
"@types/diff": "^6.0.0",
"@types/fluent-ffmpeg": "^2.1.27",
"@types/html-escaper": "^3.0.2",
"@types/html-to-text": "^9.0.4",
"@types/inflection": "^1.13.2",
"@types/ini": "^4.1.1",
"@types/mime-types": "^2.1.4",
"@types/mustache": "^4.2.5",
"@types/node": "^22.12.0",
"@types/node": "^22.13.0",
"@types/object-inspect": "^1.13.0",
"@types/semver": "^7.5.8",
"@types/shell-quote": "^1.7.5",
Expand All @@ -50,7 +49,7 @@
"csv-stringify": "^6.5.2",
"diff": "^7.0.0",
"dotenv": "^16.4.7",
"es-toolkit": "^1.31.0",
"es-toolkit": "^1.32.0",
"esbuild": "^0.24.2",
"fast-xml-parser": "^4.5.1",
"fetch-retry": "^6.0.0",
Expand All @@ -59,7 +58,7 @@
"fluent-ffmpeg": "^2.1.3",
"gpt-tokenizer": "^2.8.1",
"groq-js": "^1.14.2",
"html-escaper": "^3.0.3",
"html-escaper": "3.0.3",
"html-to-text": "^9.0.5",
"https-proxy-agent": "^7.0.6",
"ignore": "^7.0.3",
Expand All @@ -71,21 +70,21 @@
"jsonrepair": "^3.11.2",
"magic-string": "^0.30.17",
"mammoth": "^1.9.0",
"mathjs": "^14.1.0",
"mathjs": "^14.2.0",
"merge-descriptors": "^2.0.0",
"mime": "^4.0.6",
"minimatch": "^10.0.1",
"minisearch": "^7.1.1",
"mustache": "^4.2.0",
"object-inspect": "^1.13.3",
"openai": "^4.80.1",
"openai": "^4.82.0",
"p-limit": "^6.2.0",
"package-manager-detector": "^0.2.8",
"package-manager-detector": "^0.2.9",
"parse-diff": "^0.11.1",
"prettier": "^3.4.2",
"pretty-bytes": "^6.1.1",
"sanitize-html": "^2.14.0",
"semver": "^7.6.3",
"semver": "^7.7.0",
"serialize-error": "^12.0.0",
"shell-quote": "^1.8.2",
"tabletojson": "^4.1.5",
Expand Down
5 changes: 3 additions & 2 deletions packages/core/src/bufferlike.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export async function resolveBufferLike(
else if (bufferLike instanceof ReadableStream) {
const stream: ReadableStream = bufferLike
return Buffer.from(await new Response(stream).arrayBuffer())
} else if (bufferLike instanceof ArrayBuffer)
bufferLike = Buffer.from(bufferLike)
} else if (bufferLike instanceof ArrayBuffer) return Buffer.from(bufferLike)
else if (bufferLike instanceof Uint8Array) return Buffer.from(bufferLike)
else if (
typeof bufferLike === "object" &&
(bufferLike as WorkspaceFile).content
Expand All @@ -24,6 +24,7 @@ export async function resolveBufferLike(
(bufferLike as WorkspaceFile).content,
(bufferLike as WorkspaceFile).encoding || "utf-8"
)
console.log(bufferLike)
throw new Error("Unsupported buffer-like object")
}

Expand Down
4 changes: 4 additions & 0 deletions packages/core/src/html-escaper.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
 * Local ambient typings for the "html-escaper" package.
 *
 * Only the `escape` export is declared here; any other export of the
 * package is intentionally left untyped by this file.
 */
declare module "html-escaper" {
    /**
     * Declared signature of the package's `escape` export.
     *
     * @param html - input string (presumably raw text to be HTML-escaped;
     *   confirm against the package documentation)
     * @returns a string
     */
    export function escape(html: string): string
}
1 change: 1 addition & 0 deletions packages/core/src/html.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// This module provides functions to convert HTML content into different formats such as JSON, plain text, and Markdown.
// It imports necessary libraries for HTML conversion and logging purposes.
/// <reference path="./html-escaper.d.ts" />

import { TraceOptions } from "./trace" // Import TraceOptions for optional logging features
import { escape as HTMLEscape_ } from "html-escaper"
Expand Down
12 changes: 9 additions & 3 deletions packages/core/src/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import os from "os"
import { serializeError } from "./error"
import { logVerbose, logWarn } from "./util"
import { PDF_SCALE } from "./constants"
import { resolveGlobal } from "./globals"

let standardFontDataUrl: string

Expand Down Expand Up @@ -88,8 +89,12 @@ async function tryImportCanvas() {
if (CanvasFactory.createCanvas) return CanvasFactory.createCanvas

try {
const { Canvas } = await import("skia-canvas")
const createCanvas = (w: number, h: number) => new Canvas(w, h)
const skia = await import("skia-canvas")
const createCanvas = (w: number, h: number) => new skia.Canvas(w, h)
const glob = resolveGlobal()
glob.ImageData ??= skia.ImageData
glob.Path2D ??= skia.Path2D
glob.Canvas ??= skia.Canvas
CanvasFactory.createCanvas = createCanvas
return createCanvas
} catch (error) {
Expand Down Expand Up @@ -182,7 +187,7 @@ async function PDFTryParse(
pages.push(p)

if (createCanvas) {
const viewport = page.getViewport({ scale: PDF_SCALE })
const viewport = page.getViewport({ scale })
const canvas = await createCanvas(
viewport.width,
viewport.height
Expand All @@ -199,6 +204,7 @@ async function PDFTryParse(
}
return { ok: true, pages }
} catch (error) {
logVerbose(error)
trace?.error(`reading pdf`, error) // Log error if tracing is enabled
return { ok: false, error: serializeError(error) }
}
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/promptdom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ function renderDefNode(def: PromptDefNode): string {

let res: string
if (name && fenceFormat === "xml") {
res = `\n<${name}${dtype ? ` lang="${dtype}"` : ""}${filename ? ` file="${filename}"` : ""}${schema ? ` schema=${schema}` : ""}${diffFormat}>\n${body}<${name}>\n`
res = `\n<${name}${dtype ? ` lang="${dtype}"` : ""}${filename ? ` file="${filename}"` : ""}${schema ? ` schema=${schema}` : ""}${diffFormat}>\n${body}</${name}>\n`
} else if (fenceFormat === "none") {
res = `\n${name ? name + ":\n" : ""}${body}\n`
} else {
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3212,6 +3212,7 @@ type BufferLike =
| Buffer
| Blob
| ArrayBuffer
| Uint8Array
| ReadableStream

type TranscriptionModelType = OptionsOrString<"openai:whisper-1">
Expand Down
4 changes: 2 additions & 2 deletions packages/vscode/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -455,13 +455,13 @@
"@types/vscode": "^1.95.0",
"@vscode/vsce": "^3.2.1",
"assert": "^2.1.0",
"es-toolkit": "^1.31.0",
"es-toolkit": "^1.32.0",
"markdown-it-github-alerts": "^0.3.0",
"process": "^0.11.10",
"typescript": "5.7.3",
"vscode-uri": "^3.0.8",
"webfont": "^11.2.26",
"websocket-polyfill": "0.0.3",
"zx": "^8.3.0"
"zx": "^8.3.1"
}
}
73 changes: 37 additions & 36 deletions packages/web/package.json
Original file line number Diff line number Diff line change
@@ -1,38 +1,39 @@
{
"name": "genaiscript-web",
"version": "1.97.2",
"license": "MIT",
"private": true,
"scripts": {
"compile": "node build.mjs",
"watch": "node --watch --watch-path=./src --watch-path=../core/src build.mjs",
"serve": "serve -n .",
"typecheck": "tsc --noEmit"
},
"devDependencies": {
"@daiji256/rehype-mathml": "^1.1.1",
"@types/marked": "^6.0.0",
"@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3",
"@vscode-elements/elements": "1.11.1-pre.0",
"@vscode-elements/webview-playground": "^1.4.0",
"clsx": "^2.1.1",
"esbuild": "^0.24.2",
"mermaid": "^11.4.1",
"pretty-bytes": "^6.1.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-dropzone": "^14.3.5",
"react-error-boundary": "^5.0.0",
"react-markdown": "^9.0.3",
"rehype-highlight": "^7.0.1",
"rehype-mermaid": "^3.0.0",
"rehype-raw": "^7.0.0",
"rehype-sanitize": "^6.0.0",
"remark-gfm": "^4.0.0",
"remark-github-blockquote-alert": "^1.3.0",
"remark-math": "^6.0.0",
"remark-mermaid": "^0.2.0",
"typescript": "^5.7.3"
}
"name": "genaiscript-web",
"version": "1.97.2",
"license": "MIT",
"private": true,
"scripts": {
"compile": "node build.mjs",
"watch": "node --watch --watch-path=./src --watch-path=../core/src build.mjs",
"serve": "serve -n .",
"typecheck": "tsc --noEmit"
},
"devDependencies": {
"@daiji256/rehype-mathml": "^1.1.1",
"@types/marked": "^6.0.0",
"@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3",
"@vscode-elements/elements": "1.11.1-pre.0",
"@vscode-elements/webview-playground": "^1.4.0",
"clsx": "^2.1.1",
"esbuild": "^0.24.2",
"html-escaper": "3.0.3",
"mermaid": "^11.4.1",
"pretty-bytes": "^6.1.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-dropzone": "^14.3.5",
"react-error-boundary": "^5.0.0",
"react-markdown": "^9.0.3",
"rehype-highlight": "^7.0.1",
"rehype-mermaid": "^3.0.0",
"rehype-raw": "^7.0.0",
"rehype-sanitize": "^6.0.0",
"remark-gfm": "^4.0.0",
"remark-github-blockquote-alert": "^1.3.0",
"remark-math": "^6.0.0",
"remark-mermaid": "^0.2.0",
"typescript": "^5.7.3"
}
}
2 changes: 1 addition & 1 deletion slides/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
"devDependencies": {
"@slidev/cli": "^51.0.2",
"@slidev/theme-default": "^0.25.0",
"zx": "^8.3.0"
"zx": "^8.3.1"
}
}
Loading
Loading