From 4e8c9d8cf300a87e9dddb92ec5420abf9dd70822 Mon Sep 17 00:00:00 2001 From: Roy Scheepens Date: Fri, 15 Nov 2024 10:35:51 +0100 Subject: [PATCH 1/3] fix: embeddings --- biome.json | 10 +- .../google-tag-manager.tsx | 0 lib/generate-embeddings.ts | 856 +++++++++--------- pages/_app.tsx | 27 +- 4 files changed, 449 insertions(+), 444 deletions(-) rename lib/GoogleTagManager.tsx => components/google-tag-manager.tsx (100%) diff --git a/biome.json b/biome.json index 47cdaaa..cbff4ed 100644 --- a/biome.json +++ b/biome.json @@ -1,7 +1,7 @@ { - "$schema": "./node_modules/@biomejs/biome/configuration_schema.json", - "extends": ["./node_modules/@onbeam/biome-config/biome.json"], - "files": { - "ignore": ["./styled-system"] - } + "$schema": "./node_modules/@biomejs/biome/configuration_schema.json", + "extends": ["./node_modules/@onbeam/biome-config/biome.json"], + "files": { + "ignore": ["./styled-system"] + } } diff --git a/lib/GoogleTagManager.tsx b/components/google-tag-manager.tsx similarity index 100% rename from lib/GoogleTagManager.tsx rename to components/google-tag-manager.tsx diff --git a/lib/generate-embeddings.ts b/lib/generate-embeddings.ts index 4fbeb5e..374064c 100644 --- a/lib/generate-embeddings.ts +++ b/lib/generate-embeddings.ts @@ -1,25 +1,25 @@ -import { createHash } from 'node:crypto'; -import { basename, dirname, join } from 'node:path'; -import { createClient } from '@supabase/supabase-js'; -import dotenv from 'dotenv'; -import { readFile, readdir, stat } from 'node:fs/promises'; -import GithubSlugger from 'github-slugger'; -import { Content, Root } from 'mdast'; -import { fromMarkdown } from 'mdast-util-from-markdown'; -import { frontmatterFromMarkdown } from 'mdast-util-frontmatter'; -import { mdxFromMarkdown } from 'mdast-util-mdx'; -import { toMarkdown } from 'mdast-util-to-markdown'; -import { toString as toStringUtil } from 'mdast-util-to-string'; -import { frontmatter } from 'micromark-extension-frontmatter'; -import { mdxjs } from 
'micromark-extension-mdxjs'; -import OpenAI from 'openai'; -import { u } from 'unist-builder'; -import { filter } from 'unist-util-filter'; -import yargs from 'yargs'; +import { createHash } from "node:crypto"; +import { readFile, readdir, stat } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; +import { createClient } from "@supabase/supabase-js"; +import dotenv from "dotenv"; +import GithubSlugger from "github-slugger"; +import { Content, Root } from "mdast"; +import { fromMarkdown } from "mdast-util-from-markdown"; +import { frontmatterFromMarkdown } from "mdast-util-frontmatter"; +import { mdxFromMarkdown } from "mdast-util-mdx"; +import { toMarkdown } from "mdast-util-to-markdown"; +import { toString as toStringUtil } from "mdast-util-to-string"; +import { frontmatter } from "micromark-extension-frontmatter"; +import { mdxjs } from "micromark-extension-mdxjs"; +import OpenAI from "openai"; +import { u } from "unist-builder"; +import { filter } from "unist-util-filter"; +import yargs from "yargs"; dotenv.config(); -const ignoredFiles = ['pages/_app.mdx', 'pages/index.mdx', 'pages/404.mdx']; +const ignoredFiles = ["pages/_app.mdx", "pages/index.mdx", "pages/404.mdx"]; /** * Splits a `mdast` tree into multiple trees based on @@ -29,35 +29,35 @@ const ignoredFiles = ['pages/_app.mdx', 'pages/index.mdx', 'pages/404.mdx']; * Useful to split a markdown file into smaller sections. 
*/ function splitTreeBy(tree: Root, predicate: (node: Content) => boolean) { - return tree.children.reduce((trees, node) => { - const [lastTree] = trees.slice(-1); + return tree.children.reduce((trees, node) => { + const [lastTree] = trees.slice(-1); - if (!lastTree || predicate(node)) { - const tree: Root = u('root', [node]); - return trees.concat(tree); - } + if (!lastTree || predicate(node)) { + const tree: Root = u("root", [node]); + return trees.concat(tree); + } - lastTree.children.push(node); - return trees; - }, []); + lastTree.children.push(node); + return trees; + }, []); } function extractMetaTags(mdxTree: Root) { - const metaTagsNode = mdxTree.children.find(({ type }) => type === 'yaml'); + const metaTagsNode = mdxTree.children.find(({ type }) => type === "yaml"); - if (!metaTagsNode) { - return {}; - } + if (!metaTagsNode) { + return {}; + } - const parsed = metaTagsNode.value.split(/\\r?\\n/).reduce((meta, line) => { - const [key, value] = line.split(': '); - return { - ...meta, - [key]: value, - }; - }, {}); + const parsed = metaTagsNode.value.split(/\\r?\\n/).reduce((meta, line) => { + const [key, value] = line.split(": "); + return { + ...meta, + [key]: value, + }; + }, {}); - return parsed; + return parsed; } /** @@ -66,28 +66,31 @@ function extractMetaTags(mdxTree: Root) { * @param slug * @returns */ + +// biome-ignore lint/suspicious/noExplicitAny: any are you ok? const parseMetaTitle = (meta: any, slug: string): string => { - if (!meta[slug]) return slug; + if (!meta[slug]) return slug; - if (typeof meta[slug] === 'object') { - return `${(meta[slug] as any).title}` ?? slug; - } + if (typeof meta[slug] === "object") { + // biome-ignore lint/suspicious/noExplicitAny: any are you ok? + return `${(meta[slug] as any).title}` ?? 
slug; + } - return meta[slug] as string; + return meta[slug] as string; }; type Meta = ReturnType; type Section = { - content: string; - heading?: string; - slug?: string; + content: string; + heading?: string; + slug?: string; }; type ProcessedMdx = { - checksum: string; - meta: Meta; - sections: Section[]; + checksum: string; + meta: Meta; + sections: Section[]; }; /** @@ -96,400 +99,403 @@ type ProcessedMdx = { * and splits it into sub-sections based on criteria. */ function processMdxForSearch(title: string, content: string): ProcessedMdx { - const checksum = createHash('sha256').update(content).digest('base64'); - - const mdxTree = fromMarkdown(content, { - extensions: [mdxjs(), frontmatter()], - mdastExtensions: [mdxFromMarkdown(), frontmatterFromMarkdown(['yaml'])], - }); - - // Extract meta tags from markdown - const meta = extractMetaTags(mdxTree); - if (!meta.title) meta.title = title; - - // Remove all MDX elements from markdown - const mdTree = filter( - mdxTree, - (node) => - ![ - 'mdxjsEsm', - 'mdxJsxFlowElement', - 'mdxJsxTextElement', - 'mdxFlowExpression', - 'mdxTextExpression', - ].includes(node.type), - ); - - if (!mdTree) { - return { - checksum, - meta, - sections: [], - }; - } - - const sectionTrees = splitTreeBy(mdTree, (node) => node.type === 'heading'); - - const slugger = new GithubSlugger(); - - const sections = sectionTrees - // Filter out trees that contain only the page's metadata - .filter(({ children }) => children[0]?.type !== 'yaml') - .map((tree) => { - const [firstNode] = tree.children; - - const heading = - firstNode.type === 'heading' ? toStringUtil(firstNode) : undefined; - const slug = heading ? 
slugger.slug(heading) : undefined; - - return { - content: toMarkdown(tree), - heading, - slug, - }; - }); - - return { - checksum, - meta, - sections, - }; + const checksum = createHash("sha256").update(content).digest("base64"); + + const mdxTree = fromMarkdown(content, { + extensions: [mdxjs(), frontmatter()], + mdastExtensions: [mdxFromMarkdown(), frontmatterFromMarkdown(["yaml"])], + }); + + // Extract meta tags from markdown + const meta = extractMetaTags(mdxTree); + if (!meta.title) meta.title = title; + + // Remove all MDX elements from markdown + const mdTree = filter( + mdxTree, + (node) => + ![ + "mdxjsEsm", + "mdxJsxFlowElement", + "mdxJsxTextElement", + "mdxFlowExpression", + "mdxTextExpression", + ].includes(node.type), + ); + + if (!mdTree) { + return { + checksum, + meta, + sections: [], + }; + } + + const sectionTrees = splitTreeBy(mdTree, (node) => node.type === "heading"); + + const slugger = new GithubSlugger(); + + const sections = sectionTrees + // Filter out trees that contain only the page's metadata + .filter(({ children }) => children[0]?.type !== "yaml") + .map((tree) => { + const [firstNode] = tree.children; + + const heading = + firstNode.type === "heading" ? toStringUtil(firstNode) : undefined; + const slug = heading ? 
slugger.slug(heading) : undefined; + + return { + content: toMarkdown(tree), + heading, + slug, + }; + }); + + return { + checksum, + meta, + sections, + }; } type WalkEntry = { - path: string; - parentPath?: string; + path: string; + parentPath?: string; }; async function walk(dir: string, parentPath?: string): Promise { - const immediateFiles = await readdir(dir); - - const recursiveFiles = await Promise.all( - immediateFiles.map(async (file) => { - const path = join(dir, file); - const stats = await stat(path); - if (stats.isDirectory()) { - // Keep track of document hierarchy (if this dir has corresponding doc file) - const docPath = `${basename(path)}.mdx`; - - return walk( - path, - immediateFiles.includes(docPath) - ? join(dirname(path), docPath) - : parentPath, - ); - } - if (stats.isFile()) { - return [ - { - path: path, - parentPath, - }, - ]; - } - return []; - }), - ); - - const flattenedFiles = recursiveFiles.reduce( - (all, folderContents) => all.concat(folderContents), - [], - ); - - return flattenedFiles.sort((a, b) => a.path.localeCompare(b.path)); + const immediateFiles = await readdir(dir); + + const recursiveFiles = await Promise.all( + immediateFiles.map(async (file) => { + const path = join(dir, file); + const stats = await stat(path); + if (stats.isDirectory()) { + // Keep track of document hierarchy (if this dir has corresponding doc file) + const docPath = `${basename(path)}.mdx`; + + return walk( + path, + immediateFiles.includes(docPath) + ? 
join(dirname(path), docPath) + : parentPath, + ); + } + if (stats.isFile()) { + return [ + { + path: path, + parentPath, + }, + ]; + } + return []; + }), + ); + + const flattenedFiles = recursiveFiles.reduce( + (all, folderContents) => all.concat(folderContents), + [], + ); + + return flattenedFiles.sort((a, b) => a.path.localeCompare(b.path)); } abstract class BaseEmbeddingSource { - checksum?: string; - meta?: Meta; - sections?: Section[]; - - constructor( - public source: string, - public path: string, - public parentPath?: string, - ) {} - - abstract load(): Promise<{ - checksum: string; - meta?: Meta; - sections: Section[]; - }>; + checksum?: string; + meta?: Meta; + sections?: Section[]; + + constructor( + public source: string, + public path: string, + public parentPath?: string, + ) {} + + abstract load(): Promise<{ + checksum: string; + meta?: Meta; + sections: Section[]; + }>; } class MarkdownEmbeddingSource extends BaseEmbeddingSource { - type = 'markdown' as const; - - constructor( - source: string, - public filePath: string, - public parentFilePath?: string, - ) { - const path = filePath.replace(/^pages/, '').replace(/\.mdx?$/, ''); - const parentPath = parentFilePath - ?.replace(/^pages/, '') - .replace(/\.mdx?$/, ''); - - super(source, path, parentPath); - } - - async load() { - const contents = await readFile(this.filePath, 'utf8'); - - const slug = this.filePath - .split('/') - .at(-1) - .replace(/\.mdx?$/, ''); - - const metaPath = this.filePath.replace(/[^/]+$/, '_meta.json'); - const metaJson = await readFile(metaPath, 'utf8'); - - const title = parseMetaTitle(JSON.parse(metaJson), slug); - - const { checksum, meta, sections } = processMdxForSearch(title, contents); - - this.checksum = checksum; - this.meta = meta; - this.sections = sections; - - return { - checksum, - meta, - sections, - }; - } + type = "markdown" as const; + + constructor( + source: string, + public filePath: string, + public parentFilePath?: string, + ) { + const path = 
filePath.replace(/^pages/, "").replace(/\.mdx?$/, ""); + const parentPath = parentFilePath + ?.replace(/^pages/, "") + .replace(/\.mdx?$/, ""); + + super(source, path, parentPath); + } + + async load() { + const contents = await readFile(this.filePath, "utf8"); + + const slug = this.filePath.split("/").at(-1)?.replace(/\.mdx?$/, "") ?? ""; + + const metaPath = join( + process.cwd(), + this.filePath.replace(/[^/]+$/, "_meta.ts"), + ); + + const metaFile = (await import(metaPath)).default; + + const title = parseMetaTitle(metaFile, slug); + + const { checksum, meta, sections } = processMdxForSearch(title, contents); + + this.checksum = checksum; + this.meta = meta; + this.sections = sections; + + return { + checksum, + meta, + sections, + }; + } } type EmbeddingSource = MarkdownEmbeddingSource; async function generateEmbeddings() { - const argv = await yargs.option('refresh', { - alias: 'r', - description: 'Refresh data', - type: 'boolean', - }).argv; - - const shouldRefresh = argv.refresh; - - if ( - !process.env.SUPABASE_URL || - !process.env.SUPABASE_SERVICE_ROLE_KEY || - !process.env.OPENAI_KEY - ) { - return console.info( - 'Environment variables SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, and OPENAI_KEY are required: skipping embeddings generation', - ); - } - - const supabaseClient = createClient( - process.env.SUPABASE_URL, - process.env.SUPABASE_SERVICE_ROLE_KEY, - { - auth: { - persistSession: false, - autoRefreshToken: false, - }, - }, - ); - - const embeddingSources: EmbeddingSource[] = [ - ...(await walk('pages')) - .filter(({ path }) => /\.mdx?$/.test(path)) - .filter(({ path }) => !ignoredFiles.includes(path)) - .map( - (entry) => - new MarkdownEmbeddingSource('guide', entry.path, entry.parentPath), - ), - ]; - - console.info(`Discovered ${embeddingSources.length} pages`); - - if (!shouldRefresh) { - console.info('Checking which pages are new or have changed'); - } else console.info('Refresh flag set, re-generating all pages'); - - for (const 
embeddingSource of embeddingSources) { - const { type, source, path, parentPath } = embeddingSource; - - try { - const { checksum, meta, sections } = await embeddingSource.load(); - - // Check for existing page in DB and compare checksums - const { error: fetchPageError, data: existingPage } = await supabaseClient - .from('docs_page') - .select('id, path, checksum, parentPage:parent_page_id(id, path)') - .filter('path', 'eq', path) - .limit(1) - .maybeSingle(); - - if (fetchPageError) { - throw fetchPageError; - } - - type ParentPage = T extends any[] ? T[number] | null : T; - - // We use checksum to determine if this page & its sections need to be regenerated - if (!shouldRefresh && existingPage?.checksum === checksum) { - const existingParentPage = - existingPage?.parentPage as unknown as ParentPage< - typeof existingPage.parentPage - >; - - // If parent page changed, update it - if (existingParentPage?.path !== parentPath) { - console.info( - `[${path}] Parent page has changed. Updating to '${parentPath}'...`, - ); - const { error: fetchParentPageError, data: parentPage } = - await supabaseClient - .from('docs_page') - .select() - .filter('path', 'eq', parentPath) - .limit(1) - .maybeSingle(); - - if (fetchParentPageError) { - throw fetchParentPageError; - } - - const { error: updatePageError } = await supabaseClient - .from('docs_page') - .update({ parent_page_id: parentPage?.id }) - .filter('id', 'eq', existingPage.id); - - if (updatePageError) { - throw updatePageError; - } - } - continue; - } - - if (existingPage) { - if (!shouldRefresh) { - console.info( - `[${path}] Docs have changed, removing old page sections and their embeddings`, - ); - } else - console.info( - '[$path] Refresh flag set, removing old page sections and their embeddings', - ); - - const { error: deletePageSectionError } = await supabaseClient - .from('docs_page_section') - .delete() - .filter('page_id', 'eq', existingPage.id); - - if (deletePageSectionError) { - throw 
deletePageSectionError; - } - } - - const { error: fetchParentPageError, data: parentPage } = - await supabaseClient - .from('docs_page') - .select() - .filter('path', 'eq', parentPath) - .limit(1) - .maybeSingle(); - - if (fetchParentPageError) { - throw fetchParentPageError; - } - - // Create/update page record. Intentionally clear checksum until we - // have successfully generated all page sections. - const { error: upsertPageError, data: page } = await supabaseClient - .from('docs_page') - .upsert( - { - checksum: null, - path, - type, - source, - meta, - parent_page_id: parentPage?.id, - }, - { onConflict: 'path' }, - ) - .select() - .limit(1) - .single(); - - if (upsertPageError) { - throw upsertPageError; - } - - console.info( - `[${path}] Adding ${sections.length} page sections (with embeddings)`, - ); - for (const { slug, heading, content } of sections) { - // OpenAI recommends replacing newlines with spaces for best results (specific to embeddings) - const input = content.replace(/\n/g, ' '); - - try { - const openai = new OpenAI({ - apiKey: process.env.OPENAI_KEY, - }); - - const embeddingResponse = await openai.embeddings.create({ - model: 'text-embedding-ada-002', - input, - }); - - const [responseData] = embeddingResponse.data; - - const { error: insertPageSectionError } = await supabaseClient - .from('docs_page_section') - .insert({ - page_id: page.id, - slug, - heading, - content, - token_count: embeddingResponse.usage.total_tokens, - embedding: responseData.embedding, - }) - .select() - .limit(1) - .single(); - - if (insertPageSectionError) { - throw insertPageSectionError; - } - } catch (err) { - // TODO: decide how to better handle failed embeddings - console.error( - `Failed to generate embeddings for '${path}' page section starting with '${input.slice( - 0, - 40, - )}...'`, - ); - - throw err; - } - } - - // Set page checksum so that we know this page was stored successfully - const { error: updatePageError } = await supabaseClient - 
.from('docs_page') - .update({ checksum }) - .filter('id', 'eq', page.id); - - if (updatePageError) { - throw updatePageError; - } - } catch (err) { - console.error( - `Page '${path}' or one/multiple of its page sections failed to store properly. Page has been marked with null checksum to indicate that it needs to be re-generated.`, - ); - console.error(err); - } - } - - console.info('Embedding generation complete'); + // @ts-ignore + const argv = await yargs.option("refresh", { + alias: "r", + description: "Refresh data", + type: "boolean", + }).argv; + + const shouldRefresh = argv.refresh; + + if ( + !process.env.SUPABASE_URL || + !process.env.SUPABASE_SERVICE_ROLE_KEY || + !process.env.OPENAI_KEY + ) { + return console.info( + "Environment variables SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, and OPENAI_KEY are required: skipping embeddings generation", + ); + } + + const supabaseClient = createClient( + process.env.SUPABASE_URL, + process.env.SUPABASE_SERVICE_ROLE_KEY, + { + auth: { + persistSession: false, + autoRefreshToken: false, + }, + }, + ); + + const embeddingSources: EmbeddingSource[] = [ + ...(await walk("pages")) + .filter(({ path }) => /\.mdx?$/.test(path)) + .filter(({ path }) => !ignoredFiles.includes(path)) + .map( + (entry) => + new MarkdownEmbeddingSource("guide", entry.path, entry.parentPath), + ), + ]; + + console.info(`Discovered ${embeddingSources.length} pages`); + + if (!shouldRefresh) { + console.info("Checking which pages are new or have changed"); + } else console.info("Refresh flag set, re-generating all pages"); + + for (const embeddingSource of embeddingSources) { + const { type, source, path, parentPath } = embeddingSource; + + try { + const { checksum, meta, sections } = await embeddingSource.load(); + + // Check for existing page in DB and compare checksums + const { error: fetchPageError, data: existingPage } = await supabaseClient + .from("docs_page") + .select("id, path, checksum, parentPage:parent_page_id(id, path)") + 
.filter("path", "eq", path) + .limit(1) + .maybeSingle(); + + if (fetchPageError) { + throw fetchPageError; + } + + // biome-ignore lint/suspicious/noExplicitAny: any are you ok? + type ParentPage = T extends any[] ? T[number] | null : T; + + // We use checksum to determine if this page & its sections need to be regenerated + if (!shouldRefresh && existingPage?.checksum === checksum) { + const existingParentPage = + existingPage?.parentPage as unknown as ParentPage< + typeof existingPage.parentPage + >; + + // If parent page changed, update it + if (existingParentPage?.path !== parentPath) { + console.info( + `[${path}] Parent page has changed. Updating to '${parentPath}'...`, + ); + const { error: fetchParentPageError, data: parentPage } = + await supabaseClient + .from("docs_page") + .select() + .filter("path", "eq", parentPath) + .limit(1) + .maybeSingle(); + + if (fetchParentPageError) { + throw fetchParentPageError; + } + + const { error: updatePageError } = await supabaseClient + .from("docs_page") + .update({ parent_page_id: parentPage?.id }) + .filter("id", "eq", existingPage.id); + + if (updatePageError) { + throw updatePageError; + } + } + continue; + } + + if (existingPage) { + if (!shouldRefresh) { + console.info( + `[${path}] Docs have changed, removing old page sections and their embeddings`, + ); + } else + console.info( + "[$path] Refresh flag set, removing old page sections and their embeddings", + ); + + const { error: deletePageSectionError } = await supabaseClient + .from("docs_page_section") + .delete() + .filter("page_id", "eq", existingPage.id); + + if (deletePageSectionError) { + throw deletePageSectionError; + } + } + + const { error: fetchParentPageError, data: parentPage } = + await supabaseClient + .from("docs_page") + .select() + .filter("path", "eq", parentPath) + .limit(1) + .maybeSingle(); + + if (fetchParentPageError) { + throw fetchParentPageError; + } + + // Create/update page record. 
Intentionally clear checksum until we + // have successfully generated all page sections. + const { error: upsertPageError, data: page } = await supabaseClient + .from("docs_page") + .upsert( + { + checksum: null, + path, + type, + source, + meta, + parent_page_id: parentPage?.id, + }, + { onConflict: "path" }, + ) + .select() + .limit(1) + .single(); + + if (upsertPageError) { + throw upsertPageError; + } + + console.info( + `[${path}] Adding ${sections.length} page sections (with embeddings)`, + ); + for (const { slug, heading, content } of sections) { + // OpenAI recommends replacing newlines with spaces for best results (specific to embeddings) + const input = content.replace(/\n/g, " "); + + try { + const openai = new OpenAI({ + apiKey: process.env.OPENAI_KEY, + }); + + const embeddingResponse = await openai.embeddings.create({ + model: "text-embedding-ada-002", + input, + }); + + const [responseData] = embeddingResponse.data; + + const { error: insertPageSectionError } = await supabaseClient + .from("docs_page_section") + .insert({ + page_id: page.id, + slug, + heading, + content, + token_count: embeddingResponse.usage.total_tokens, + embedding: responseData.embedding, + }) + .select() + .limit(1) + .single(); + + if (insertPageSectionError) { + throw insertPageSectionError; + } + } catch (err) { + // TODO: decide how to better handle failed embeddings + console.error( + `Failed to generate embeddings for '${path}' page section starting with '${input.slice( + 0, + 40, + )}...'`, + ); + + throw err; + } + } + + // Set page checksum so that we know this page was stored successfully + const { error: updatePageError } = await supabaseClient + .from("docs_page") + .update({ checksum }) + .filter("id", "eq", page.id); + + if (updatePageError) { + throw updatePageError; + } + } catch (err) { + console.error( + `Page '${path}' or one/multiple of its page sections failed to store properly. 
Page has been marked with null checksum to indicate that it needs to be re-generated.`, + ); + console.error(err); + } + } + + console.info("Embedding generation complete"); } async function main() { - await generateEmbeddings(); + await generateEmbeddings(); } main().catch((err) => console.error(err)); diff --git a/pages/_app.tsx b/pages/_app.tsx index a03c6e2..68b91ca 100644 --- a/pages/_app.tsx +++ b/pages/_app.tsx @@ -1,17 +1,16 @@ -import React from 'react'; -import { Analytics } from '@vercel/analytics/react'; -import { CookieConsentModal } from '@onbeam/features'; - -import '../styles.css'; -import { GoogleTagManager } from '../lib/GoogleTagManager'; +import { CookieConsentModal } from "@onbeam/features"; +import { Analytics } from "@vercel/analytics/react"; +import React from "react"; +import { GoogleTagManager } from "../components/google-tag-manager"; +import "../styles.css"; export default function Nextra({ Component, pageProps }) { - return ( - <> - - - - - - ); + return ( + <> + + + + + + ); } From a321e7d2370474426a13fe329c151f23907888b8 Mon Sep 17 00:00:00 2001 From: Roy Scheepens Date: Fri, 15 Nov 2024 10:46:16 +0100 Subject: [PATCH 2/3] fix: formatting --- biome.json | 10 +- lib/generate-embeddings.ts | 862 +++++++++++++++++++------------------ pages/_app.tsx | 26 +- 3 files changed, 451 insertions(+), 447 deletions(-) diff --git a/biome.json b/biome.json index cbff4ed..47cdaaa 100644 --- a/biome.json +++ b/biome.json @@ -1,7 +1,7 @@ { - "$schema": "./node_modules/@biomejs/biome/configuration_schema.json", - "extends": ["./node_modules/@onbeam/biome-config/biome.json"], - "files": { - "ignore": ["./styled-system"] - } + "$schema": "./node_modules/@biomejs/biome/configuration_schema.json", + "extends": ["./node_modules/@onbeam/biome-config/biome.json"], + "files": { + "ignore": ["./styled-system"] + } } diff --git a/lib/generate-embeddings.ts b/lib/generate-embeddings.ts index 374064c..416c6cd 100644 --- a/lib/generate-embeddings.ts +++ 
b/lib/generate-embeddings.ts @@ -1,25 +1,25 @@ -import { createHash } from "node:crypto"; -import { readFile, readdir, stat } from "node:fs/promises"; -import { basename, dirname, join } from "node:path"; -import { createClient } from "@supabase/supabase-js"; -import dotenv from "dotenv"; -import GithubSlugger from "github-slugger"; -import { Content, Root } from "mdast"; -import { fromMarkdown } from "mdast-util-from-markdown"; -import { frontmatterFromMarkdown } from "mdast-util-frontmatter"; -import { mdxFromMarkdown } from "mdast-util-mdx"; -import { toMarkdown } from "mdast-util-to-markdown"; -import { toString as toStringUtil } from "mdast-util-to-string"; -import { frontmatter } from "micromark-extension-frontmatter"; -import { mdxjs } from "micromark-extension-mdxjs"; -import OpenAI from "openai"; -import { u } from "unist-builder"; -import { filter } from "unist-util-filter"; -import yargs from "yargs"; +import { createHash } from 'node:crypto'; +import { readFile, readdir, stat } from 'node:fs/promises'; +import { basename, dirname, join } from 'node:path'; +import { createClient } from '@supabase/supabase-js'; +import dotenv from 'dotenv'; +import GithubSlugger from 'github-slugger'; +import { Content, Root } from 'mdast'; +import { fromMarkdown } from 'mdast-util-from-markdown'; +import { frontmatterFromMarkdown } from 'mdast-util-frontmatter'; +import { mdxFromMarkdown } from 'mdast-util-mdx'; +import { toMarkdown } from 'mdast-util-to-markdown'; +import { toString as toStringUtil } from 'mdast-util-to-string'; +import { frontmatter } from 'micromark-extension-frontmatter'; +import { mdxjs } from 'micromark-extension-mdxjs'; +import OpenAI from 'openai'; +import { u } from 'unist-builder'; +import { filter } from 'unist-util-filter'; +import yargs from 'yargs'; dotenv.config(); -const ignoredFiles = ["pages/_app.mdx", "pages/index.mdx", "pages/404.mdx"]; +const ignoredFiles = ['pages/_app.mdx', 'pages/index.mdx', 'pages/404.mdx']; /** * Splits a 
`mdast` tree into multiple trees based on @@ -29,35 +29,35 @@ const ignoredFiles = ["pages/_app.mdx", "pages/index.mdx", "pages/404.mdx"]; * Useful to split a markdown file into smaller sections. */ function splitTreeBy(tree: Root, predicate: (node: Content) => boolean) { - return tree.children.reduce((trees, node) => { - const [lastTree] = trees.slice(-1); + return tree.children.reduce((trees, node) => { + const [lastTree] = trees.slice(-1); - if (!lastTree || predicate(node)) { - const tree: Root = u("root", [node]); - return trees.concat(tree); - } + if (!lastTree || predicate(node)) { + const tree: Root = u('root', [node]); + return trees.concat(tree); + } - lastTree.children.push(node); - return trees; - }, []); + lastTree.children.push(node); + return trees; + }, []); } function extractMetaTags(mdxTree: Root) { - const metaTagsNode = mdxTree.children.find(({ type }) => type === "yaml"); + const metaTagsNode = mdxTree.children.find(({ type }) => type === 'yaml'); - if (!metaTagsNode) { - return {}; - } + if (!metaTagsNode) { + return {}; + } - const parsed = metaTagsNode.value.split(/\\r?\\n/).reduce((meta, line) => { - const [key, value] = line.split(": "); - return { - ...meta, - [key]: value, - }; - }, {}); + const parsed = metaTagsNode.value.split(/\\r?\\n/).reduce((meta, line) => { + const [key, value] = line.split(': '); + return { + ...meta, + [key]: value, + }; + }, {}); - return parsed; + return parsed; } /** @@ -69,28 +69,28 @@ function extractMetaTags(mdxTree: Root) { // biome-ignore lint/suspicious/noExplicitAny: any are you ok? const parseMetaTitle = (meta: any, slug: string): string => { - if (!meta[slug]) return slug; + if (!meta[slug]) return slug; - if (typeof meta[slug] === "object") { - // biome-ignore lint/suspicious/noExplicitAny: any are you ok? - return `${(meta[slug] as any).title}` ?? slug; - } + if (typeof meta[slug] === 'object') { + // biome-ignore lint/suspicious/noExplicitAny: any are you ok? 
+ return `${(meta[slug] as any).title}` ?? slug; + } - return meta[slug] as string; + return meta[slug] as string; }; type Meta = ReturnType; type Section = { - content: string; - heading?: string; - slug?: string; + content: string; + heading?: string; + slug?: string; }; type ProcessedMdx = { - checksum: string; - meta: Meta; - sections: Section[]; + checksum: string; + meta: Meta; + sections: Section[]; }; /** @@ -99,403 +99,407 @@ type ProcessedMdx = { * and splits it into sub-sections based on criteria. */ function processMdxForSearch(title: string, content: string): ProcessedMdx { - const checksum = createHash("sha256").update(content).digest("base64"); - - const mdxTree = fromMarkdown(content, { - extensions: [mdxjs(), frontmatter()], - mdastExtensions: [mdxFromMarkdown(), frontmatterFromMarkdown(["yaml"])], - }); - - // Extract meta tags from markdown - const meta = extractMetaTags(mdxTree); - if (!meta.title) meta.title = title; - - // Remove all MDX elements from markdown - const mdTree = filter( - mdxTree, - (node) => - ![ - "mdxjsEsm", - "mdxJsxFlowElement", - "mdxJsxTextElement", - "mdxFlowExpression", - "mdxTextExpression", - ].includes(node.type), - ); - - if (!mdTree) { - return { - checksum, - meta, - sections: [], - }; - } - - const sectionTrees = splitTreeBy(mdTree, (node) => node.type === "heading"); - - const slugger = new GithubSlugger(); - - const sections = sectionTrees - // Filter out trees that contain only the page's metadata - .filter(({ children }) => children[0]?.type !== "yaml") - .map((tree) => { - const [firstNode] = tree.children; - - const heading = - firstNode.type === "heading" ? toStringUtil(firstNode) : undefined; - const slug = heading ? 
slugger.slug(heading) : undefined; - - return { - content: toMarkdown(tree), - heading, - slug, - }; - }); - - return { - checksum, - meta, - sections, - }; + const checksum = createHash('sha256').update(content).digest('base64'); + + const mdxTree = fromMarkdown(content, { + extensions: [mdxjs(), frontmatter()], + mdastExtensions: [mdxFromMarkdown(), frontmatterFromMarkdown(['yaml'])], + }); + + // Extract meta tags from markdown + const meta = extractMetaTags(mdxTree); + if (!meta.title) meta.title = title; + + // Remove all MDX elements from markdown + const mdTree = filter( + mdxTree, + (node) => + ![ + 'mdxjsEsm', + 'mdxJsxFlowElement', + 'mdxJsxTextElement', + 'mdxFlowExpression', + 'mdxTextExpression', + ].includes(node.type), + ); + + if (!mdTree) { + return { + checksum, + meta, + sections: [], + }; + } + + const sectionTrees = splitTreeBy(mdTree, (node) => node.type === 'heading'); + + const slugger = new GithubSlugger(); + + const sections = sectionTrees + // Filter out trees that contain only the page's metadata + .filter(({ children }) => children[0]?.type !== 'yaml') + .map((tree) => { + const [firstNode] = tree.children; + + const heading = + firstNode.type === 'heading' ? toStringUtil(firstNode) : undefined; + const slug = heading ? 
slugger.slug(heading) : undefined; + + return { + content: toMarkdown(tree), + heading, + slug, + }; + }); + + return { + checksum, + meta, + sections, + }; } type WalkEntry = { - path: string; - parentPath?: string; + path: string; + parentPath?: string; }; async function walk(dir: string, parentPath?: string): Promise { - const immediateFiles = await readdir(dir); - - const recursiveFiles = await Promise.all( - immediateFiles.map(async (file) => { - const path = join(dir, file); - const stats = await stat(path); - if (stats.isDirectory()) { - // Keep track of document hierarchy (if this dir has corresponding doc file) - const docPath = `${basename(path)}.mdx`; - - return walk( - path, - immediateFiles.includes(docPath) - ? join(dirname(path), docPath) - : parentPath, - ); - } - if (stats.isFile()) { - return [ - { - path: path, - parentPath, - }, - ]; - } - return []; - }), - ); - - const flattenedFiles = recursiveFiles.reduce( - (all, folderContents) => all.concat(folderContents), - [], - ); - - return flattenedFiles.sort((a, b) => a.path.localeCompare(b.path)); + const immediateFiles = await readdir(dir); + + const recursiveFiles = await Promise.all( + immediateFiles.map(async (file) => { + const path = join(dir, file); + const stats = await stat(path); + if (stats.isDirectory()) { + // Keep track of document hierarchy (if this dir has corresponding doc file) + const docPath = `${basename(path)}.mdx`; + + return walk( + path, + immediateFiles.includes(docPath) + ? 
join(dirname(path), docPath) + : parentPath, + ); + } + if (stats.isFile()) { + return [ + { + path: path, + parentPath, + }, + ]; + } + return []; + }), + ); + + const flattenedFiles = recursiveFiles.reduce( + (all, folderContents) => all.concat(folderContents), + [], + ); + + return flattenedFiles.sort((a, b) => a.path.localeCompare(b.path)); } abstract class BaseEmbeddingSource { - checksum?: string; - meta?: Meta; - sections?: Section[]; - - constructor( - public source: string, - public path: string, - public parentPath?: string, - ) {} - - abstract load(): Promise<{ - checksum: string; - meta?: Meta; - sections: Section[]; - }>; + checksum?: string; + meta?: Meta; + sections?: Section[]; + + constructor( + public source: string, + public path: string, + public parentPath?: string, + ) {} + + abstract load(): Promise<{ + checksum: string; + meta?: Meta; + sections: Section[]; + }>; } class MarkdownEmbeddingSource extends BaseEmbeddingSource { - type = "markdown" as const; - - constructor( - source: string, - public filePath: string, - public parentFilePath?: string, - ) { - const path = filePath.replace(/^pages/, "").replace(/\.mdx?$/, ""); - const parentPath = parentFilePath - ?.replace(/^pages/, "") - .replace(/\.mdx?$/, ""); - - super(source, path, parentPath); - } - - async load() { - const contents = await readFile(this.filePath, "utf8"); - - const slug = this.filePath.split("/").at(-1)?.replace(/\.mdx?$/, "") ?? 
""; - - const metaPath = join( - process.cwd(), - this.filePath.replace(/[^/]+$/, "_meta.ts"), - ); - - const metaFile = (await import(metaPath)).default; - - const title = parseMetaTitle(metaFile, slug); - - const { checksum, meta, sections } = processMdxForSearch(title, contents); - - this.checksum = checksum; - this.meta = meta; - this.sections = sections; - - return { - checksum, - meta, - sections, - }; - } + type = 'markdown' as const; + + constructor( + source: string, + public filePath: string, + public parentFilePath?: string, + ) { + const path = filePath.replace(/^pages/, '').replace(/\.mdx?$/, ''); + const parentPath = parentFilePath + ?.replace(/^pages/, '') + .replace(/\.mdx?$/, ''); + + super(source, path, parentPath); + } + + async load() { + const contents = await readFile(this.filePath, 'utf8'); + + const slug = + this.filePath + .split('/') + .at(-1) + ?.replace(/\.mdx?$/, '') ?? ''; + + const metaPath = join( + process.cwd(), + this.filePath.replace(/[^/]+$/, '_meta.ts'), + ); + + const metaFile = (await import(metaPath)).default; + + const title = parseMetaTitle(metaFile, slug); + + const { checksum, meta, sections } = processMdxForSearch(title, contents); + + this.checksum = checksum; + this.meta = meta; + this.sections = sections; + + return { + checksum, + meta, + sections, + }; + } } type EmbeddingSource = MarkdownEmbeddingSource; async function generateEmbeddings() { - // @ts-ignore - const argv = await yargs.option("refresh", { - alias: "r", - description: "Refresh data", - type: "boolean", - }).argv; - - const shouldRefresh = argv.refresh; - - if ( - !process.env.SUPABASE_URL || - !process.env.SUPABASE_SERVICE_ROLE_KEY || - !process.env.OPENAI_KEY - ) { - return console.info( - "Environment variables SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, and OPENAI_KEY are required: skipping embeddings generation", - ); - } - - const supabaseClient = createClient( - process.env.SUPABASE_URL, - process.env.SUPABASE_SERVICE_ROLE_KEY, - { - auth: { - 
persistSession: false, - autoRefreshToken: false, - }, - }, - ); - - const embeddingSources: EmbeddingSource[] = [ - ...(await walk("pages")) - .filter(({ path }) => /\.mdx?$/.test(path)) - .filter(({ path }) => !ignoredFiles.includes(path)) - .map( - (entry) => - new MarkdownEmbeddingSource("guide", entry.path, entry.parentPath), - ), - ]; - - console.info(`Discovered ${embeddingSources.length} pages`); - - if (!shouldRefresh) { - console.info("Checking which pages are new or have changed"); - } else console.info("Refresh flag set, re-generating all pages"); - - for (const embeddingSource of embeddingSources) { - const { type, source, path, parentPath } = embeddingSource; - - try { - const { checksum, meta, sections } = await embeddingSource.load(); - - // Check for existing page in DB and compare checksums - const { error: fetchPageError, data: existingPage } = await supabaseClient - .from("docs_page") - .select("id, path, checksum, parentPage:parent_page_id(id, path)") - .filter("path", "eq", path) - .limit(1) - .maybeSingle(); - - if (fetchPageError) { - throw fetchPageError; - } - - // biome-ignore lint/suspicious/noExplicitAny: any are you ok? - type ParentPage = T extends any[] ? T[number] | null : T; - - // We use checksum to determine if this page & its sections need to be regenerated - if (!shouldRefresh && existingPage?.checksum === checksum) { - const existingParentPage = - existingPage?.parentPage as unknown as ParentPage< - typeof existingPage.parentPage - >; - - // If parent page changed, update it - if (existingParentPage?.path !== parentPath) { - console.info( - `[${path}] Parent page has changed. 
Updating to '${parentPath}'...`, - ); - const { error: fetchParentPageError, data: parentPage } = - await supabaseClient - .from("docs_page") - .select() - .filter("path", "eq", parentPath) - .limit(1) - .maybeSingle(); - - if (fetchParentPageError) { - throw fetchParentPageError; - } - - const { error: updatePageError } = await supabaseClient - .from("docs_page") - .update({ parent_page_id: parentPage?.id }) - .filter("id", "eq", existingPage.id); - - if (updatePageError) { - throw updatePageError; - } - } - continue; - } - - if (existingPage) { - if (!shouldRefresh) { - console.info( - `[${path}] Docs have changed, removing old page sections and their embeddings`, - ); - } else - console.info( - "[$path] Refresh flag set, removing old page sections and their embeddings", - ); - - const { error: deletePageSectionError } = await supabaseClient - .from("docs_page_section") - .delete() - .filter("page_id", "eq", existingPage.id); - - if (deletePageSectionError) { - throw deletePageSectionError; - } - } - - const { error: fetchParentPageError, data: parentPage } = - await supabaseClient - .from("docs_page") - .select() - .filter("path", "eq", parentPath) - .limit(1) - .maybeSingle(); - - if (fetchParentPageError) { - throw fetchParentPageError; - } - - // Create/update page record. Intentionally clear checksum until we - // have successfully generated all page sections. 
- const { error: upsertPageError, data: page } = await supabaseClient - .from("docs_page") - .upsert( - { - checksum: null, - path, - type, - source, - meta, - parent_page_id: parentPage?.id, - }, - { onConflict: "path" }, - ) - .select() - .limit(1) - .single(); - - if (upsertPageError) { - throw upsertPageError; - } - - console.info( - `[${path}] Adding ${sections.length} page sections (with embeddings)`, - ); - for (const { slug, heading, content } of sections) { - // OpenAI recommends replacing newlines with spaces for best results (specific to embeddings) - const input = content.replace(/\n/g, " "); - - try { - const openai = new OpenAI({ - apiKey: process.env.OPENAI_KEY, - }); - - const embeddingResponse = await openai.embeddings.create({ - model: "text-embedding-ada-002", - input, - }); - - const [responseData] = embeddingResponse.data; - - const { error: insertPageSectionError } = await supabaseClient - .from("docs_page_section") - .insert({ - page_id: page.id, - slug, - heading, - content, - token_count: embeddingResponse.usage.total_tokens, - embedding: responseData.embedding, - }) - .select() - .limit(1) - .single(); - - if (insertPageSectionError) { - throw insertPageSectionError; - } - } catch (err) { - // TODO: decide how to better handle failed embeddings - console.error( - `Failed to generate embeddings for '${path}' page section starting with '${input.slice( - 0, - 40, - )}...'`, - ); - - throw err; - } - } - - // Set page checksum so that we know this page was stored successfully - const { error: updatePageError } = await supabaseClient - .from("docs_page") - .update({ checksum }) - .filter("id", "eq", page.id); - - if (updatePageError) { - throw updatePageError; - } - } catch (err) { - console.error( - `Page '${path}' or one/multiple of its page sections failed to store properly. 
Page has been marked with null checksum to indicate that it needs to be re-generated.`, - ); - console.error(err); - } - } - - console.info("Embedding generation complete"); + // @ts-ignore + const argv = await yargs.option('refresh', { + alias: 'r', + description: 'Refresh data', + type: 'boolean', + }).argv; + + const shouldRefresh = argv.refresh; + + if ( + !process.env.SUPABASE_URL || + !process.env.SUPABASE_SERVICE_ROLE_KEY || + !process.env.OPENAI_KEY + ) { + return console.info( + 'Environment variables SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, and OPENAI_KEY are required: skipping embeddings generation', + ); + } + + const supabaseClient = createClient( + process.env.SUPABASE_URL, + process.env.SUPABASE_SERVICE_ROLE_KEY, + { + auth: { + persistSession: false, + autoRefreshToken: false, + }, + }, + ); + + const embeddingSources: EmbeddingSource[] = [ + ...(await walk('pages')) + .filter(({ path }) => /\.mdx?$/.test(path)) + .filter(({ path }) => !ignoredFiles.includes(path)) + .map( + (entry) => + new MarkdownEmbeddingSource('guide', entry.path, entry.parentPath), + ), + ]; + + console.info(`Discovered ${embeddingSources.length} pages`); + + if (!shouldRefresh) { + console.info('Checking which pages are new or have changed'); + } else console.info('Refresh flag set, re-generating all pages'); + + for (const embeddingSource of embeddingSources) { + const { type, source, path, parentPath } = embeddingSource; + + try { + const { checksum, meta, sections } = await embeddingSource.load(); + + // Check for existing page in DB and compare checksums + const { error: fetchPageError, data: existingPage } = await supabaseClient + .from('docs_page') + .select('id, path, checksum, parentPage:parent_page_id(id, path)') + .filter('path', 'eq', path) + .limit(1) + .maybeSingle(); + + if (fetchPageError) { + throw fetchPageError; + } + + // biome-ignore lint/suspicious/noExplicitAny: any are you ok? + type ParentPage = T extends any[] ? 
T[number] | null : T; + + // We use checksum to determine if this page & its sections need to be regenerated + if (!shouldRefresh && existingPage?.checksum === checksum) { + const existingParentPage = + existingPage?.parentPage as unknown as ParentPage< + typeof existingPage.parentPage + >; + + // If parent page changed, update it + if (existingParentPage?.path !== parentPath) { + console.info( + `[${path}] Parent page has changed. Updating to '${parentPath}'...`, + ); + const { error: fetchParentPageError, data: parentPage } = + await supabaseClient + .from('docs_page') + .select() + .filter('path', 'eq', parentPath) + .limit(1) + .maybeSingle(); + + if (fetchParentPageError) { + throw fetchParentPageError; + } + + const { error: updatePageError } = await supabaseClient + .from('docs_page') + .update({ parent_page_id: parentPage?.id }) + .filter('id', 'eq', existingPage.id); + + if (updatePageError) { + throw updatePageError; + } + } + continue; + } + + if (existingPage) { + if (!shouldRefresh) { + console.info( + `[${path}] Docs have changed, removing old page sections and their embeddings`, + ); + } else + console.info( + '[$path] Refresh flag set, removing old page sections and their embeddings', + ); + + const { error: deletePageSectionError } = await supabaseClient + .from('docs_page_section') + .delete() + .filter('page_id', 'eq', existingPage.id); + + if (deletePageSectionError) { + throw deletePageSectionError; + } + } + + const { error: fetchParentPageError, data: parentPage } = + await supabaseClient + .from('docs_page') + .select() + .filter('path', 'eq', parentPath) + .limit(1) + .maybeSingle(); + + if (fetchParentPageError) { + throw fetchParentPageError; + } + + // Create/update page record. Intentionally clear checksum until we + // have successfully generated all page sections. 
+ const { error: upsertPageError, data: page } = await supabaseClient + .from('docs_page') + .upsert( + { + checksum: null, + path, + type, + source, + meta, + parent_page_id: parentPage?.id, + }, + { onConflict: 'path' }, + ) + .select() + .limit(1) + .single(); + + if (upsertPageError) { + throw upsertPageError; + } + + console.info( + `[${path}] Adding ${sections.length} page sections (with embeddings)`, + ); + for (const { slug, heading, content } of sections) { + // OpenAI recommends replacing newlines with spaces for best results (specific to embeddings) + const input = content.replace(/\n/g, ' '); + + try { + const openai = new OpenAI({ + apiKey: process.env.OPENAI_KEY, + }); + + const embeddingResponse = await openai.embeddings.create({ + model: 'text-embedding-ada-002', + input, + }); + + const [responseData] = embeddingResponse.data; + + const { error: insertPageSectionError } = await supabaseClient + .from('docs_page_section') + .insert({ + page_id: page.id, + slug, + heading, + content, + token_count: embeddingResponse.usage.total_tokens, + embedding: responseData.embedding, + }) + .select() + .limit(1) + .single(); + + if (insertPageSectionError) { + throw insertPageSectionError; + } + } catch (err) { + // TODO: decide how to better handle failed embeddings + console.error( + `Failed to generate embeddings for '${path}' page section starting with '${input.slice( + 0, + 40, + )}...'`, + ); + + throw err; + } + } + + // Set page checksum so that we know this page was stored successfully + const { error: updatePageError } = await supabaseClient + .from('docs_page') + .update({ checksum }) + .filter('id', 'eq', page.id); + + if (updatePageError) { + throw updatePageError; + } + } catch (err) { + console.error( + `Page '${path}' or one/multiple of its page sections failed to store properly. 
Page has been marked with null checksum to indicate that it needs to be re-generated.`, + ); + console.error(err); + } + } + + console.info('Embedding generation complete'); } async function main() { - await generateEmbeddings(); + await generateEmbeddings(); } main().catch((err) => console.error(err)); diff --git a/pages/_app.tsx b/pages/_app.tsx index 68b91ca..0c5ad55 100644 --- a/pages/_app.tsx +++ b/pages/_app.tsx @@ -1,16 +1,16 @@ -import { CookieConsentModal } from "@onbeam/features"; -import { Analytics } from "@vercel/analytics/react"; -import React from "react"; -import { GoogleTagManager } from "../components/google-tag-manager"; -import "../styles.css"; +import { CookieConsentModal } from '@onbeam/features'; +import { Analytics } from '@vercel/analytics/react'; +import React from 'react'; +import { GoogleTagManager } from '../components/google-tag-manager'; +import '../styles.css'; export default function Nextra({ Component, pageProps }) { - return ( - <> - - - - - - ); + return ( + <> + + + + + + ); } From 7a094c6311f9148839a3024f809a1d1f666b19d4 Mon Sep 17 00:00:00 2001 From: Leon van der Noll Date: Fri, 15 Nov 2024 10:51:47 +0100 Subject: [PATCH 3/3] fix: remove biome-ignores --- lib/generate-embeddings.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/generate-embeddings.ts b/lib/generate-embeddings.ts index 416c6cd..b943044 100644 --- a/lib/generate-embeddings.ts +++ b/lib/generate-embeddings.ts @@ -67,13 +67,11 @@ function extractMetaTags(mdxTree: Root) { * @returns */ -// biome-ignore lint/suspicious/noExplicitAny: any are you ok? const parseMetaTitle = (meta: any, slug: string): string => { if (!meta[slug]) return slug; if (typeof meta[slug] === 'object') { - // biome-ignore lint/suspicious/noExplicitAny: any are you ok? - return `${(meta[slug] as any).title}` ?? slug; + return meta[slug]?.title ? 
`${meta[slug].title}` : slug;
 }
 
 return meta[slug] as string;
@@ -335,7 +333,6 @@ async function generateEmbeddings() {
 throw fetchPageError;
 }
 
- // biome-ignore lint/suspicious/noExplicitAny: any are you ok?
 type ParentPage = T extends any[] ? T[number] | null : T;
 
 // We use checksum to determine if this page & its sections need to be regenerated