Skip to content

Commit

Permalink
Support llms.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
gregberge committed Jan 22, 2025
1 parent d3e573c commit 4421a66
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .changeset/beige-trains-sort.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'gitbook': patch
---

Support llms.txt
Binary file modified bun.lockb
Binary file not shown.
2 changes: 2 additions & 0 deletions packages/gitbook/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"jsontoxml": "^1.0.1",
"katex": "^0.16.9",
"mathjax": "^3.2.2",
"mdast-util-to-markdown": "^2.1.2",
"memoizee": "^0.4.15",
"next": "14.2.15",
"next-themes": "^0.2.1",
Expand Down Expand Up @@ -74,6 +75,7 @@
"@types/js-cookie": "^3.0.6",
"@types/jsontoxml": "^1.0.5",
"@types/jsonwebtoken": "^9.0.6",
"@types/mdast": "^4.0.4",
"@types/node": "^20",
"@types/object-hash": "^3.0.6",
"@types/parse-cache-control": "^1.0.4",
Expand Down
141 changes: 141 additions & 0 deletions packages/gitbook/src/app/middleware/(site)/(core)/llms.txt/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { SiteSection, SiteSpace, SiteStructure } from '@gitbook/api';
import assertNever from 'assert-never';
import { Heading, ListItem, Paragraph, Root, RootContent } from 'mdast';
import { toMarkdown } from 'mdast-util-to-markdown';
import { NextRequest } from 'next/server';

import { getPublishedContentSite, getRevisionPages } from '@/lib/api';
import { getAbsoluteHref } from '@/lib/links';
import { getPagePath } from '@/lib/pages';
import { joinPath } from '@/lib/paths';
import { checkIsRootPointer, getSiteContentPointer } from '@/lib/pointer';
import { getIndexablePages } from '@/lib/sitemap';

// Next.js route segment config: run this route handler on the Edge runtime.
export const runtime = 'edge';

/**
 * Serve `llms.txt` for the current site.
 *
 * The response is a Markdown document (sent as `text/plain`) that starts with
 * the site title as a level-1 heading, followed by the nodes generated from
 * the site structure.
 *
 * @param req - Incoming request (currently unused by the handler).
 * @returns The Markdown document, or a 404 response when the resolved content
 * pointer is not the root pointer of the site.
 */
export async function GET(req: NextRequest) {
    const pointer = await getSiteContentPointer();
    const { organizationId, siteId, siteShareKey } = pointer;

    const published = await getPublishedContentSite({ organizationId, siteId, siteShareKey });
    const siteStructure = published.structure;

    // llms.txt is only exposed for the root pointer (as decided by
    // `checkIsRootPointer`); every other pointer gets a 404.
    const isRootPointer = checkIsRootPointer(pointer, siteStructure);
    if (!isRootPointer) {
        return new Response('Not found', { status: 404 });
    }

    const titleHeading: Heading = {
        type: 'heading',
        depth: 1,
        children: [{ type: 'text', value: published.site.title }],
    };
    const structureNodes = await getNodesFromSiteStructure(siteStructure);

    const tree: Root = {
        type: 'root',
        children: [titleHeading, ...structureNodes],
    };
    const markdown = toMarkdown(tree);

    return new Response(markdown, {
        headers: {
            'Content-Type': 'text/plain; charset=utf-8',
        },
    });
}

/**
 * Build the Markdown (mdast) nodes describing a site structure.
 *
 * A structure made of sections is delegated to `getNodesFromSections`; a
 * structure made of site spaces is rendered directly with level-2 headings.
 */
async function getNodesFromSiteStructure(siteStructure: SiteStructure): Promise<RootContent[]> {
    if (siteStructure.type === 'sections') {
        return getNodesFromSections(siteStructure.structure);
    }
    if (siteStructure.type === 'siteSpaces') {
        return getNodesFromSiteSpaces(siteStructure.structure, { depth: 2 });
    }
    // Exhaustiveness guard: fails to type-check if a new structure type is
    // added, and `assertNever` is called if an unknown type shows up at runtime.
    return assertNever(siteStructure);
}

/**
 * Build the Markdown (mdast) nodes for a list of site sections.
 *
 * Each section contributes a level-2 heading with its title, followed by the
 * nodes of its site spaces (rendered with level-3 headings). Sections are
 * processed concurrently and the output keeps the input order.
 */
async function getNodesFromSections(siteSections: SiteSection[]): Promise<RootContent[]> {
    const nodesPerSection = await Promise.all(
        siteSections.map(async (section) => {
            const spaceNodes = await getNodesFromSiteSpaces(section.siteSpaces, { depth: 3 });
            const sectionHeading: Heading = {
                type: 'heading',
                depth: 2,
                children: [{ type: 'text', value: section.title }],
            };
            const sectionNodes: RootContent[] = [sectionHeading, ...spaceNodes];
            return sectionNodes;
        }),
    );
    return nodesPerSection.flat();
}

/**
 * Build the Markdown (mdast) nodes for a list of site spaces.
 *
 * For every site space that has a published URL this emits:
 *  - a heading (at `options.depth`) with the site space title;
 *  - a list with one item per indexable page: a link to the page, followed by
 *    `: <description>` when the page has a description.
 *
 * Site spaces without a published URL are skipped entirely. Site spaces are
 * processed concurrently and the output keeps the input order.
 *
 * @param siteSpaces - Site spaces to render.
 * @param options - `depth`: heading depth used for each site space title.
 * @returns The flattened list of nodes for all site spaces.
 */
async function getNodesFromSiteSpaces(
    siteSpaces: SiteSpace[],
    options: { depth: Heading['depth'] },
): Promise<RootContent[]> {
    const all = await Promise.all(
        siteSpaces.map(async (siteSpace): Promise<RootContent[]> => {
            const siteSpaceUrl = siteSpace.urls.published;
            if (!siteSpaceUrl) {
                return [];
            }

            // The base path is the same for every page of the space, so parse
            // the published URL once instead of once per page.
            const basePath = new URL(siteSpaceUrl).pathname;

            const rootPages = await getRevisionPages(siteSpace.space.id, siteSpace.space.revision, {
                metadata: false,
            });
            const pages = getIndexablePages(rootPages);

            const listChildren = await Promise.all(
                pages.map(async ({ page }): Promise<ListItem> => {
                    const url = await getAbsoluteHref(
                        joinPath(basePath, getPagePath(rootPages, page)),
                        true,
                    );
                    const children: Paragraph['children'] = [
                        {
                            type: 'link',
                            url,
                            children: [{ type: 'text', value: page.title }],
                        },
                    ];
                    if (page.description) {
                        children.push({ type: 'text', value: `: ${page.description}` });
                    }
                    return {
                        type: 'listItem',
                        children: [{ type: 'paragraph', children }],
                    };
                }),
            );

            const heading: Heading = {
                type: 'heading',
                depth: options.depth,
                children: [{ type: 'text', value: siteSpace.title }],
            };

            // A list without items has nothing to render; emit only the
            // heading rather than an empty `list` node.
            if (listChildren.length === 0) {
                return [heading];
            }

            return [
                heading,
                {
                    type: 'list',
                    children: listChildren,
                },
            ];
        }),
    );
    return all.flat();
}

0 comments on commit 4421a66

Please sign in to comment.