diff --git a/src/Instance.ts b/src/Instance.ts index 23ac904624..b0859045c8 100644 --- a/src/Instance.ts +++ b/src/Instance.ts @@ -6,7 +6,7 @@ import { _Renderer } from './Renderer.ts'; import { _Tokenizer } from './Tokenizer.ts'; import { _TextRenderer } from './TextRenderer.ts'; import { escape } from './helpers.ts'; -import type { MarkedExtension, MarkedOptions } from './MarkedOptions.ts'; +import { tokenizerBlockPositions, tokenizerInlinePositions, type MarkedExtension, type MarkedOptions } from './MarkedOptions.ts'; import type { Token, Tokens, TokensList } from './Tokens.ts'; export type MaybePromise = void | Promise; @@ -74,7 +74,7 @@ export class Marked { } use(...args: MarkedExtension[]) { - const extensions: MarkedOptions['extensions'] = this.defaults.extensions || { renderers: {}, childTokens: {} }; + const extensions: MarkedOptions['extensions'] = this.defaults.extensions || { renderers: {}, childTokens: {}, tokenizers: {} }; args.forEach((pack) => { // copy options to new object @@ -105,23 +105,26 @@ export class Marked { } } if ('tokenizer' in ext) { // Tokenizer Extensions - if (!ext.level || (ext.level !== 'block' && ext.level !== 'inline')) { + if (ext.level && (ext.level !== 'block' && ext.level !== 'inline')) { throw new Error("extension level must be 'block' or 'inline'"); } - const extLevel = extensions[ext.level]; - if (extLevel) { - extLevel.unshift(ext.tokenizer); - } else { - extensions[ext.level] = [ext.tokenizer]; + if (ext.position && ![...tokenizerBlockPositions, ...tokenizerInlinePositions].includes(ext.position)) { + throw new Error(`extension position must be one of '${tokenizerBlockPositions.join("', '")}', '${tokenizerInlinePositions.join("', '")}'`); + } + if (!ext.level && !ext.position) { + throw new Error('extension level or position is required'); } + const position = ext.position || (ext.level === 'block' ? tokenizerBlockPositions[0] : tokenizerInlinePositions[0]); + extensions.tokenizers[position] = extensions.tokenizers[position] || []; + extensions.tokenizers[position].unshift(ext.tokenizer); if (ext.start) { // Function to check for start of token - if (ext.level === 'block') { + if (tokenizerBlockPositions.includes(position as typeof tokenizerBlockPositions[number])) { if (extensions.startBlock) { extensions.startBlock.push(ext.start); } else { extensions.startBlock = [ext.start]; } - } else if (ext.level === 'inline') { + } else if (tokenizerInlinePositions.includes(position as typeof tokenizerInlinePositions[number])) { if (extensions.startInline) { extensions.startInline.push(ext.start); } else { diff --git a/src/Lexer.ts b/src/Lexer.ts index d8094d6d0f..7263a070a0 100644 --- a/src/Lexer.ts +++ b/src/Lexer.ts @@ -2,7 +2,7 @@ import { _Tokenizer } from './Tokenizer.ts'; import { _defaults } from './defaults.ts'; import { other, block, inline } from './rules.ts'; import type { Token, TokensList, Tokens } from './Tokens.ts'; -import type { MarkedOptions } from './MarkedOptions.ts'; +import type { MarkedOptions, TokenizerPosition } from './MarkedOptions.ts'; /** * Block Lexer @@ -111,14 +111,9 @@ export class _Lexer { while (src) { let token: Tokens.Generic | undefined; - if (this.options.extensions?.block?.some((extTokenizer) => { - if (token = extTokenizer.call({ lexer: this }, src, tokens)) { - src = src.substring(token.raw.length); - tokens.push(token); - return true; - } - return false; - })) { + if (token = this.runExtensions('beforeSpace', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -136,6 +131,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeCode', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // code if (token = this.tokenizer.code(src)) { src = src.substring(token.raw.length); @@ -151,6 +152,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeFences', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // fences if (token = this.tokenizer.fences(src)) { src = src.substring(token.raw.length); @@ -158,6 +165,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeHeading', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // heading if (token = this.tokenizer.heading(src)) { src = src.substring(token.raw.length); @@ -165,6 +178,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeHr', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // hr if (token = this.tokenizer.hr(src)) { src = src.substring(token.raw.length); @@ -172,6 +191,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeBlockquote', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // blockquote if (token = this.tokenizer.blockquote(src)) { src = src.substring(token.raw.length); @@ -179,6 +204,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeList', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // list if (token = this.tokenizer.list(src)) { src = src.substring(token.raw.length); @@ -186,6 +217,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeHtml', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // html if (token = this.tokenizer.html(src)) { src = src.substring(token.raw.length); @@ -193,6 +230,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeDef', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // def if (token = this.tokenizer.def(src)) { src = src.substring(token.raw.length); @@ -210,6 +253,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeTable', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // table (gfm) if (token = this.tokenizer.table(src)) { src = src.substring(token.raw.length); @@ -217,6 +266,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeLheading', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // lheading if (token = this.tokenizer.lheading(src)) { src = src.substring(token.raw.length); @@ -224,6 +279,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeParagraph', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // top-level paragraph // prevent paragraph consuming extensions by clipping 'src' to extension start let cutSrc = src; @@ -256,6 +317,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeBlockText', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // text if (token = this.tokenizer.text(src)) { src = src.substring(token.raw.length); @@ -271,6 +338,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeBlockEnd', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + if (src) { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { @@ -332,15 +405,9 @@ export class _Lexer { let token: Tokens.Generic | undefined; - // extensions - if (this.options.extensions?.inline?.some((extTokenizer) => { - if (token = extTokenizer.call({ lexer: this }, src, tokens)) { - src = src.substring(token.raw.length); - tokens.push(token); - return true; - } - return false; - })) { + if (token = this.runExtensions('beforeEscape', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -351,6 +418,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeTag', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // tag if (token = this.tokenizer.tag(src)) { src = src.substring(token.raw.length); @@ -358,6 +431,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeLink', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // link if (token = this.tokenizer.link(src)) { src = src.substring(token.raw.length); @@ -365,6 +444,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeReflink', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // reflink, nolink if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); @@ -378,6 +463,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeEmStrong', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // em & strong if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) { src = src.substring(token.raw.length); @@ -385,6 +476,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeCodespan', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // code if (token = this.tokenizer.codespan(src)) { src = src.substring(token.raw.length); @@ -392,6 +489,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeBr', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // br if (token = this.tokenizer.br(src)) { src = src.substring(token.raw.length); @@ -399,6 +502,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeDel', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // del (gfm) if (token = this.tokenizer.del(src)) { src = src.substring(token.raw.length); @@ -406,6 +515,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeAutolink', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // autolink if (token = this.tokenizer.autolink(src)) { src = src.substring(token.raw.length); @@ -413,6 +528,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeUrl', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // url (gfm) if (!this.state.inLink && (token = this.tokenizer.url(src))) { src = src.substring(token.raw.length); @@ -420,6 +541,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeInlineText', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // text // prevent inlineText consuming extensions by clipping 'src' to extension start let cutSrc = src; @@ -453,6 +580,12 @@ export class _Lexer { continue; } + if (token = this.runExtensions('beforeInlineEnd', src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + if (src) { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { @@ -466,4 +599,13 @@ export class _Lexer { return tokens; } + + private runExtensions(position: TokenizerPosition, src: string, tokens: Token[]): Tokens.Generic | undefined { + for (const tokenizer of this.options.extensions?.tokenizers?.[position] ?? []) { + const token = tokenizer.call({ lexer: this }, src, tokens); + if (token && token.raw?.length) { + return token; + } + } + } } diff --git a/src/MarkedOptions.ts b/src/MarkedOptions.ts index 1794fb9ad3..01d362a093 100644 --- a/src/MarkedOptions.ts +++ b/src/MarkedOptions.ts @@ -13,14 +13,59 @@ export type TokenizerExtensionFunction = (this: TokenizerThis, src: string, toke export type TokenizerStartFunction = (this: TokenizerThis, src: string) => number | void; -export interface TokenizerExtension { +export const tokenizerBlockPositions = [ + 'beforeSpace', + 'beforeCode', + 'beforeFences', + 'beforeHeading', + 'beforeHr', + 'beforeBlockquote', + 'beforeList', + 'beforeHtml', + 'beforeDef', + 'beforeTable', + 'beforeLheading', + 'beforeParagraph', + 'beforeBlockText', + 'beforeBlockEnd', +] as const; + +export const tokenizerInlinePositions = [ + 'beforeEscape', + 'beforeTag', + 'beforeLink', + 'beforeReflink', + 'beforeEmStrong', + 'beforeCodespan', + 'beforeBr', + 'beforeDel', + 'beforeAutolink', + 'beforeUrl', + 'beforeInlineText', + 'beforeInlineEnd', +] as const; + +export type TokenizerPosition = typeof tokenizerBlockPositions[number] | typeof tokenizerInlinePositions[number]; + +interface TokenizerPositionExtension { name: string; + position: TokenizerPosition; + level?: 'block' | 'inline'; + start?: TokenizerStartFunction; + tokenizer: TokenizerExtensionFunction; + childTokens?: string[]; +} +interface TokenizerLevelExtension { + name: string; + position?: TokenizerPosition; level: 'block' | 'inline'; start?: TokenizerStartFunction; tokenizer: TokenizerExtensionFunction; childTokens?: string[]; } +export type TokenizerExtension = TokenizerPositionExtension | TokenizerLevelExtension; + export interface RendererThis { parser: _Parser; } @@ -138,11 +183,13 @@ export interface MarkedOptions extends Omit src.match(/name/)?.index, + tokenizer(src: string): NameToken | undefined { + if (src === 'name') { + const token: NameToken = { + type: 'name', + raw: src, + text: src, + tokens: this.lexer.inline(src), + items: [] + }; + this.lexer.inline(token.text, token.items); + return token; + } + }, + childTokens: ['items'] +}; + +const tokenizerPositionExtension: TokenizerExtension = { + name: 'name', + position: 'beforeSpace', + start: (src: string) => src.match(/name/)?.index, + tokenizer(src: string): NameToken | undefined { + if (src === 'name') { + const token: NameToken = { + type: 'name', + raw: src, + text: src, + tokens: this.lexer.inline(src), + items: [] + }; + this.lexer.inline(token.text, token.items); + return token; + } + }, + childTokens: ['items'] +}; + const rendererExtension: RendererExtension = { name: 'name', renderer(t) {