From 7093ad38849c83cde6d986acf9f9e17db90c8dae Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Mon, 2 Dec 2024 14:14:48 -0500 Subject: [PATCH 1/5] Improve docs for sourcepos and NodeValue::Escaped --- src/nodes.rs | 2 ++ src/parser/mod.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/nodes.rs b/src/nodes.rs index 85407a7a..cd749c1e 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -181,6 +181,8 @@ pub enum NodeValue { MultilineBlockQuote(NodeMultilineBlockQuote), /// **Inline**. A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes) + /// + /// Enabled with [`escaped_char_spans`](crate::RenderOptionsBuilder::escaped_char_spans). Escaped, /// **Inline**. A wikilink to some URL. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a39cd250..aa8ec134 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -865,6 +865,8 @@ pub struct RenderOptions { /// let xml = markdown_to_commonmark_xml(input, &options); /// assert!(xml.contains("")); /// ``` + /// + /// [`experimental_inline_sourcepos`]: crate::RenderOptionsBuilder::experimental_inline_sourcepos #[builder(default)] pub sourcepos: bool, From 6ce626f97165869704a8493992306b021d457bb1 Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Wed, 4 Dec 2024 15:03:26 -0500 Subject: [PATCH 2/5] Mark syntect as required for example --- Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 38bb9e04..55466560 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,3 +78,7 @@ clap = { version = "4.0", optional = true, features = [ "string", "wrap_help", ] } + +[[example]] +name = "syntect" +required-features = [ "syntect" ] From abbfab5e1492181aa41b009520d3d3933b887dea Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Wed, 4 Dec 2024 15:03:02 -0500 Subject: [PATCH 3/5] Support non-bool extensions in html_opts! and assert_ast_match! --- src/tests.rs | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/tests.rs b/src/tests.rs index 68cf08e8..a764bdff 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -127,20 +127,35 @@ macro_rules! html_opts { ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs,) }; + ([$($optclass:ident.$optname:ident = $val:expr),*], $lhs:expr, $rhs:expr) => { + html_opts!([$($optclass.$optname = $val),*], $lhs, $rhs,) + }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr,) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs, roundtrip) }; + ([$($optclass:ident.$optname:ident = $val:expr),*], $lhs:expr, $rhs:expr,) => { + html_opts!([$($optclass.$optname = $val),*], $lhs, $rhs, roundtrip) + }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, $rt:ident) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs, $rt,) }; + ([$($optclass:ident.$optname:ident = $val:expr),*], $lhs:expr, $rhs:expr, $rt:ident) => { + html_opts!([$($optclass.$optname = $val),*], $lhs, $rhs, $rt,) + }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, roundtrip,) => { + html_opts!([$($optclass.$optname = true),*], $lhs, $rhs, roundtrip,) + }; + ([$($optclass:ident.$optname:ident = $val:expr),*], $lhs:expr, $rhs:expr, roundtrip,) => { $crate::tests::html_opts_i($lhs, $rhs, true, |opts| { - $(opts.$optclass.$optname = true;)* + $(opts.$optclass.$optname = $val;)* }); }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, no_roundtrip,) => { + html_opts!([$($optclass.$optname = true),*], $lhs, $rhs, no_roundtrip,) + }; + ([$($optclass:ident.$optname:ident = $val:expr),*], $lhs:expr, $rhs:expr, no_roundtrip,) => { $crate::tests::html_opts_i($lhs, $rhs, false, |opts| { - $(opts.$optclass.$optname = true;)* + $(opts.$optclass.$optname = $val;)* }); }; } @@ -312,13 +327,20 @@ macro_rules! assert_ast_match { $amt ) }; - ([ $( $optclass:ident.$optname:ident ),* ], $( $md:literal )+, $amt:tt) => { + ([ $( $optclass:ident.$optname:ident = $val:expr ),* ], $( $md:literal )+, $amt:tt) => { crate::tests::assert_ast_match_i( concat!( $( $md ),+ ), ast!($amt), - |#[allow(unused_variables)] opts| {$(opts.$optclass.$optname = true;)*}, + |#[allow(unused_variables)] opts| {$(opts.$optclass.$optname = $val;)*}, ); }; + ([ $( $optclass:ident.$optname:ident ),* ], $( $md:literal )+, $amt:tt) => { + assert_ast_match!( + [ $( $optclass.$optname = true),* ], + $( $md )+, + $amt + ) + }; } pub(crate) use assert_ast_match; From 612abd48cff801ed0cbb75f6d3af57ae3c89d0fd Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Wed, 4 Dec 2024 15:02:02 -0500 Subject: [PATCH 4/5] Make wikilinks orders mutually exclusive --- examples/s-expr.rs | 5 ++-- src/cm.rs | 6 ++-- src/lib.rs | 2 +- src/main.rs | 20 +++++++++++-- src/parser/inlines.rs | 17 +++++------ src/parser/mod.rs | 42 +++++++++++++++------------ src/tests/api.rs | 5 ++-- src/tests/commonmark.rs | 3 +- src/tests/wikilinks.rs | 63 +++++++++++++++++++++++++++-------------- 9 files changed, 101 insertions(+), 62 deletions(-) diff --git a/examples/s-expr.rs b/examples/s-expr.rs index db0a3552..1d01890c 100644 --- a/examples/s-expr.rs +++ b/examples/s-expr.rs @@ -14,7 +14,7 @@ const INDENT: usize = 4; const CLOSE_NEWLINE: bool = false; use comrak::nodes::{AstNode, NodeValue}; -use comrak::{parse_document, Arena, ExtensionOptions, Options}; +use comrak::{parse_document, Arena, ExtensionOptions, Options, WikiLinksMode}; use std::env; use std::error::Error; use std::fs::File; @@ -86,8 +86,7 @@ fn dump(source: &str) -> io::Result<()> { .multiline_block_quotes(true) .math_dollars(true) .math_code(true) - .wikilinks_title_after_pipe(true) - .wikilinks_title_before_pipe(true) + .wikilinks(WikiLinksMode::TitleFirst) .build(); let opts = Options { diff --git a/src/cm.rs b/src/cm.rs index 5cffc4d4..f018fcdb 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -6,7 +6,7 @@ use crate::nodes::{ use crate::nodes::{NodeList, TableAlignment}; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; -use crate::parser::Options; +use crate::parser::{Options, WikiLinksMode}; use crate::scanners; use crate::strings::trim_start_match; use crate::{nodes, Plugins}; @@ -761,12 +761,12 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { fn format_wikilink(&mut self, nl: &NodeWikiLink, entering: bool) -> bool { if entering { write!(self, "[[").unwrap(); - if self.options.extension.wikilinks_title_after_pipe { + if self.options.extension.wikilinks == Some(WikiLinksMode::UrlFirst) { self.output(nl.url.as_bytes(), false, Escaping::Url); write!(self, "|").unwrap(); } } else { - if self.options.extension.wikilinks_title_before_pipe { + if self.options.extension.wikilinks == Some(WikiLinksMode::TitleFirst) { write!(self, "|").unwrap(); self.output(nl.url.as_bytes(), false, Escaping::Url); } diff --git a/src/lib.rs b/src/lib.rs index f3eab8cd..b805c8cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,7 +93,7 @@ pub use parser::{ parse_document, BrokenLinkCallback, BrokenLinkReference, ExtensionOptions, ExtensionOptionsBuilder, ListStyleType, Options, ParseOptions, ParseOptionsBuilder, Plugins, PluginsBuilder, RenderOptions, RenderOptionsBuilder, RenderPlugins, RenderPluginsBuilder, - ResolvedReference, URLRewriter, + ResolvedReference, URLRewriter, WikiLinksMode, }; pub use typed_arena::Arena; pub use xml::format_document as format_xml; diff --git a/src/main.rs b/src/main.rs index f9bd64ad..a767ca48 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ use std::path::PathBuf; use std::process; use clap::{Parser, ValueEnum}; -use comrak::{ExtensionOptions, ParseOptions, RenderOptions}; +use comrak::{ExtensionOptions, ParseOptions, RenderOptions, WikiLinksMode}; const EXIT_SUCCESS: i32 = 0; const EXIT_PARSE_CONFIG: i32 = 2; @@ -252,6 +252,21 @@ fn main() -> Result<(), Box> { let exts = &cli.extensions; + let wikilinks_title_after_pipe = exts.contains(&Extension::WikilinksTitleAfterPipe); + let wikilinks_title_before_pipe = exts.contains(&Extension::WikilinksTitleBeforePipe); + let wikilinks_mode = match (wikilinks_title_after_pipe, wikilinks_title_before_pipe) { + (false, false) => None, + (true, false) => Some(WikiLinksMode::UrlFirst), + (false, true) => Some(WikiLinksMode::TitleFirst), + (true, true) => { + eprintln!(concat!( + "cannot enable both wikilinks-title-after-pipe ", + "and wikilinks-title-before-pipe at the same time" + )); + process::exit(EXIT_PARSE_CONFIG); + } + }; + let extension = ExtensionOptions::builder() .strikethrough(exts.contains(&Extension::Strikethrough) || cli.gfm) .tagfilter(exts.contains(&Extension::Tagfilter) || cli.gfm) @@ -265,8 +280,7 @@ fn main() -> Result<(), Box> { .multiline_block_quotes(exts.contains(&Extension::MultilineBlockQuotes)) .math_dollars(exts.contains(&Extension::MathDollars)) .math_code(exts.contains(&Extension::MathCode)) - .wikilinks_title_after_pipe(exts.contains(&Extension::WikilinksTitleAfterPipe)) - .wikilinks_title_before_pipe(exts.contains(&Extension::WikilinksTitleBeforePipe)) + .maybe_wikilinks(wikilinks_mode) .underline(exts.contains(&Extension::Underline)) .subscript(exts.contains(&Extension::Subscript)) .spoiler(exts.contains(&Extension::Spoiler)) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 4a9f3fad..e432147c 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -21,6 +21,8 @@ use std::str; use typed_arena::Arena; use unicode_categories::UnicodeCategories; +use super::WikiLinksMode; + const MAXBACKTICKS: usize = 80; const MAX_LINK_LABEL_LENGTH: usize = 1000; const MAX_MATH_DOLLARS: usize = 2; @@ -235,8 +237,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { let mut wikilink_inl = None; - if (self.options.extension.wikilinks_title_after_pipe - || self.options.extension.wikilinks_title_before_pipe) + if self.options.extension.wikilinks.is_some() && !self.within_brackets && self.peek_char() == Some(&(b'[')) { @@ -1804,16 +1805,16 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) { self.pos += 2; - if self.options.extension.wikilinks_title_after_pipe { - Some(WikilinkComponents { + match self.options.extension.wikilinks { + Some(WikiLinksMode::UrlFirst) => Some(WikilinkComponents { url: left, link_label: Some((right, right_startpos + 1, self.pos - 3)), - }) - } else { - Some(WikilinkComponents { + }), + Some(WikiLinksMode::TitleFirst) => Some(WikilinkComponents { url: right, link_label: Some((left, left_startpos + 1, right_startpos - 1)), - }) + }), + None => unreachable!(), } } else { self.pos = left_startpos; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index aa8ec134..8f6cf45c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -192,6 +192,21 @@ where } } +#[non_exhaustive] +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] +/// Selects between wikilinks with the title first or the URL first. +/// +/// See [`ExtensionOptions::wikilinks`]. +pub enum WikiLinksMode { + /// Indicates that the URL precedes the title. For example: `[[http://example.com|link + /// title]]`. + UrlFirst, + + /// Indicates that the title precedes the URL. For example: `[[link title|http://example.com]]`. + TitleFirst, +} + #[non_exhaustive] #[derive(Default, Debug, Clone, Builder)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] @@ -466,37 +481,28 @@ pub struct ExtensionOptions<'c> { #[builder(default)] pub shortcodes: bool, - /// Enables wikilinks using title after pipe syntax + /// Enables wikilinks + /// + /// With [`WikiLinksMode::TitleFirst`]: /// /// ```` md - /// [[url|link label]] + /// [[link label|url]] /// ```` /// - /// ``` - /// # use comrak::{markdown_to_html, Options}; - /// let mut options = Options::default(); - /// options.extension.wikilinks_title_after_pipe = true; - /// assert_eq!(markdown_to_html("[[url|link label]]", &options), - /// "

link label

\n"); - /// ``` - #[builder(default)] - pub wikilinks_title_after_pipe: bool, - - /// Enables wikilinks using title before pipe syntax + /// With [`WikiLinksMode::UrlFirst`]: /// /// ```` md - /// [[link label|url]] + /// [[url|link label]] /// ```` /// /// ``` - /// # use comrak::{markdown_to_html, Options}; + /// # use comrak::{markdown_to_html, Options, WikiLinksMode}; /// let mut options = Options::default(); - /// options.extension.wikilinks_title_before_pipe = true; + /// options.extension.wikilinks = Some(WikiLinksMode::TitleFirst); /// assert_eq!(markdown_to_html("[[link label|url]]", &options), /// "

link label

\n"); /// ``` - #[builder(default)] - pub wikilinks_title_before_pipe: bool, + pub wikilinks: Option, /// Enables underlines using double underscores /// diff --git a/src/tests/api.rs b/src/tests/api.rs index dd82d7b5..4d5dde38 100644 --- a/src/tests/api.rs +++ b/src/tests/api.rs @@ -1,6 +1,6 @@ use std::sync::{Arc, Mutex}; -use parser::BrokenLinkReference; +use parser::{BrokenLinkReference, WikiLinksMode}; use crate::{ adapters::{HeadingAdapter, HeadingMeta, SyntaxHighlighterAdapter}, @@ -68,8 +68,7 @@ fn exercise_full_api() { let extension = extension.shortcodes(true); let _extension = extension - .wikilinks_title_after_pipe(true) - .wikilinks_title_before_pipe(true) + .wikilinks(WikiLinksMode::UrlFirst) .underline(true) .subscript(true) .spoiler(true) diff --git a/src/tests/commonmark.rs b/src/tests/commonmark.rs index 5d6e2e6c..b1473f52 100644 --- a/src/tests/commonmark.rs +++ b/src/tests/commonmark.rs @@ -4,6 +4,7 @@ use self::nodes::{Ast, LineColumn, ListType, NodeList}; use super::*; use ntest::test_case; +use parser::WikiLinksMode; #[test] fn commonmark_removes_redundant_strong() { @@ -83,7 +84,7 @@ fn math(markdown: &str, cm: &str) { #[test_case("This [[url|link label]] that", "This [[url|link%20label]] that\n")] fn wikilinks(markdown: &str, cm: &str) { let mut options = Options::default(); - options.extension.wikilinks_title_before_pipe = true; + options.extension.wikilinks = Some(WikiLinksMode::TitleFirst); commonmark(markdown, cm, Some(&options)); } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 039816e6..3f31f141 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,16 +1,17 @@ use super::*; +use crate::WikiLinksMode; #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks = Some(WikiLinksMode::UrlFirst)], concat!("This is [[<script>alert(0)</script>|a <link]]",), concat!("

This is a <link

\n"), no_roundtrip, ); html_opts!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], concat!("This is [[a <link|<script>alert(0)</script>]]",), concat!("

This is a <link

\n"), no_roundtrip, @@ -20,13 +21,13 @@ fn wikilinks_does_not_unescape_html_entities_in_link_label() { #[test] fn wikilinks_sanitizes_the_href_attribute_case_1() { html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks = Some(WikiLinksMode::UrlFirst)], concat!("[[http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com|a]]",), concat!("

a

\n"), ); html_opts!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], concat!("[[a|http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com]]",), concat!("

a

\n"), ); @@ -35,13 +36,13 @@ fn wikilinks_sanitizes_the_href_attribute_case_1() { #[test] fn wikilinks_sanitizes_the_href_attribute_case_2() { html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks = Some(WikiLinksMode::UrlFirst)], concat!("[[\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com|a]]",), concat!("

a

\n"), ); html_opts!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], concat!("[[a|\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com]]",), concat!("

a

\n"), ); @@ -50,7 +51,7 @@ fn wikilinks_sanitizes_the_href_attribute_case_2() { #[test] fn wikilinks_title_escape_chars() { html_opts!( - [extension.wikilinks_title_before_pipe, render.escaped_char_spans], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst), render.escaped_char_spans = true], concat!("[[Name \\[of\\] page|http://example.com]]",), concat!("

Name [of] page

\n"), no_roundtrip, @@ -61,8 +62,8 @@ fn wikilinks_title_escape_chars() { fn wikilinks_supercedes_relaxed_autolinks() { html_opts!( [ - extension.wikilinks_title_after_pipe, - parse.relaxed_autolinks + extension.wikilinks = Some(WikiLinksMode::UrlFirst), + parse.relaxed_autolinks = true ], concat!("[[http://example.com]]",), concat!( @@ -72,8 +73,8 @@ fn wikilinks_supercedes_relaxed_autolinks() { html_opts!( [ - extension.wikilinks_title_before_pipe, - parse.relaxed_autolinks + extension.wikilinks = Some(WikiLinksMode::TitleFirst), + parse.relaxed_autolinks = true ], concat!("[[http://example.com]]",), concat!( @@ -85,7 +86,10 @@ fn wikilinks_supercedes_relaxed_autolinks() { #[test] fn wikilinks_only_url_in_tables() { html_opts!( - [extension.wikilinks_title_after_pipe, extension.table], + [ + extension.wikilinks = Some(WikiLinksMode::UrlFirst), + extension.table = true + ], concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), concat!( "\n", @@ -104,7 +108,10 @@ fn wikilinks_only_url_in_tables() { ); html_opts!( - [extension.wikilinks_title_before_pipe, extension.table], + [ + extension.wikilinks = Some(WikiLinksMode::TitleFirst), + extension.table = true + ], concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), concat!( "
\n", @@ -126,7 +133,10 @@ fn wikilinks_only_url_in_tables() { #[test] fn wikilinks_full_in_tables_not_supported() { html_opts!( - [extension.wikilinks_title_after_pipe, extension.table], + [ + extension.wikilinks = Some(WikiLinksMode::UrlFirst), + extension.table = true + ], concat!("| header |\n", "| ------- |\n", "| [[url|link label]] |\n",), concat!( "
\n", @@ -145,7 +155,10 @@ fn wikilinks_full_in_tables_not_supported() { ); html_opts!( - [extension.wikilinks_title_before_pipe, extension.table], + [ + extension.wikilinks = Some(WikiLinksMode::TitleFirst), + extension.table = true + ], concat!("| header |\n", "| ------- |\n", "| [[link label|url]] |\n",), concat!( "
\n", @@ -170,7 +183,7 @@ fn wikilinks_exceeds_label_limit() { let expected = format!("

{}

\n", long_label); html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks = Some(WikiLinksMode::UrlFirst)], &long_label, &expected, ); @@ -179,7 +192,10 @@ fn wikilinks_exceeds_label_limit() { #[test] fn wikilinks_autolinker_ignored() { html_opts!( - [extension.wikilinks_title_after_pipe, extension.autolink], + [ + extension.wikilinks = Some(WikiLinksMode::UrlFirst), + extension.autolink = true + ], concat!("[[http://example.com]]",), concat!( "

http://example.com

\n" @@ -187,7 +203,10 @@ fn wikilinks_autolinker_ignored() { ); html_opts!( - [extension.wikilinks_title_before_pipe, extension.autolink], + [ + extension.wikilinks = Some(WikiLinksMode::TitleFirst), + extension.autolink = true + ], concat!("[[http://example.com]]",), concat!( "

http://example.com

\n" @@ -198,7 +217,7 @@ fn wikilinks_autolinker_ignored() { #[test] fn sourcepos() { assert_ast_match!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks = Some(WikiLinksMode::UrlFirst)], "This [[http://example.com|link label]] that\n", (document (1:1-1:43) [ (paragraph (1:1-1:43) [ @@ -212,7 +231,7 @@ fn sourcepos() { ); assert_ast_match!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], "This [[link label|http://example.com]] that\n", (document (1:1-1:43) [ (paragraph (1:1-1:43) [ @@ -226,7 +245,7 @@ fn sourcepos() { ); assert_ast_match!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], "This [[http://example.com]] that\n", (document (1:1-1:32) [ (paragraph (1:1-1:32) [ @@ -240,7 +259,7 @@ fn sourcepos() { ); assert_ast_match!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks = Some(WikiLinksMode::TitleFirst)], "This [[link\\[label|http://example.com]] that\n", (document (1:1-1:44) [ (paragraph (1:1-1:44) [ From 38f24039d16af26a1b62cd42328e76e611ca45d5 Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Wed, 4 Dec 2024 16:53:00 -0500 Subject: [PATCH 5/5] Test sourcepos for all NodeValue variants --- Cargo.lock | 31 +- Cargo.toml | 1 + src/nodes.rs | 11 +- src/parser/math.rs | 4 +- src/parser/multiline_block_quote.rs | 2 +- src/tests.rs | 2 + src/tests/sourcepos.rs | 512 ++++++++++++++++++++++++++++ 7 files changed, 556 insertions(+), 7 deletions(-) create mode 100644 src/tests/sourcepos.rs diff --git a/Cargo.lock b/Cargo.lock index bdb6805b..525a53c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,7 +137,7 @@ version = "4.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", @@ -169,6 +169,7 @@ dependencies = [ "regex", "shell-words", "slug", + "strum", "syntect", "toml 0.7.3", "typed-arena", @@ -309,6 +310,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -724,6 +731,28 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + [[package]] name = "syn" version = "1.0.107" diff --git a/Cargo.toml b/Cargo.toml index 55466560..d1b70d10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ caseless = "0.2.1" [dev-dependencies] ntest = "0.9" +strum = { version = "0.26.3", features = ["derive"] } toml = "0.7.3" [features] diff --git a/src/nodes.rs b/src/nodes.rs index cd749c1e..9781363d 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -12,6 +12,11 @@ pub use crate::parser::multiline_block_quote::NodeMultilineBlockQuote; /// The core AST node enum. #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(strum::EnumDiscriminants))] +#[cfg_attr( + test, + strum_discriminants(vis(pub(crate)), derive(strum::VariantArray, Hash)) +)] pub enum NodeValue { /// The root of every CommonMark document. Contains **blocks**. Document, @@ -246,7 +251,7 @@ pub struct NodeTable { } /// An inline [code span](https://github.github.com/gfm/#code-spans). -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct NodeCode { /// The number of backticks pub num_backticks: usize, @@ -259,7 +264,7 @@ pub struct NodeCode { } /// The details of a link's destination, or an image's source. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct NodeLink { /// The URL for the link destination or image source. pub url: String, @@ -272,7 +277,7 @@ pub struct NodeLink { } /// The details of a wikilink's destination. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct NodeWikiLink { /// The URL for the link destination. pub url: String, diff --git a/src/parser/math.rs b/src/parser/math.rs index b1441e28..4f6cfb57 100644 --- a/src/parser/math.rs +++ b/src/parser/math.rs @@ -1,5 +1,5 @@ /// An inline math span -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct NodeMath { /// Whether this is dollar math (`$` or `$$`). /// `false` indicates it is code math @@ -8,7 +8,7 @@ pub struct NodeMath { /// Whether this is display math (using `$$`) pub display_math: bool, - /// The literal contents of the math span. + /// The literal contents of the math span. /// As the contents are not interpreted as Markdown at all, /// they are contained within this structure, /// rather than inserted into a child inline of any kind. diff --git a/src/parser/multiline_block_quote.rs b/src/parser/multiline_block_quote.rs index 2a8b5710..e6107eba 100644 --- a/src/parser/multiline_block_quote.rs +++ b/src/parser/multiline_block_quote.rs @@ -1,5 +1,5 @@ /// The metadata of a multiline blockquote. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub struct NodeMultilineBlockQuote { /// The length of the fence. pub fence_length: usize, diff --git a/src/tests.rs b/src/tests.rs index a764bdff..151f11e5 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -24,6 +24,8 @@ mod plugins; mod regressions; mod rewriter; mod shortcodes; +#[path = "tests/sourcepos.rs"] +mod sourcepos_; mod spoiler; mod strikethrough; mod subscript; diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs new file mode 100644 index 00000000..3119e3a6 --- /dev/null +++ b/src/tests/sourcepos.rs @@ -0,0 +1,512 @@ +use nodes::NodeValueDiscriminants; +use strum::VariantArray; + +use super::*; + +type TestCase = (&'static [Sourcepos], &'static str); + +const DOCUMENT: TestCase = (&[sourcepos!((1:1-1:1))], "a"); + +const FRONT_MATTER: TestCase = ( + &[sourcepos!((1:1-3:3))], + r#"--- +a: b +--- + +hello world +"#, +); + +const BLOCK_QUOTE: TestCase = ( + &[sourcepos!((1:1-3:36))], + r#"> hello world +> this is line 1 +> this is line 2 and some extra text + +hello world"#, +); + +const MULTILINE_BLOCK_QUOTE: TestCase = ( + &[sourcepos!((3:1-7:3))], + r#"Some text + +>>> +hello world +this is line 1 +this is line 2 and some extra text +>>> + +hello world"#, +); + +const LIST: TestCase = ( + &[sourcepos!((1:1-2:38))], + r#"- bullet point one +- bullet point two and some extra text + +hello world +"#, +); + +const ITEM: TestCase = ( + &[sourcepos!((1:1-1:18)), sourcepos!((2:1-2:38))], + r#"- bullet point one +- bullet point two and some extra text + +hello world +"#, +); + +const TASK_ITEM: TestCase = ( + &[sourcepos!((1:1-1:22)), sourcepos!((3:1-3:24))], + r#"- [ ] bullet point one +- bullet point two and some extra text +- [x] bullet point three + +hello world +"#, +); + +const DESCRIPTION_LIST: TestCase = ( + &[sourcepos!((1:1-7:11))], + r#"Term 1 + +: Details 1 + +Term 2 + +: Details 2"#, +); + +const DESCRIPTION_ITEM: TestCase = ( + &[sourcepos!((1:1-3:11)), sourcepos!((5:1-7:11))], + r#"Term 1 + +: Details 1 + +Term 2 + +: Details 2"#, +); + +const DESCRIPTION_TERM: TestCase = ( + &[sourcepos!((1:1-1:6))], + r#"Term 1 + +: Details 1 + +hello world +"#, +); + +const DESCRIPTION_DETAILS: TestCase = ( + &[sourcepos!((3:1-3:11))], + r#"Term 1 + +: Details 1 + +hello world +"#, +); + +const CODE_BLOCK: TestCase = ( + &[sourcepos!((1:1-3:3))], + r#"``` +hello world +``` + +hello world +"#, +); + +const HTML_BLOCK: TestCase = ( + &[sourcepos!((1:1-2:30)), sourcepos!((5:1-5:10))], + r#"
+hello world + +hello world +
+ +hello world +"#, +); + +const HTML_INLINE: TestCase = ( + &[sourcepos!((1:7-3:14))], + r#"hello bar world +"#, +); + +const PARAGRAPH: TestCase = ( + &[sourcepos!((1:1-1:11)), sourcepos!((4:1-4:11))], + r#"hello world + + +hello world +"#, +); + +const HEADING: TestCase = ( + &[sourcepos!((5:1-5:13))], + r#"--- +a: b +--- + +# Hello World + +hello world +"#, +); + +const THEMATIC_BREAK: TestCase = ( + &[sourcepos!((3:1-3:3))], + r#"Hello + +--- + +World"#, +); + +const FOOTNOTE_DEFINITION: TestCase = ( + &[sourcepos!((3:1-3:11))], + r#"Hello[^1] + +[^1]: World +"#, +); + +const FOOTNOTE_REFERENCE: TestCase = ( + &[sourcepos!((1:6-1:9))], + r#"Hello[^1] + +[^1]: World +"#, +); + +const TABLE: TestCase = ( + &[sourcepos!((3:1-5:17))], + r#"stuff before + +| Hello | World | +| ----- | ----- | +| cell1 | cell2 | + +hello world +"#, +); + +const TABLE_ROW: TestCase = ( + &[sourcepos!((3:1-3:17)), sourcepos!((5:1-5:18))], + r#"stuff before + +| Hello | World | +| ----- | ----- | +| cell1 | cell02 | + +hello world +"#, +); + +const TABLE_CELL: TestCase = ( + &[ + sourcepos!((3:2-3:8)), + sourcepos!((3:10-3:16)), + sourcepos!((5:2-5:8)), + sourcepos!((5:10-5:17)), + ], + r#"stuff before + +| Hello | World | +| ----- | ----- | +| cell1 | cell02 | + +hello world +"#, +); + +const TEXT: TestCase = ( + &[ + sourcepos!((1:1-1:12)), + sourcepos!((3:3-3:7)), + sourcepos!((3:11-3:15)), + sourcepos!((5:3-5:7)), + sourcepos!((5:11-5:16)), + sourcepos!((7:1-7:11)), + sourcepos!((9:3-9:13)), + sourcepos!((11:3-11:8)), + sourcepos!((12:3-12:9)), + sourcepos!((12:12-12:15)), + sourcepos!((14:7-14:14)), + ], + r#"stuff before + +| Hello | World | +| ----- | ----- | +| cell1 | cell02 | + +hello world + +> hello world + +- item 1[^1] +- item 2 **bold** + +[^1]: The end. +"#, +); + +const SOFT_BREAK: TestCase = (&[sourcepos!((1:13-1:13))], "stuff before\nstuff after"); +const LINE_BREAK: TestCase = (&[sourcepos!((1:13-1:15))], "stuff before \nstuff after"); + +const CODE: TestCase = (&[sourcepos!((1:7-1:13))], "hello `world`"); + +const EMPH: TestCase = ( + &[sourcepos!((1:7-1:13)), sourcepos!((1:23-2:4))], + "hello *world* between *wo\nrld* after", +); + +const STRONG: TestCase = ( + &[sourcepos!((1:7-1:15)), sourcepos!((1:25-2:5))], + "hello **world** between **wo\nrld** after", +); + +const STRIKETHROUGH: TestCase = ( + &[sourcepos!((1:7-1:15)), sourcepos!((1:25-2:5))], + "hello ~~world~~ between ~~wo\nrld~~ after", +); + +const SUPERSCRIPT: TestCase = ( + &[sourcepos!((1:7-1:13)), sourcepos!((1:23-2:4))], + "hello ^world^ between ^wo\nrld^ after", +); + +const SUBSCRIPT: TestCase = ( + &[sourcepos!((1:7-1:13)), sourcepos!((1:23-2:4))], + "hello ~world~ between ~wo\nrld~ after", +); + +const LINK: TestCase = ( + &[ + sourcepos!((1:7-1:32)), + sourcepos!((2:7-2:32)), + sourcepos!((3:7-3:11)), + sourcepos!((4:7-4:16)), + sourcepos!((5:7-5:29)), + ], + r#"hello world +hello [foo](https://example.com) world +hello [foo] world +hello [bar][bar] world +hello https://example.com/foo world + +[foo]: https://example.com +[bar]: https://example.com"#, +); + +const IMAGE: TestCase = ( + &[sourcepos!((1:7-1:38))], + "hello ![alt text](https://example.com) banana", +); + +const MATH: TestCase = ( + &[ + sourcepos!((3:1-3:7)), + sourcepos!((3:17-3:26)), + sourcepos!((3:36-3:44)), + ], + r#"hello + +$1 + 1$ between $`1 + 23`$ between $$a + b$$ + +banana"#, +); + +const ESCAPED: TestCase = ( + &[ + sourcepos!((1:1-1:2)), + sourcepos!((1:3-1:4)), + sourcepos!((1:5-1:6)), + sourcepos!((1:7-1:8)), + sourcepos!((1:9-1:10)), + sourcepos!((1:11-1:12)), + sourcepos!((1:13-1:14)), + sourcepos!((1:15-1:16)), + sourcepos!((1:17-1:18)), + sourcepos!((1:19-1:20)), + sourcepos!((1:21-1:22)), + sourcepos!((1:23-1:24)), + sourcepos!((1:25-1:26)), + sourcepos!((1:27-1:28)), + sourcepos!((1:29-1:30)), + sourcepos!((1:31-1:32)), + sourcepos!((1:33-1:34)), + sourcepos!((1:35-1:36)), + sourcepos!((1:37-1:38)), + sourcepos!((1:39-1:40)), + sourcepos!((1:41-1:42)), + sourcepos!((1:43-1:44)), + sourcepos!((1:45-1:46)), + sourcepos!((1:47-1:48)), + sourcepos!((1:49-1:50)), + sourcepos!((1:51-1:52)), + sourcepos!((1:53-1:54)), + sourcepos!((1:55-1:56)), + sourcepos!((1:57-1:58)), + sourcepos!((1:59-1:60)), + sourcepos!((1:61-1:62)), + sourcepos!((1:63-1:64)), + ], + r#"\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~\a"#, +); + +const WIKI_LINK: TestCase = ( + &[sourcepos!((1:1-1:9)), sourcepos!((3:1-3:33))], + r#"[[floop]] + +[[http://example.com|some title]] + +after"#, +); + +const UNDERLINE: TestCase = (&[sourcepos!((1:8-1:22))], "before __hello world__ after"); + +const SPOILERED_TEXT: TestCase = ( + &[sourcepos!((2:1-2:11))], + r#"before +||spoiler|| +after"#, +); + +const ESCAPED_TAG: TestCase = ( + &[sourcepos!((2:1-2:8))], + r#"before +||hello| +after"#, +); + +fn node_values() -> HashMap { + use NodeValueDiscriminants::*; + + NodeValueDiscriminants::VARIANTS + .iter() + .filter(|v| { + !matches!( + v, + // Remove buggy variants. + List // end is 3:0 + | Item // end is 3:0 + | TaskItem // end is 4:0 + | DescriptionItem // end is 4:0 + | DescriptionTerm // end is 3:0 + | DescriptionDetails // end is 4:0 + | HtmlInline // end is 1:31 but should be 3:14 + | LineBreak // start is 1:15 but should be 1:13 + | Code // is 1:8-1:12 but should be 1:7-1:13 + | ThematicBreak // end is 4:0 + | Link // inconsistent between link types + | Math // is 3:2-3:6 but should be 3:1-3:7 + ) + }) + .filter_map(|v| { + let text = match v { + Document => DOCUMENT, + FrontMatter => FRONT_MATTER, + BlockQuote => BLOCK_QUOTE, + MultilineBlockQuote => MULTILINE_BLOCK_QUOTE, + List => LIST, + Item => ITEM, + TaskItem => TASK_ITEM, + DescriptionList => DESCRIPTION_LIST, + DescriptionItem => DESCRIPTION_ITEM, + DescriptionTerm => DESCRIPTION_TERM, + DescriptionDetails => DESCRIPTION_DETAILS, + CodeBlock => CODE_BLOCK, + HtmlBlock => HTML_BLOCK, + HtmlInline => HTML_INLINE, + Paragraph => PARAGRAPH, + Heading => HEADING, + ThematicBreak => THEMATIC_BREAK, + FootnoteDefinition => FOOTNOTE_DEFINITION, + FootnoteReference => FOOTNOTE_REFERENCE, + Table => TABLE, + TableRow => TABLE_ROW, + TableCell => TABLE_CELL, + Text => TEXT, + SoftBreak => SOFT_BREAK, + LineBreak => LINE_BREAK, + Code => CODE, + Emph => EMPH, + Strong => STRONG, + Strikethrough => STRIKETHROUGH, + Superscript => SUPERSCRIPT, + Subscript => SUBSCRIPT, + Link => LINK, + Image => IMAGE, + Math => MATH, + Escaped => ESCAPED, + WikiLink => WIKI_LINK, + Underline => UNDERLINE, + SpoileredText => SPOILERED_TEXT, + EscapedTag => ESCAPED_TAG, + }; + Some((*v, text)) + }) + .collect() +} + +#[test] +fn sourcepos() { + // Use a single test instead of one test per node type so that we get a compile error when new + // variants are added to the `NodeValue` enum. + let node_values = node_values(); + + let mut options = Options::default(); + options.render = RenderOptions::builder().escaped_char_spans(true).build(); + + options.extension = ExtensionOptions::builder() + .front_matter_delimiter("---".to_string()) + .description_lists(true) + .footnotes(true) + .table(true) + .tasklist(true) + .strikethrough(true) + .superscript(true) + .subscript(true) + .autolink(true) + .math_code(true) + .math_dollars(true) + .multiline_block_quotes(true) + .wikilinks(WikiLinksMode::UrlFirst) + .underline(true) + .spoiler(true) + .build(); + + for (kind, (expecteds, text)) in node_values { + let arena = Arena::new(); + let root = parse_document(&arena, text, &options); + let asts: Vec<_> = root + .descendants() + .filter(|d| NodeValueDiscriminants::from(&d.data.borrow().value) == kind) + .collect(); + + if asts.len() != expecteds.len() { + panic!( + "expected {} node(s) of kind {:?}, but got {}", + expecteds.len(), + kind, + asts.len() + ); + } + + for (ast, expected) in asts.into_iter().zip(expecteds) { + let actual = ast.data.borrow().sourcepos; + assert_eq!( + *expected, actual, + "{} != {} for {:?}", + expected, actual, kind + ); + } + } +}