Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for wikilinks format #407

Merged
merged 10 commits into from
May 16, 2024
2 changes: 2 additions & 0 deletions examples/s-expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ fn dump(source: &str) -> io::Result<()> {
.multiline_block_quotes(true)
.math_dollars(true)
.math_code(true)
.wikilinks_title_after_pipe(true)
.wikilinks_title_before_pipe(true)
.build()
.unwrap();

Expand Down
4 changes: 3 additions & 1 deletion fuzz/fuzz_targets/all_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ fuzz_target!(|s: &str| {
extension.math_code = true;
extension.front_matter_delimiter = Some("---".to_string());
extension.shortcodes = true;

extension.wikilinks_title_after_pipe = true;
extension.wikilinks_title_before_pipe = true;

let mut parse = ParseOptions::default();
parse.smart = true;
parse.default_info_string = Some("rust".to_string());
Expand Down
4 changes: 4 additions & 0 deletions fuzz/fuzz_targets/quadratic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ struct FuzzExtensionOptions {
math_dollars: bool,
math_code: bool,
shortcodes: bool,
wikilinks_title_after_pipe: bool,
wikilinks_title_before_pipe: bool,
}

impl FuzzExtensionOptions {
Expand All @@ -213,6 +215,8 @@ impl FuzzExtensionOptions {
extension.math_dollars = self.math_dollars;
extension.math_code = self.math_code;
extension.shortcodes = self.shortcodes;
extension.wikilinks_title_after_pipe = self.wikilinks_title_after_pipe;
extension.wikilinks_title_before_pipe = self.wikilinks_title_before_pipe;
extension.front_matter_delimiter = None;
extension.header_ids = None;
extension
Expand Down
4 changes: 4 additions & 0 deletions script/cibuild
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/math_dol
|| failed=1
python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/math_code.md "$PROGRAM_ARG -e math-code" \
|| failed=1
python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilinks_title_after_pipe.md "$PROGRAM_ARG -e wikilinks-title-after-pipe" \
|| failed=1
python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilinks_title_before_pipe.md "$PROGRAM_ARG -e wikilinks-title-before-pipe" \
|| failed=1

python3 spec_tests.py --no-normalize --spec regression.txt "$PROGRAM_ARG" \
|| failed=1
Expand Down
21 changes: 20 additions & 1 deletion src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::ctype::{isalpha, isdigit, ispunct, isspace};
use crate::nodes::TableAlignment;
use crate::nodes::{
AstNode, ListDelimType, ListType, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink,
NodeMath, NodeTable, NodeValue,
NodeMath, NodeTable, NodeValue, NodeWikiLink,
};
#[cfg(feature = "shortcodes")]
use crate::parser::shortcodes::NodeShortCode;
Expand Down Expand Up @@ -385,6 +385,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
// noop - automatic escaping is already being done
}
NodeValue::Math(ref math) => self.format_math(math, allow_wrap, entering),
NodeValue::WikiLink(ref nl) => return self.format_wikilink(nl, entering),
};
true
}
Expand Down Expand Up @@ -689,6 +690,24 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
true
}

/// Writes a wikilink node back out in `[[...]]` source form.
///
/// The formatter calls this once with `entering == true` before the link's
/// children are rendered and once with `entering == false` afterwards, so the
/// children end up between the two halves written here:
///
/// * title-after-pipe mode:  `[[url|` children `]]`
/// * title-before-pipe mode: `[[` children `|url]]`
///
/// When both options are enabled the parser treats the text before the pipe
/// as the URL (title-after-pipe wins), so this formatter makes the same
/// choice and emits the URL exactly once. Emitting it on both sides would
/// produce `[[url|label|url]]`, which does not parse back as a wikilink.
///
/// Always returns `true`; the caller passes that value through as its own
/// result.
fn format_wikilink(&mut self, nl: &NodeWikiLink, entering: bool) -> bool {
    let url_leads = self.options.extension.wikilinks_title_after_pipe;
    // Trailing URL only when the leading form is not in effect, mirroring
    // the parser's precedence between the two options.
    let url_trails = self.options.extension.wikilinks_title_before_pipe && !url_leads;

    if entering {
        write!(self, "[[").unwrap();
        if url_leads {
            self.output(nl.url.as_bytes(), false, Escaping::Url);
            write!(self, "|").unwrap();
        }
    } else {
        if url_trails {
            write!(self, "|").unwrap();
            self.output(nl.url.as_bytes(), false, Escaping::Url);
        }
        write!(self, "]]").unwrap();
    }

    true
}

fn format_image(&mut self, nl: &NodeLink, allow_wrap: bool, entering: bool) {
if entering {
write!(self, "![").unwrap();
Expand Down
15 changes: 15 additions & 0 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,21 @@ impl<'o> HtmlFormatter<'o> {
self.render_math_inline(node, literal, display_math, dollar_math)?;
}
}
NodeValue::WikiLink(ref nl) => {
if entering {
self.output.write_all(b"<a")?;
self.render_sourcepos(node)?;
self.output.write_all(b" href=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
self.escape_href(url)?;
}
self.output.write_all(b"\" data-wikilink=\"true")?;
self.output.write_all(b"\">")?;
} else {
self.output.write_all(b"</a>")?;
}
}
}
Ok(false)
}
Expand Down
4 changes: 4 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ enum Extension {
MultilineBlockQuotes,
MathDollars,
MathCode,
WikilinksTitleAfterPipe,
WikilinksTitleBeforePipe,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
Expand Down Expand Up @@ -238,6 +240,8 @@ fn main() -> Result<(), Box<dyn Error>> {
.multiline_block_quotes(exts.contains(&Extension::MultilineBlockQuotes))
.math_dollars(exts.contains(&Extension::MathDollars))
.math_code(exts.contains(&Extension::MathCode))
.wikilinks_title_after_pipe(exts.contains(&Extension::WikilinksTitleAfterPipe))
.wikilinks_title_before_pipe(exts.contains(&Extension::WikilinksTitleBeforePipe))
.front_matter_delimiter(cli.front_matter_delimiter);

#[cfg(feature = "shortcodes")]
Expand Down
16 changes: 15 additions & 1 deletion src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ pub enum NodeValue {

/// **Inline**. A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes)
Escaped,

/// **Inline**. A wikilink to some URL.
WikiLink(NodeWikiLink),
}

/// Alignment of a single table cell.
Expand Down Expand Up @@ -253,6 +256,13 @@ pub struct NodeLink {
pub title: String,
}

/// The details of a wikilink's destination.
///
/// Only the destination is stored here. The text shown for the link is not a
/// field of this struct; the inline parser attaches it as a child text node of
/// the wikilink node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NodeWikiLink {
/// The URL for the link destination.
pub url: String,
}

/// The metadata of a list; the kind of list, the delimiter used and so on.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct NodeList {
Expand Down Expand Up @@ -489,6 +499,7 @@ impl NodeValue {
NodeValue::MultilineBlockQuote(_) => "multiline_block_quote",
NodeValue::Escaped => "escaped",
NodeValue::Math(..) => "math",
NodeValue::WikiLink(..) => "wikilink",
}
}
}
Expand Down Expand Up @@ -639,7 +650,8 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
| NodeValue::Emph
| NodeValue::Strong
| NodeValue::Link(..)
| NodeValue::Image(..) => !child.block(),
| NodeValue::Image(..)
| NodeValue::WikiLink(..) => !child.block(),

NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)),

Expand All @@ -657,6 +669,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
| NodeValue::Strikethrough
| NodeValue::HtmlInline(..)
| NodeValue::Math(..)
| NodeValue::WikiLink(..)
),

#[cfg(feature = "shortcodes")]
Expand All @@ -672,6 +685,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
| NodeValue::Strikethrough
| NodeValue::HtmlInline(..)
| NodeValue::Math(..)
| NodeValue::WikiLink(..)
),

NodeValue::MultilineBlockQuote(_) => {
Expand Down
159 changes: 153 additions & 6 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::arena_tree::Node;
use crate::ctype::{isdigit, ispunct, isspace};
use crate::entity;
use crate::nodes::{
Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, Sourcepos,
Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, NodeWikiLink,
Sourcepos,
};
#[cfg(feature = "shortcodes")]
use crate::parser::shortcodes::NodeShortCode;
Expand Down Expand Up @@ -105,6 +106,12 @@ struct Bracket<'a> {
bracket_after: bool,
}

/// The raw pieces of a `[[...]]` wikilink as sliced out of the input buffer,
/// before any URL cleaning or entity unescaping is applied.
#[derive(Clone, Copy)]
struct WikilinkComponents<'i> {
// Trimmed bytes of the link destination.
url: &'i [u8],
// Trimmed bytes of the label together with its start and end positions
// (label, start, end). `None` when the wikilink contains no `|`, in which
// case the caller derives the label from `url`.
link_label: Option<(&'i [u8], usize, usize)>,
}

impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
pub fn new(
arena: &'a Arena<AstNode<'a>>,
Expand Down Expand Up @@ -183,11 +190,30 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
'.' => Some(self.handle_period()),
'[' => {
self.pos += 1;
let inl =
self.make_inline(NodeValue::Text("[".to_string()), self.pos - 1, self.pos - 1);
self.push_bracket(false, inl);
self.within_brackets = true;
Some(inl)

let mut wikilink_inl = None;

if (self.options.extension.wikilinks_title_after_pipe
|| self.options.extension.wikilinks_title_before_pipe)
&& !self.within_brackets
&& self.peek_char() == Some(&(b'['))
{
wikilink_inl = self.handle_wikilink();
}

if wikilink_inl.is_none() {
let inl = self.make_inline(
NodeValue::Text("[".to_string()),
self.pos - 1,
self.pos - 1,
);
self.push_bracket(false, inl);
self.within_brackets = true;

Some(inl)
} else {
wikilink_inl
}
}
']' => {
self.within_brackets = false;
Expand Down Expand Up @@ -1548,6 +1574,127 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> {
}
}

// Parses wikilink syntax at the current position. Depending on which
// extension option is active, the two accepted layouts are
//   [[link text|url]]
//   [[url|link text]]
// and `[[url]]` (no pipe) uses the url as the link text as well.
//
// Expects `self.pos` to sit on the second `[` of the opening `[[`. Returns the
// new wikilink node, with a single text child holding the label, or `None` if
// the input at this position is not a well-formed wikilink.
pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> {
    let open_pos = self.pos;
    let parts = self.wikilink_url_link_label()?;

    let url_bytes = strings::clean_url(parts.url);

    // Without an explicit label the url doubles as the label, spanning
    // everything between the `[[` and the `]]` that was just consumed.
    let (raw_label, label_start, label_end) = parts
        .link_label
        .unwrap_or_else(|| (parts.url, open_pos + 1, self.pos - 3));
    let label_bytes = entity::unescape_html(raw_label);

    // The node covers the whole construct: from the first `[` (one before
    // `open_pos`) to the final `]` (one before the current position).
    let link = self.make_inline(
        NodeValue::WikiLink(NodeWikiLink {
            url: String::from_utf8(url_bytes).unwrap(),
        }),
        open_pos - 1,
        self.pos - 1,
    );

    let text = self.make_inline(
        NodeValue::Text(String::from_utf8(label_bytes).unwrap()),
        label_start,
        label_end,
    );
    link.append(text);

    Some(link)
}

// Splits the wikilink starting at `self.pos` (which must be on the second `[`
// of `[[`) into its url and optional label.
//
// On success `self.pos` is left just past the closing `]]`. On any failure
// `self.pos` is restored to where it was on entry and `None` is returned.
fn wikilink_url_link_label(&mut self) -> Option<WikilinkComponents<'i>> {
    let open = self.pos;

    if self.peek_char() != Some(&b'[') {
        return None;
    }

    // First component: everything after `[` up to the next delimiter.
    if !self.wikilink_component() {
        self.pos = open;
        return None;
    }
    let first = strings::trim_slice(&self.input[open + 1..self.pos]);

    // `[[url]]` form: no pipe, so there is no separate label.
    let closes_after_first =
        self.peek_char() == Some(&b']') && self.peek_char_n(1) == Some(&b']');
    if closes_after_first {
        self.pos += 2;
        return Some(WikilinkComponents {
            url: first,
            link_label: None,
        });
    }

    // Anything other than a pipe at this point means it is not a wikilink.
    if self.peek_char() != Some(&b'|') {
        self.pos = open;
        return None;
    }

    // Second component: everything after `|` up to the next delimiter.
    let pipe = self.pos;
    if !self.wikilink_component() {
        self.pos = open;
        return None;
    }
    let second = strings::trim_slice(&self.input[pipe + 1..self.pos]);

    let closes_after_second =
        self.peek_char() == Some(&b']') && self.peek_char_n(1) == Some(&b']');
    if !closes_after_second {
        self.pos = open;
        return None;
    }
    self.pos += 2;

    // Which side of the pipe is the url depends on the enabled extension;
    // title-after-pipe takes precedence when it is set.
    let components = if self.options.extension.wikilinks_title_after_pipe {
        WikilinkComponents {
            url: first,
            link_label: Some((second, pipe + 1, self.pos - 3)),
        }
    } else {
        WikilinkComponents {
            url: second,
            link_label: Some((first, open + 1, pipe - 1)),
        }
    };

    Some(components)
}

// Scans one wikilink component (a link label or a url). The component must be
// introduced by `[` or `|` at the current position; on success `self.pos` is
// advanced to its end, i.e. to the next unescaped `[`, `]` or `|`, or to the
// end of the input.
//
// Returns false, leaving `self.pos` where it started, if the introducer is
// missing or the component is longer than MAX_LINK_LABEL_LENGTH.
fn wikilink_component(&mut self) -> bool {
    let origin = self.pos;

    if !matches!(self.peek_char(), Some(&b'[') | Some(&b'|')) {
        return false;
    }

    // Step over the introducing `[` or `|`.
    self.pos += 1;

    let mut consumed = 0;
    let mut current = 0;
    while unwrap_into_copy(self.peek_char(), &mut current)
        && !matches!(current, b'[' | b']' | b'|')
    {
        // The current byte is always consumed.
        self.pos += 1;
        consumed += 1;

        // A backslash also swallows a following punctuation byte, so an
        // escaped delimiter does not terminate the component.
        if current == b'\\' && self.peek_char().map_or(false, |&next| ispunct(next)) {
            self.pos += 1;
            consumed += 1;
        }

        if consumed > MAX_LINK_LABEL_LENGTH {
            self.pos = origin;
            return false;
        }
    }

    true
}

pub fn spnl(&mut self) {
self.skip_spaces();
if self.skip_line_end() {
Expand Down
Loading