Skip to content

Commit

Permalink
Feature/fstrings (#566)
Browse files Browse the repository at this point in the history
Implements fstring syntax inspired by Python
  • Loading branch information
aannleax authored Jan 7, 2025
2 parents fdc094b + 8b14625 commit 2d18ada
Show file tree
Hide file tree
Showing 10 changed files with 258 additions and 15 deletions.
11 changes: 9 additions & 2 deletions nemo/src/parser/ast/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use basic::{
string::StringLiteral, variable::Variable,
};
use complex::{
aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation,
operation::Operation, parenthesized::ParenthesizedExpression, tuple::Tuple,
aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, fstring::FormatString, map::Map,
negation::Negation, operation::Operation, parenthesized::ParenthesizedExpression, tuple::Tuple,
};
use nom::{branch::alt, combinator::map};

Expand Down Expand Up @@ -37,6 +37,8 @@ pub enum Expression<'a> {
Boolean(Boolean<'a>),
/// Constant
Constant(Constant<'a>),
/// Format String
FormatString(FormatString<'a>),
/// Map
Map(Map<'a>),
/// Negation
Expand Down Expand Up @@ -67,6 +69,7 @@ impl<'a> Expression<'a> {
Expression::Blank(expression) => expression.context(),
Expression::Boolean(expression) => expression.context(),
Expression::Constant(expression) => expression.context(),
Expression::FormatString(expression) => expression.context(),
Expression::Map(expression) => expression.context(),
Expression::Number(expression) => expression.context(),
Expression::Negation(expression) => expression.context(),
Expand Down Expand Up @@ -101,6 +104,7 @@ impl<'a> Expression<'a> {
map(Map::parse, Self::Map),
map(Negation::parse, Self::Negation),
map(Tuple::parse, Self::Tuple),
map(FormatString::parse, Self::FormatString),
))(input)
}
}
Expand All @@ -116,6 +120,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
Expression::Blank(expression) => expression,
Expression::Boolean(expression) => expression,
Expression::Constant(expression) => expression,
Expression::FormatString(expression) => expression,
Expression::Map(expression) => expression,
Expression::Number(expression) => expression,
Expression::Negation(expression) => expression,
Expand All @@ -136,6 +141,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> {
Expression::Blank(expression) => expression.span(),
Expression::Boolean(expression) => expression.span(),
Expression::Constant(expression) => expression.span(),
Expression::FormatString(expression) => expression.span(),
Expression::Map(expression) => expression.span(),
Expression::Number(expression) => expression.span(),
Expression::Negation(expression) => expression.span(),
Expand Down Expand Up @@ -201,6 +207,7 @@ mod test {
("\"\"", ParserContext::String),
("(1,)", ParserContext::Tuple),
("?variable", ParserContext::Variable),
("f\"{?x + ?y}\"", ParserContext::FormatString),
];

for (input, expect) in test {
Expand Down
7 changes: 6 additions & 1 deletion nemo/src/parser/ast/expression/basic/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#![allow(missing_docs)]

use nom::{
branch::alt,
combinator::opt,
sequence::{delimited, pair},
};
Expand Down Expand Up @@ -39,7 +40,11 @@ impl<'a> StringLiteral<'a> {

/// Parse the main part of the string.
pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
delimited(Token::quote, Token::string, Token::quote)(input)
delimited(
Token::quote,
alt((Token::string, Token::empty)),
Token::quote,
)(input)
}

/// Parse the language tag of the string.
Expand Down
1 change: 1 addition & 0 deletions nemo/src/parser/ast/expression/complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub mod aggregation;
pub mod arithmetic;
pub mod atom;
pub mod fstring;
pub mod infix;
pub mod map;
pub mod negation;
Expand Down
138 changes: 138 additions & 0 deletions nemo/src/parser/ast/expression/complex/fstring.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
//! This module defines [FormatString].
use nom::{branch::alt, combinator::map, multi::many0, sequence::delimited};

use crate::parser::{
ast::{expression::Expression, token::Token, ProgramAST},
context::{context, ParserContext},
input::ParserInput,
span::Span,
ParserResult,
};

/// A single element of a [FormatString].
///
/// A format string is parsed into a sequence of these elements:
/// pieces of literal text and embedded expressions.
#[derive(Debug)]
pub enum FormatStringElement<'a> {
/// Literal text, kept as the [Token] that was parsed for it
String(Token<'a>),
/// Embedded [Expression] that was enclosed by the expression start and end tokens
Expression(Expression<'a>),
}

/// A string which may include sub expressions
#[derive(Debug)]
pub struct FormatString<'a> {
/// [Span] associated with this node
span: Span<'a>,

/// The [FormatStringElement]s making up this string, in the order they were parsed
elements: Vec<FormatStringElement<'a>>,
}

impl<'a> FormatString<'a> {
    /// Return an iterator over the [FormatStringElement]s of this format string,
    /// i.e. both its literal text pieces and its embedded expressions.
    pub fn elements(&self) -> impl Iterator<Item = &FormatStringElement<'a>> {
        self.elements.iter()
    }

    /// Parse a single embedded [Expression],
    /// which must be enclosed by the fstring expression start and end tokens.
    fn parse_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> {
        let mut embedded = delimited(
            Token::fstring_expression_start,
            Expression::parse,
            Token::fstring_expression_end,
        );

        embedded(input)
    }

    /// Parse one [FormatStringElement].
    ///
    /// Literal text is attempted first; an embedded expression is the fallback.
    fn parse_element(input: ParserInput<'a>) -> ParserResult<'a, FormatStringElement<'a>> {
        let literal = map(Token::fstring, FormatStringElement::String);
        let embedded = map(Self::parse_expression, FormatStringElement::Expression);

        alt((literal, embedded))(input)
    }
}

/// [ParserContext] associated with [FormatString] nodes
const CONTEXT: ParserContext = ParserContext::FormatString;

impl<'a> ProgramAST<'a> for FormatString<'a> {
    /// Only embedded expressions are AST nodes; literal text tokens are not reported.
    fn children(&self) -> Vec<&dyn ProgramAST> {
        let mut children = Vec::<&dyn ProgramAST>::new();

        for element in self.elements.iter() {
            if let FormatStringElement::Expression(expression) = element {
                children.push(expression);
            }
        }

        children
    }

    fn span(&self) -> Span<'a> {
        self.span
    }

    /// Parse the opening token, any number of elements, and the closing token.
    fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self>
    where
        Self: Sized + 'a,
    {
        // Remember where parsing started, since `input` is consumed below.
        let start_span = input.span;

        let (rest, elements) = context(
            CONTEXT,
            delimited(
                Token::fstring_open,
                many0(Self::parse_element),
                Token::fstring_close,
            ),
        )(input)?;

        // The node covers everything between the start and the unconsumed rest.
        let span = start_span.until_rest(&rest.span);

        Ok((rest, Self { span, elements }))
    }

    fn context(&self) -> ParserContext {
        CONTEXT
    }
}

#[cfg(test)]
mod test {
    use nom::combinator::all_consuming;

    use crate::parser::{
        ast::{expression::complex::fstring::FormatString, ProgramAST},
        input::ParserInput,
        ParserState,
    };

    /// Every input must be consumed completely
    /// and yield the expected number of elements.
    #[test]
    fn parse_format_string() {
        // (source text, expected number of parsed elements)
        let cases = vec![
            ("f\"\"", 0),
            ("f\"string\"", 1),
            ("f\"{?x + 1}\"", 1),
            ("f\"result: {?x + 1}\"", 2),
            ("f\"{?x} + {?y} = {?x + ?y}\"", 5),
        ];

        for (source, element_count) in cases {
            let parsed = all_consuming(FormatString::parse)(ParserInput::new(
                source,
                ParserState::default(),
            ));

            assert!(parsed.is_ok());

            let (_, format_string) = parsed.unwrap();
            assert_eq!(element_count, format_string.elements().count());
        }
    }
}
57 changes: 46 additions & 11 deletions nemo/src/parser/ast/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::{
self, comment,
datavalues::{self, boolean, iri, map, string, tuple, RDF_DATATYPE_INDICATOR},
directive,
expression::{aggregate, atom, operation, variable},
expression::{aggregate, atom, format_string, operation, variable},
operator, rule,
},
};
Expand Down Expand Up @@ -150,6 +150,18 @@ pub enum TokenKind {
/// Quote
#[assoc(name = "\"")]
Quote,
/// Format string open
#[assoc(name = format_string::OPEN)]
FormatStringOpen,
/// Format string close
#[assoc(name = format_string::CLOSE)]
FormatStringClose,
/// Format string expression start
#[assoc(name = format_string::EXPRESSION_START)]
FormatStringExpressionStart,
/// Format string expression end
#[assoc(name = format_string::EXPRESSION_END)]
FormatStringExpressionEnd,
/// Blank node prefix
#[assoc(name = "_:")]
BlankNodePrefix,
Expand Down Expand Up @@ -177,6 +189,9 @@ pub enum TokenKind {
/// String
#[assoc(name = "string")]
String,
/// Format string
#[assoc(name = "format-string")]
FormatString,
/// Token marking language tag
#[assoc(name = string::LANG_TAG)]
LangTagIndicator,
Expand Down Expand Up @@ -343,25 +358,38 @@ impl<'a> Token<'a> {
})
}

/// Parse [TokenKind::String].
pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
let input_span = input.span;
// NOTE: Optional for empty string, because `is_not` fails on "\""
opt(is_not("\""))(input).map(|(rest, result)| {
/// Parse arbitrary characters excluding the ones given as a parameter.
fn parse_character_sequence(
input: ParserInput<'a>,
exclude: &str,
) -> ParserResult<'a, Token<'a>> {
is_not(exclude)(input).map(|(rest, result)| {
(
rest.clone(),
Token {
span: if let Some(result) = result {
result.span
} else {
input_span.until_rest(&rest.span)
},
span: result.span,
kind: TokenKind::String,
},
)
})
}

/// Parse [TokenKind::String]:
/// a sequence of characters up to, but not including, the next double quote.
///
/// NOTE(review): the underlying `is_not` parser appears to fail on an empty
/// sequence, so callers that accept empty strings need their own fallback — confirm.
pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
Self::parse_character_sequence(input, "\"")
}

/// Parse [TokenKind::FormatString].
pub fn fstring(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
let excluded = format!(
"\"{}{}",
format_string::EXPRESSION_START,
format_string::EXPRESSION_END
);

Self::parse_character_sequence(input, &excluded)
}

/// Parse [TokenKind::Digits].
pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> {
context(ParserContext::token(TokenKind::Digits), digit1)(input).map(
Expand Down Expand Up @@ -605,6 +633,13 @@ impl<'a> Token<'a> {
string_token!(doc_comment, TokenKind::DocComment);
string_token!(toplevel_comment, TokenKind::TopLevelComment);
string_token!(quote, TokenKind::Quote);
string_token!(fstring_open, TokenKind::FormatStringOpen);
string_token!(fstring_close, TokenKind::FormatStringClose);
string_token!(
fstring_expression_start,
TokenKind::FormatStringExpressionStart
);
string_token!(fstring_expression_end, TokenKind::FormatStringExpressionEnd);
string_token!(blank_node_prefix, TokenKind::BlankNodePrefix);
string_token!(exponent_lower, TokenKind::ExponentLower);
string_token!(exponent_upper, TokenKind::ExponentUpper);
Expand Down
3 changes: 3 additions & 0 deletions nemo/src/parser/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ pub enum ParserContext {
/// String
#[assoc(name = "string")]
String,
/// Format String
#[assoc(name = "format-string")]
FormatString,
/// Iri
#[assoc(name = "iri")]
Iri,
Expand Down
37 changes: 36 additions & 1 deletion nemo/src/rule_model/translation/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ use crate::{
literal::Literal,
rule::{Rule, RuleBuilder},
tag::Tag,
term::{primitive::Primitive, Term},
term::{
operation::{operation_kind::OperationKind, Operation},
primitive::Primitive,
Term,
},
ProgramComponent,
},
error::{translation_error::TranslationErrorKind, TranslationError},
Expand Down Expand Up @@ -181,7 +185,38 @@ impl<'a> ASTProgramTranslation<'a> {
ast::expression::Expression::Parenthesized(parenthesized) => {
self.build_inner_term(parenthesized.expression())
}
ast::expression::Expression::FormatString(format_string) => {
self.build_format_string(format_string).map(Term::from)
}
}?
.set_origin(self.register_node(expression)))
}

/// Translate an [ast::expression::complex::fstring::FormatString]
/// into an [Operation] that concatenates its elements as strings.
///
/// Literal text becomes a string term, while each embedded expression
/// is translated and wrapped in [OperationKind::LexicalValue].
/// The first expression that fails to translate aborts with its error.
fn build_format_string(
    &mut self,
    format_string: &'a ast::expression::complex::fstring::FormatString,
) -> Result<Operation, TranslationError> {
    let subterms = format_string
        .elements()
        .map(|element| match element {
            ast::expression::complex::fstring::FormatStringElement::String(token) => {
                Ok(Term::from(token.to_string()))
            }
            ast::expression::complex::fstring::FormatStringElement::Expression(expression) => {
                self.build_inner_term(expression).map(|inner_term| {
                    Term::from(Operation::new(
                        OperationKind::LexicalValue,
                        vec![inner_term],
                    ))
                })
            }
        })
        .collect::<Result<Vec<_>, TranslationError>>()?;

    Ok(Operation::new(OperationKind::StringConcatenation, subterms))
}
}
Loading

0 comments on commit 2d18ada

Please sign in to comment.