From 5d175a759f9d40b9e10f1724475fd61636987c05 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Wed, 4 Dec 2024 09:16:21 +0100 Subject: [PATCH 1/2] Implement format strings --- nemo/src/parser/ast/expression.rs | 11 +- .../src/parser/ast/expression/basic/string.rs | 7 +- nemo/src/parser/ast/expression/complex.rs | 1 + .../parser/ast/expression/complex/fstring.rs | 138 ++++++++++++++++++ nemo/src/parser/ast/token.rs | 57 ++++++-- nemo/src/parser/context.rs | 3 + nemo/src/rule_model/translation/rule.rs | 37 ++++- nemo/src/syntax.rs | 13 ++ 8 files changed, 252 insertions(+), 15 deletions(-) create mode 100644 nemo/src/parser/ast/expression/complex/fstring.rs diff --git a/nemo/src/parser/ast/expression.rs b/nemo/src/parser/ast/expression.rs index 6ab52173..b8c58825 100644 --- a/nemo/src/parser/ast/expression.rs +++ b/nemo/src/parser/ast/expression.rs @@ -8,8 +8,8 @@ use basic::{ string::StringLiteral, variable::Variable, }; use complex::{ - aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, map::Map, negation::Negation, - operation::Operation, parenthesized::ParenthesizedExpression, tuple::Tuple, + aggregation::Aggregation, arithmetic::Arithmetic, atom::Atom, fstring::FormatString, map::Map, + negation::Negation, operation::Operation, parenthesized::ParenthesizedExpression, tuple::Tuple, }; use nom::{branch::alt, combinator::map}; @@ -37,6 +37,8 @@ pub enum Expression<'a> { Boolean(Boolean<'a>), /// Constant Constant(Constant<'a>), + /// Format String + FormatString(FormatString<'a>), /// Map Map(Map<'a>), /// Negation @@ -67,6 +69,7 @@ impl<'a> Expression<'a> { Expression::Blank(expression) => expression.context(), Expression::Boolean(expression) => expression.context(), Expression::Constant(expression) => expression.context(), + Expression::FormatString(expression) => expression.context(), Expression::Map(expression) => expression.context(), Expression::Number(expression) => expression.context(), Expression::Negation(expression) => expression.context(), @@ 
-101,6 +104,7 @@ impl<'a> Expression<'a> { map(Map::parse, Self::Map), map(Negation::parse, Self::Negation), map(Tuple::parse, Self::Tuple), + map(FormatString::parse, Self::FormatString), ))(input) } } @@ -116,6 +120,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Blank(expression) => expression, Expression::Boolean(expression) => expression, Expression::Constant(expression) => expression, + Expression::FormatString(expression) => expression, Expression::Map(expression) => expression, Expression::Number(expression) => expression, Expression::Negation(expression) => expression, @@ -136,6 +141,7 @@ impl<'a> ProgramAST<'a> for Expression<'a> { Expression::Blank(expression) => expression.span(), Expression::Boolean(expression) => expression.span(), Expression::Constant(expression) => expression.span(), + Expression::FormatString(expression) => expression.span(), Expression::Map(expression) => expression.span(), Expression::Number(expression) => expression.span(), Expression::Negation(expression) => expression.span(), @@ -201,6 +207,7 @@ mod test { ("\"\"", ParserContext::String), ("(1,)", ParserContext::Tuple), ("?variable", ParserContext::Variable), + ("f\"{?x + ?y}\"", ParserContext::FormatString), ]; for (input, expect) in test { diff --git a/nemo/src/parser/ast/expression/basic/string.rs b/nemo/src/parser/ast/expression/basic/string.rs index 22c9db92..d3478faa 100644 --- a/nemo/src/parser/ast/expression/basic/string.rs +++ b/nemo/src/parser/ast/expression/basic/string.rs @@ -2,6 +2,7 @@ #![allow(missing_docs)] use nom::{ + branch::alt, combinator::opt, sequence::{delimited, pair}, }; @@ -39,7 +40,11 @@ impl<'a> StringLiteral<'a> { /// Parse the main part of the string. pub fn parse_string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - delimited(Token::quote, Token::string, Token::quote)(input) + delimited( + Token::quote, + alt((Token::string, Token::empty)), + Token::quote, + )(input) } /// Parse the language tag of the string. 
diff --git a/nemo/src/parser/ast/expression/complex.rs b/nemo/src/parser/ast/expression/complex.rs index 4bddd217..17167459 100644 --- a/nemo/src/parser/ast/expression/complex.rs +++ b/nemo/src/parser/ast/expression/complex.rs @@ -3,6 +3,7 @@ pub mod aggregation; pub mod arithmetic; pub mod atom; +pub mod fstring; pub mod infix; pub mod map; pub mod negation; diff --git a/nemo/src/parser/ast/expression/complex/fstring.rs b/nemo/src/parser/ast/expression/complex/fstring.rs new file mode 100644 index 00000000..ad0adb2a --- /dev/null +++ b/nemo/src/parser/ast/expression/complex/fstring.rs @@ -0,0 +1,138 @@ +//! This module defines [FormatString]. + +use nom::{branch::alt, combinator::map, multi::many0, sequence::delimited}; + +use crate::parser::{ + ast::{expression::Expression, token::Token, ProgramAST}, + context::{context, ParserContext}, + input::ParserInput, + span::Span, + ParserResult, +}; + +/// Elements that make up a [FormatString] +#[derive(Debug)] +pub enum FormatStringElement<'a> { + /// String + String(Token<'a>), + /// Expression + Expression(Expression<'a>), +} + +/// A string which may include sub expressions +#[derive(Debug)] +pub struct FormatString<'a> { + /// [Span] associated with this node + span: Span<'a>, + + /// List of [FormatStringElement] + elements: Vec<FormatStringElement<'a>>, +} + +impl<'a> FormatString<'a> { + /// Return an iterator over the underlying [FormatStringElement]s. + pub fn elements(&self) -> impl Iterator<Item = &FormatStringElement<'a>> { + self.elements.iter() + } + + /// Parse an [Expression] surrounded by fstring start and end tokens. + fn parse_expression(input: ParserInput<'a>) -> ParserResult<'a, Expression<'a>> { + delimited( + Token::fstring_expression_start, + Expression::parse, + Token::fstring_expression_end, + )(input) + } + + /// Parse [FormatStringElement] by parsing either a string or an expression element. 
+ fn parse_element(input: ParserInput<'a>) -> ParserResult<'a, FormatStringElement<'a>> { + alt(( + map(Token::fstring, FormatStringElement::String), + map(Self::parse_expression, FormatStringElement::Expression), + ))(input) + } +} + +const CONTEXT: ParserContext = ParserContext::FormatString; + +impl<'a> ProgramAST<'a> for FormatString<'a> { + fn children(&self) -> Vec<&dyn ProgramAST> { + let mut result = Vec::<&dyn ProgramAST>::new(); + + for element in &self.elements { + match element { + FormatStringElement::String(_token) => {} + FormatStringElement::Expression(expression) => result.push(expression), + } + } + + result + } + + fn span(&self) -> Span<'a> { + self.span + } + + fn parse(input: ParserInput<'a>) -> ParserResult<'a, Self> + where + Self: Sized + 'a, + { + let input_span = input.span; + + context( + CONTEXT, + delimited( + Token::fstring_open, + many0(Self::parse_element), + Token::fstring_close, + ), + )(input) + .map(|(rest, elements)| { + let rest_span = rest.span; + + ( + rest, + Self { + span: input_span.until_rest(&rest_span), + elements, + }, + ) + }) + } + + fn context(&self) -> ParserContext { + CONTEXT + } +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + use crate::parser::{ + ast::{expression::complex::fstring::FormatString, ProgramAST}, + input::ParserInput, + ParserState, + }; + + #[test] + fn parse_format_string() { + let test = vec![ + ("f\"\"", 0), + ("f\"string\"", 1), + ("f\"{?x + 1}\"", 1), + ("f\"result: {?x + 1}\"", 2), + ("f\"{?x} + {?y} = {?x + ?y}\"", 5), + ]; + + for (input, expected) in test { + let parser_input = ParserInput::new(input, ParserState::default()); + let result = all_consuming(FormatString::parse)(parser_input); + + assert!(result.is_ok()); + + let result = result.unwrap().1; + assert_eq!(expected, result.elements().count()); + } + } +} diff --git a/nemo/src/parser/ast/token.rs b/nemo/src/parser/ast/token.rs index 54228192..9d2a0375 100644 --- a/nemo/src/parser/ast/token.rs +++ 
b/nemo/src/parser/ast/token.rs @@ -24,7 +24,7 @@ use crate::{ self, comment, datavalues::{self, boolean, iri, map, string, tuple, RDF_DATATYPE_INDICATOR}, directive, - expression::{aggregate, atom, operation, variable}, + expression::{aggregate, atom, format_string, operation, variable}, operator, rule, }, }; @@ -150,6 +150,18 @@ pub enum TokenKind { /// Quote #[assoc(name = "\"")] Quote, + /// Format string open + #[assoc(name = format_string::OPEN)] + FormatStringOpen, + /// Format string close + #[assoc(name = format_string::CLOSE)] + FormatStringClose, + /// Format string expression start + #[assoc(name = format_string::EXPRESSION_START)] + FormatStringExpressionStart, + /// Format string expression end + #[assoc(name = format_string::EXPRESSION_END)] + FormatStringExpressionEnd, /// Blank node prefix #[assoc(name = "_:")] BlankNodePrefix, @@ -177,6 +189,9 @@ pub enum TokenKind { /// String #[assoc(name = "string")] String, + /// Format string + #[assoc(name = "format-string")] + FormatString, /// Token marking language tag #[assoc(name = string::LANG_TAG)] LangTagIndicator, @@ -343,25 +358,38 @@ impl<'a> Token<'a> { }) } - /// Parse [TokenKind::String]. - pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { - let input_span = input.span; - // NOTE: Optional for empty string, because `is_not` fails on "\"" - opt(is_not("\""))(input).map(|(rest, result)| { + /// Parse arbitrary characters excluding the ones given as a parameter. + fn parse_character_sequence( + input: ParserInput<'a>, + exclude: &str, + ) -> ParserResult<'a, Token<'a>> { + is_not(exclude)(input).map(|(rest, result)| { ( rest.clone(), Token { - span: if let Some(result) = result { - result.span - } else { - input_span.until_rest(&rest.span) - }, + span: result.span, kind: TokenKind::String, }, ) }) } + /// Parse [TokenKind::String]. + pub fn string(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + Self::parse_character_sequence(input, "\"") + } + + /// Parse [TokenKind::FormatString]. 
+ pub fn fstring(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { + let excluded = format!( + "\"{}{}", + format_string::EXPRESSION_START, + format_string::EXPRESSION_END + ); + + Self::parse_character_sequence(input, &excluded) + } + /// Parse [TokenKind::Digits]. pub fn digits(input: ParserInput<'a>) -> ParserResult<'a, Token<'a>> { context(ParserContext::token(TokenKind::Digits), digit1)(input).map( @@ -605,6 +633,13 @@ impl<'a> Token<'a> { string_token!(doc_comment, TokenKind::DocComment); string_token!(toplevel_comment, TokenKind::TopLevelComment); string_token!(quote, TokenKind::Quote); + string_token!(fstring_open, TokenKind::FormatStringOpen); + string_token!(fstring_close, TokenKind::FormatStringClose); + string_token!( + fstring_expression_start, + TokenKind::FormatStringExpressionStart + ); + string_token!(fstring_expression_end, TokenKind::FormatStringExpressionEnd); string_token!(blank_node_prefix, TokenKind::BlankNodePrefix); string_token!(exponent_lower, TokenKind::ExponentLower); string_token!(exponent_upper, TokenKind::ExponentUpper); diff --git a/nemo/src/parser/context.rs b/nemo/src/parser/context.rs index c032b395..912c184d 100644 --- a/nemo/src/parser/context.rs +++ b/nemo/src/parser/context.rs @@ -25,6 +25,9 @@ pub enum ParserContext { /// String #[assoc(name = "string")] String, + /// Format String + #[assoc(name = "format-string")] + FormatString, /// Iri #[assoc(name = "iri")] Iri, diff --git a/nemo/src/rule_model/translation/rule.rs b/nemo/src/rule_model/translation/rule.rs index da60d9a2..89ba4b52 100644 --- a/nemo/src/rule_model/translation/rule.rs +++ b/nemo/src/rule_model/translation/rule.rs @@ -10,7 +10,11 @@ use crate::{ literal::Literal, rule::{Rule, RuleBuilder}, tag::Tag, - term::{primitive::Primitive, Term}, + term::{ + operation::{operation_kind::OperationKind, Operation}, + primitive::Primitive, + Term, + }, ProgramComponent, }, error::{translation_error::TranslationErrorKind, TranslationError}, @@ -181,7 +185,38 @@ 
impl<'a> ASTProgramTranslation<'a> { ast::expression::Expression::Parenthesized(parenthesized) => { self.build_inner_term(parenthesized.expression()) } + ast::expression::Expression::FormatString(format_string) => { + self.build_format_string(format_string).map(Term::from) + } }? .set_origin(self.register_node(expression))) } + + /// Construct an [Operation] from a given + /// [ast::expression::complex::fstring::FormatString] + /// by converting it into a string concatenation. + fn build_format_string( + &mut self, + format_string: &'a ast::expression::complex::fstring::FormatString, + ) -> Result<Operation, TranslationError> { + let mut subterms = Vec::new(); + + for element in format_string.elements() { + let term = match element { + ast::expression::complex::fstring::FormatStringElement::String(token) => { + Term::from(token.to_string()) + } + ast::expression::complex::fstring::FormatStringElement::Expression(expression) => { + let inner_term = self.build_inner_term(expression)?; + let string_conversion = + Operation::new(OperationKind::LexicalValue, vec![inner_term]); + Term::from(string_conversion) + } + }; + + subterms.push(term); + } + + Ok(Operation::new(OperationKind::StringConcatenation, subterms)) + } } diff --git a/nemo/src/syntax.rs b/nemo/src/syntax.rs index f1954acb..ed23e2c8 100644 --- a/nemo/src/syntax.rs +++ b/nemo/src/syntax.rs @@ -125,6 +125,19 @@ pub mod expression { /// Closing delimiter for argument list pub const CLOSE: &str = ")"; } + + /// Syntax for format strings + pub mod format_string { + /// Opening part of a format string + pub const OPEN: &str = "f\""; + /// Closing part of a format string + pub const CLOSE: &str = "\""; + + /// Marker of the start of an expression + pub const EXPRESSION_START: &str = "{"; + /// Marker of the end of an expression + pub const EXPRESSION_END: &str = "}"; + } } pub mod comment { From 8b14625231e4106dcd8409944cdc1c11a6fbe1a6 Mon Sep 17 00:00:00 2001 From: Alex Ivliev Date: Mon, 9 Dec 2024 10:07:56 +0100 Subject: [PATCH 2/2] Add 
integration test --- resources/testcases/arithmetic/builtins.rls | 4 ++++ resources/testcases/arithmetic/builtins/result.csv | 2 ++ 2 files changed, 6 insertions(+) diff --git a/resources/testcases/arithmetic/builtins.rls b/resources/testcases/arithmetic/builtins.rls index 6291de4f..64222c56 100644 --- a/resources/testcases/arithmetic/builtins.rls +++ b/resources/testcases/arithmetic/builtins.rls @@ -72,6 +72,10 @@ result(stringstarts_false, ?R) :- strings(_, ?B), ?R = STRSTARTS(?B, "Hell"). result(stringends_true, ?R) :- strings(?A, _), ?R = STRENDS(?A, "ello"). result(stringends_false, ?R) :- strings(_, ?B), ?R = STRENDS(?B, "ello"). +% F-string literal +result(fstring_basic, ?R) :- strings(?A, ?B), ?R = f"{?A} and {?B}". +result(fstring_arithmetic, ?R) :- strings(?A, ?B), ?R = f"len*10={STRLEN(?A) * 10}". + % Numeric arithmetic result(calculation, ?R) :- doubles(?A, ?B, ?C), ?R = POW((?A + ?B + ?C) / 2E0, 3E0) * LOG(16E0, 4E0) + SIN(?C + 0.023599E0) * SQRT(16E0). result(remainder, ?R) :- integers(_, ?A, ?B), ?R = REM(?B, ?A). diff --git a/resources/testcases/arithmetic/builtins/result.csv b/resources/testcases/arithmetic/builtins/result.csv index 36351957..ba47af49 100644 --- a/resources/testcases/arithmetic/builtins/result.csv +++ b/resources/testcases/arithmetic/builtins/result.csv @@ -60,3 +60,5 @@ max,"""2""^^" bitand,0 bitor,3 bitxor,0 +fstring_basic,"""Hello and World""" +fstring_arithmetic,"""len*10=50""" \ No newline at end of file