diff --git a/liquid2/__init__.py b/liquid2/__init__.py
index 4248e14..b94c562 100644
--- a/liquid2/__init__.py
+++ b/liquid2/__init__.py
@@ -26,6 +26,7 @@
 from .token import is_raw_token
 from .token import is_tag_token
+from .token import is_template_string_token
 from .token import is_token_type
 from .stream import TokenStream
 from .expression import Expression
 from .tag import Tag
@@ -167,6 +168,7 @@ def extract_liquid(
     "is_range_token",
     "is_raw_token",
     "is_tag_token",
+    "is_template_string_token",
     "is_token_type",
     "LinesToken",
     "Node",
diff --git a/liquid2/builtin/expressions.py b/liquid2/builtin/expressions.py
index e884dd4..4962358 100644
--- a/liquid2/builtin/expressions.py
+++ b/liquid2/builtin/expressions.py
@@ -19,13 +19,17 @@
 from typing import cast

 from markupsafe import Markup
+from markupsafe import escape

 from liquid2 import PathToken
 from liquid2 import RenderContext
 from liquid2 import Token
+from liquid2 import TokenStream
 from liquid2 import TokenType
+from liquid2 import is_output_token
 from liquid2 import is_path_token
 from liquid2 import is_range_token
+from liquid2 import is_template_string_token
 from liquid2 import is_token_type
 from liquid2.exceptions import LiquidSyntaxError
 from liquid2.exceptions import LiquidTypeError
@@ -34,9 +38,9 @@
 from liquid2.unescape import unescape

 if TYPE_CHECKING:
+    from liquid2 import OutputToken
     from liquid2 import PathT
     from liquid2 import RenderContext
-    from liquid2 import TokenStream
     from liquid2 import TokenT
@@ -300,6 +304,69 @@ def children(self) -> list[Expression]:
         return [self.start, self.stop]


+class TemplateString(Expression):
+    """A string literal containing interpolated `${...}` expressions."""
+
+    __slots__ = ("template",)
+
+    def __init__(self, token: TokenT, template: list[Token | OutputToken]) -> None:
+        super().__init__(token)
+        self.template: list[Expression] = []
+
+        for _token in template:
+            if is_token_type(_token, TokenType.SINGLE_QUOTE_STRING):
+                self.template.append(
+                    StringLiteral(
+                        _token, unescape(_token.value.replace("\\'", "'"), token=_token)
+                    )
+                )
+            elif is_token_type(_token, TokenType.DOUBLE_QUOTE_STRING):
+                self.template.append(
+                    StringLiteral(_token, unescape(_token.value, token=_token))
+                )
+            elif is_output_token(_token):
+                self.template.append(
+                    FilteredExpression.parse(TokenStream(_token.expression))
+                )
+            else:
+                raise LiquidSyntaxError(
+                    "unexpected token in template string", token=_token
+                )
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, TemplateString) and self.template == other.template
+
+    def __str__(self) -> str:
+        return repr(
+            "".join(
+                e.value if isinstance(e, StringLiteral) else f"${{{e}}}"
+                for e in self.template
+            )
+        )
+
+    def __hash__(self) -> int:
+        return hash(tuple(self.template))
+
+    def __sizeof__(self) -> int:
+        return sum(sys.getsizeof(expr) for expr in self.template)
+
+    def evaluate(self, context: RenderContext) -> str:
+        return "".join(
+            _to_liquid_string(expr.evaluate(context)) for expr in self.template
+        )
+
+    async def evaluate_async(self, context: RenderContext) -> str:
+        return "".join(
+            [
+                _to_liquid_string(await expr.evaluate_async(context))
+                for expr in self.template
+            ]
+        )
+
+    def children(self) -> list[Expression]:
+        return self.template
+
+
 RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")

 Segments: TypeAlias = tuple[Union[str, int, "Segments"], ...]
@@ -475,6 +540,9 @@ def parse_primitive(token: TokenT) -> Expression:  # noqa: PLR0911
             token, unescape(token.value.replace("\\'", "'"), token=token)
         )

+    if is_template_string_token(token):
+        return TemplateString(token, token.template)
+
     if is_path_token(token):
         return Path(token, token.path)
@@ -713,6 +781,10 @@ def parse(  # noqa: PLR0912
                 filter_arguments.append(
                     PositionalArgument(Path(token, [token.value]))
                 )
+            elif is_template_string_token(token):
+                filter_arguments.append(
+                    PositionalArgument(TemplateString(token, token.template))
+                )
             elif is_path_token(token):
                 filter_arguments.append(
                     PositionalArgument(Path(token, token.path))
                 )
@@ -914,6 +986,8 @@ def parse_boolean_primitive(  # noqa: PLR0912
         left = StringLiteral(
             token, unescape(token.value.replace("\\'", "'"), token=token)
         )
+    elif is_template_string_token(token):
+        left = TemplateString(token, token.template)
     elif is_path_token(token):
         left = Path(token, token.path)
     elif is_range_token(token):
@@ -1584,7 +1658,10 @@ def parse_identifier(token: TokenT) -> Identifier:


 def parse_string_or_identifier(token: TokenT) -> Identifier:
-    """Parse _token_ as an identifier or a string literal."""
+    """Parse _token_ as an identifier or a string literal.
+
+    Excludes template strings.
+    """
     if is_token_type(token, TokenType.DOUBLE_QUOTE_STRING):
         return Identifier(unescape(token.value, token=token), token=token)
@@ -1603,7 +1680,10 @@ def parse_string_or_path(token: TokenT) -> StringLiteral | Path:
-    """Parse _token_ as a string literal or a path."""
+    """Parse _token_ as a string literal or a path.
+
+    Excludes template strings.
+    """
     if is_token_type(token, TokenType.WORD):
         return Path(token, [token.value])
@@ -1789,3 +1869,35 @@ def _contains(token: TokenT, left: object, right: object) -> bool:
         f"and '{right.__class__.__name__}'",
         token=token,
     )
+
+
+# XXX: Copied from liquid2.stringify.to_liquid_string to avoid import issues.
+def _to_liquid_string(val: Any, *, auto_escape: bool = False) -> str:
+    """Stringify a Python object ready for output in a Liquid template."""
+    if isinstance(val, str) or (auto_escape and hasattr(val, "__html__")):
+        pass
+    elif isinstance(val, bool):
+        val = str(val).lower()
+    elif val is None:
+        val = ""
+    elif isinstance(val, range):
+        val = f"{val.start}..{val.stop - 1}"
+    elif isinstance(val, Sequence):
+        if auto_escape:
+            val = Markup("").join(
+                _to_liquid_string(itm, auto_escape=auto_escape) for itm in val
+            )
+        else:
+            val = "".join(
+                _to_liquid_string(itm, auto_escape=auto_escape) for itm in val
+            )
+    elif isinstance(val, (Empty, Blank)):
+        val = ""
+    else:
+        val = str(val)
+
+    if auto_escape:
+        val = escape(val)
+
+    assert isinstance(val, str)
+    return val
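Taken together, the `expressions.py` changes make `${...}` placeholders full filtered expressions. A minimal sketch of the resulting behavior, using only templates that appear verbatim in the tests added at the end of this diff (`render` is imported from the package root there):

```python
from liquid2 import render

# Placeholders are complete filtered expressions.
assert render("{{ 'Hello, ${you}!' }}", you="World") == "Hello, World!"

# A backslash suppresses interpolation (see the `$` escape added to unescape.py below).
assert render(r"{{ 'Hello, \${you}!' }}", you="World") == "Hello, ${you}!"

# Placeholders nest, because `${...}` expressions are re-lexed recursively.
assert (
    render(
        "{{ 'Hello, ${you | append: '${something}'}!' }}",
        you="World",
        something=" and Liquid",
    )
    == "Hello, World and Liquid!"
)
```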
diff --git a/liquid2/lexer.py b/liquid2/lexer.py
index e2073a3..984c1ca 100644
--- a/liquid2/lexer.py
+++ b/liquid2/lexer.py
@@ -23,6 +23,7 @@
 from .token import RangeToken
 from .token import RawToken
 from .token import TagToken
+from .token import TemplateStringToken
 from .token import Token
 from .token import TokenType
 from .token import WhitespaceControl
@@ -65,7 +66,7 @@ class Lexer:
     RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
     RE_INDEX = re.compile(r"-?[0-9]+")

-    ESCAPES = frozenset(["b", "f", "n", "r", "t", "u", "/", "\\"])
+    ESCAPES = frozenset(["b", "f", "n", "r", "t", "u", "/", "\\", "$"])

     SYMBOLS: dict[str, str] = {
         "GE": r">=",
@@ -185,6 +186,7 @@ class Lexer:
         "expression",
         "wc",
         "path_stack",
+        "template_string_stack",
     )

     def __init__(self, source: str) -> None:
@@ -203,6 +205,9 @@ def __init__(self, source: str) -> None:
         self.path_stack: list[PathToken] = []
         """Current path/query/variable, possibly with nested paths."""

+        self.template_string_stack: list[TemplateStringToken] = []
+        """Current, possibly nested, interpolated string."""
+
         self.start = 0
         """Pointer to the start of the current token."""
@@ -407,6 +412,145 @@ def accept_string(self, *, quote: str) -> None:
             ),
         )

+    def accept_template_string(self, *, quote: str, expression: list[TokenT]) -> None:
+        """Scan a string literal that may contain interpolated `${...}` expressions."""
+        _type = (
+            TokenType.SINGLE_QUOTE_STRING
+            if quote == "'"
+            else TokenType.DOUBLE_QUOTE_STRING
+        )
+
+        # Assumes the opening quote has been consumed.
+        if self.peek() == quote:
+            # An empty string.
+            self.next()  # Move past the closing quote.
+            expression.append(
+                Token(
+                    type_=_type,
+                    source=self.source,
+                    value="",
+                    index=self.start,
+                )
+            )
+            self.start = self.pos
+            return
+
+        # A string token or an output token.
+        # The output token could contain more template strings.
+        start = self.start
+        template_string: list[Token | OutputToken] = []
+
+        while True:
+            c = self.next()
+
+            if c == "\\":
+                peeked = self.peek()
+                if peeked in self.ESCAPES or peeked == quote:
+                    self.next()
+                else:
+                    raise LiquidSyntaxError(
+                        "invalid escape sequence",
+                        token=ErrorToken(
+                            type_=TokenType.ERROR,
+                            index=self.pos,
+                            value=peeked,
+                            markup_start=self.markup_start,
+                            markup_stop=self.pos,
+                            source=self.source,
+                            message="invalid escape sequence",
+                        ),
+                    )
+
+            if c == "$" and self.peek() == "{":
+                # `${` could be at the start of the string.
+                if self.pos - 1 > self.start:  # TODO: check me
+                    template_string.append(
+                        Token(
+                            type_=_type,
+                            source=self.source,
+                            value=self.source[self.start : self.pos - 1],
+                            index=self.start,
+                        )
+                    )
+
+                self.start = self.pos - 1
+                self.next()  # Move past "{".
+                self.ignore()
+                sub_expression: list[TokenT] = []
+                sub_expression_start = self.start
+
+                while True:
+                    self.ignore_whitespace()
+                    if not self.accept_token(sub_expression):
+                        if self.peek() == "}":
+                            template_string.append(
+                                OutputToken(
+                                    type_=TokenType.OUTPUT,
+                                    start=sub_expression_start,
+                                    stop=self.pos,
+                                    wc=(
+                                        WhitespaceControl.DEFAULT,
+                                        WhitespaceControl.DEFAULT,
+                                    ),
+                                    expression=sub_expression,
+                                    source=self.source,
+                                )
+                            )
+                            self.next()  # Move past "}".
+                            self.ignore()
+                            self.start = self.pos
+                            break
+
+                        self.error(
+                            "unexpected end of template string expression, "
+                            f"{self.peek()!r}"
+                        )
+
+            if c == quote:
+                # A template string expression could be at the end of the string.
+                if self.pos - 1 > self.start:  # TODO: check me
+                    template_string.append(
+                        Token(
+                            type_=_type,
+                            source=self.source,
+                            value=self.source[self.start : self.pos - 1],
+                            index=self.start,
+                        )
+                    )
+
+                if len(template_string) == 1 and isinstance(template_string[0], Token):
+                    # Just a plain string with no placeholders.
+                    expression.append(template_string[0])
+                else:
+                    expression.append(
+                        TemplateStringToken(
+                            type_=TokenType.SINGLE_QUOTE_TEMPLATE_STRING
+                            if quote == "'"
+                            else TokenType.DOUBLE_QUOTE_TEMPLATE_STRING,
+                            source=self.source,
+                            template=template_string,
+                            start=start,
+                            stop=self.pos,
+                        )
+                    )
+
+                self.start = self.pos
+                return
+
+            if not c:
+                raise LiquidSyntaxError(
+                    "unclosed string literal or template string expression",
+                    token=ErrorToken(
+                        type_=TokenType.ERROR,
+                        index=self.start,
+                        value=self.source[self.start],
+                        markup_start=self.markup_start,
+                        markup_stop=self.pos,
+                        source=self.source,
+                        message="unclosed string literal",
+                    ),
+                )
+
     def accept_range(self) -> None:
         rparen = self.expression.pop()
         assert is_token_type(rparen, TokenType.RPAREN)
@@ -459,7 +602,7 @@ def accept_range(self) -> None:
             )
         )

-    def accept_token(self) -> bool:
+    def accept_token(self, expression: list[TokenT]) -> bool:
         match = self.TOKEN_RULES.match(self.source, pos=self.pos)

         if not match:
@@ -473,46 +616,24 @@ def accept_token(self, expression: list[TokenT]) -> bool:
         if kind == "SINGLE_QUOTE_STRING":
             self.ignore()
-            self.accept_string(quote="'")
-            self.expression.append(
-                Token(
-                    type_=TokenType.SINGLE_QUOTE_STRING,
-                    value=self.source[self.start : self.pos],
-                    index=self.start,
-                    source=self.source,
-                )
-            )
-            self.start = self.pos
-            assert self.next() == "'"
-            self.ignore()
+            self.accept_template_string(quote="'", expression=expression)

         elif kind == "DOUBLE_QUOTE_STRING":
             self.ignore()
-            self.accept_string(quote='"')
-            self.expression.append(
-                Token(
-                    type_=TokenType.DOUBLE_QUOTE_STRING,
-                    value=self.source[self.start : self.pos],
-                    index=self.start,
-                    source=self.source,
-                )
-            )
-            self.start = self.pos
-            assert self.next() == '"'
-            self.ignore()
+            self.accept_template_string(quote='"', expression=expression)

         elif kind == "LBRACKET":
             self.backup()
             self.accept_path()
-            self.expression.append(self.path_stack.pop())
+            expression.append(self.path_stack.pop())

         elif kind == "WORD":
             if self.peek() in (".", "["):
                 self.accept_path(carry=True)
-                self.expression.append(self.path_stack.pop())
+                expression.append(self.path_stack.pop())
             elif token_type := self.KEYWORD_MAP.get(value):
-                self.expression.append(
+                expression.append(
                     Token(
                         type_=token_type,
                         value=value,
@@ -521,7 +666,7 @@ def accept_token(self, expression: list[TokenT]) -> bool:
                     )
                 )
             else:
-                self.expression.append(
+                expression.append(
                     Token(
                         type_=TokenType.WORD,
                         value=value,
@@ -533,7 +678,7 @@ def accept_token(self, expression: list[TokenT]) -> bool:
             self.start = self.pos

         elif token_type := self.TOKEN_MAP.get(kind):
-            self.expression.append(
+            expression.append(
                 Token(
                     type_=token_type,
                     value=value,
@@ -755,7 +900,7 @@ def lex_inside_output_statement(
     ) -> StateFn | None:  # noqa: PLR0911, PLR0912, PLR0915
         while True:
             self.ignore_whitespace()
-            if not self.accept_token():
+            if not self.accept_token(self.expression):
                 if match := self.RE_OUTPUT_END.match(self.source, self.pos):
                     self.wc.append(self.WC_MAP[match.group(1)])
                     self.pos += match.end() - match.start()
@@ -784,7 +929,7 @@ def lex_inside_output_statement(
     def lex_inside_tag(self) -> StateFn | None:
         while True:
             self.ignore_whitespace()
-            if not self.accept_token():
+            if not self.accept_token(self.expression):
                 if match := self.RE_TAG_END.match(self.source, self.pos):
                     self.wc.append(self.WC_MAP[match.group(1)])
                     self.pos += match.end() - match.start()
@@ -894,7 +1039,7 @@ def lex_inside_line_statement(self) -> StateFn | None:
                 self.expression = []
                 return self.lex_inside_liquid_tag

-            if not self.accept_token():
+            if not self.accept_token(self.expression):
                 if match := self.RE_TAG_END.match(self.source, self.pos):
                     self.wc.append(self.WC_MAP[match.group(1)])
                     self.pos += match.end() - match.start()
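A behavior worth calling out: `accept_template_string` collapses a template with exactly one plain string part back to an ordinary string token, so only literals that actually contain `${...}` produce a `TemplateStringToken`, and interpolated strings round-trip through `str()`. A condensed version of the round-trip assertion from `tests/test_template_str.py` (assuming `parse` is importable from the package root, as those tests do):

```python
from liquid2 import parse

# Template strings survive a parse/str round trip.
source = "{{ 'Hello' if not you else 'Hello there, ${you}!' }}"
assert str(parse(source)) == source
```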
diff --git a/liquid2/stringify.py b/liquid2/stringify.py
index 1b09545..c139e63 100644
--- a/liquid2/stringify.py
+++ b/liquid2/stringify.py
@@ -9,6 +9,8 @@
 from liquid2.builtin import Blank
 from liquid2.builtin import Empty

+# NOTE: liquid2.builtin.expressions has a version of this too.
+

 def to_liquid_string(val: Any, *, auto_escape: bool = False) -> str:
     """Stringify a Python object ready for output in a Liquid template."""
diff --git a/liquid2/token.py b/liquid2/token.py
index cf5e112..6af3aaf 100644
--- a/liquid2/token.py
+++ b/liquid2/token.py
@@ -250,6 +250,31 @@ def __str__(self) -> str:
         return "".join(buf)


+@dataclass(kw_only=True, slots=True)
+class TemplateStringToken(TokenT):
+    """A token representing a string with interpolated expressions."""
+
+    template: list[Token | OutputToken]
+    start: int
+    stop: int
+    source: str = field(repr=False)
+
+    def __str__(self) -> str:
+        quote = "'" if self.type_ == TokenType.SINGLE_QUOTE_TEMPLATE_STRING else '"'
+        buf: list[str] = []
+        for token in self.template:
+            if is_token_type(token, TokenType.SINGLE_QUOTE_STRING) or is_token_type(
+                token, TokenType.DOUBLE_QUOTE_STRING
+            ):
+                buf.append(token.value)
+            elif is_output_token(token):
+                buf.append(f"${{{_expression_as_string(token.expression)}}}")
+            else:
+                buf.append(str(token))
+
+        return f"{quote}{''.join(buf)}{quote}"
+
+
 @dataclass(kw_only=True, slots=True)
 class RangeToken(TokenT):
     """A token representing a range expression.
@@ -324,6 +349,14 @@ def is_path_token(token: TokenT) -> TypeGuard[PathToken]:
     return token.type_ == TokenType.PATH


+def is_template_string_token(token: TokenT) -> TypeGuard[TemplateStringToken]:
+    """A [TemplateStringToken][liquid2.token.TemplateStringToken] type guard."""
+    return token.type_ in (
+        TokenType.SINGLE_QUOTE_TEMPLATE_STRING,
+        TokenType.DOUBLE_QUOTE_TEMPLATE_STRING,
+    )
+
+
 def is_range_token(token: TokenT) -> TypeGuard[RangeToken]:
     """A [RangeToken][liquid2.token.RangeToken] type guard."""
     return token.type_ == TokenType.RANGE
@@ -374,6 +407,7 @@ class TokenType(Enum):
     DOUBLE_DOT = auto()
     DOUBLE_PIPE = auto()
     DOUBLE_QUOTE_STRING = auto()
+    DOUBLE_QUOTE_TEMPLATE_STRING = auto()
     ELSE = auto()
     EQ = auto()
     FALSE = auto()
@@ -395,6 +429,7 @@ class TokenType(Enum):
     REQUIRED = auto()
     RPAREN = auto()
     SINGLE_QUOTE_STRING = auto()
+    SINGLE_QUOTE_TEMPLATE_STRING = auto()
     TRUE = auto()
     WITH = auto()
     WORD = auto()
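For downstream code, the new type guard narrows `TokenT` to `TemplateStringToken`. A hypothetical helper sketching the intended branching pattern (it assumes `TokenT` and `is_template_string_token` are importable from the package root, as the `__init__.py` changes above suggest):

```python
from liquid2 import TokenT
from liquid2 import is_template_string_token


def describe(token: TokenT) -> str:
    """Hypothetical example: report how many parts a template string has."""
    if is_template_string_token(token):
        # The guard narrows `token`, so `.template` is safe to access here.
        return f"template string with {len(token.template)} part(s)"
    return type(token).__name__
```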
diff --git a/liquid2/unescape.py b/liquid2/unescape.py
index d773e8e..f70b02f 100644
--- a/liquid2/unescape.py
+++ b/liquid2/unescape.py
@@ -29,6 +29,9 @@ def _decode_escape_sequence(  # noqa: PLR0911
     ch = value[index]
     if ch == '"':
         return '"', index
+    if ch == "$":
+        # For escaping string interpolation.
+        return "$", index
     if ch == "\\":
         return "\\", index
     if ch == "/":
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index c152f79..004b22e 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -172,6 +172,31 @@ class Case:
         source="{% liquid break %}",
         want="{% liquid break %}",
     ),
+    Case(
+        name="template string, single quote",
+        source="{{ 'Hello, ${you}!' }}",
+        want="{{ 'Hello, ${you}!' }}",
+    ),
+    Case(
+        name="template string, double quote",
+        source='{{ "Hello, ${you}!" }}',
+        want='{{ "Hello, ${you}!" }}',
+    ),
+    Case(
+        name="template string, with filter",
+        source='{{ "Hello, ${you | upcase}!" }}',
+        want='{{ "Hello, ${you | upcase}!" }}',
+    ),
+    Case(
+        name="template string, with ternary expression",
+        source='{{ "Hello, ${you if a else b}!" }}',
+        want='{{ "Hello, ${you if a else b}!" }}',
+    ),
+    Case(
+        name="template string, just a placeholder",
+        source='{{ "${you}" }}',
+        want='{{ "${you}" }}',
+    ),
 ]
diff --git a/tests/test_liquid_syntax_errors.py b/tests/test_liquid_syntax_errors.py
index d174a74..7678571 100644
--- a/tests/test_liquid_syntax_errors.py
+++ b/tests/test_liquid_syntax_errors.py
@@ -229,6 +229,8 @@ class Case(NamedTuple):
     ),
 ]

+# TODO: add cases covering template string syntax errors.
+

 @pytest.mark.parametrize("case", test_cases, ids=operator.attrgetter("description"))
 def test_syntax_errors(case: Case) -> None:
diff --git a/tests/test_string_interpolation.py b/tests/test_string_interpolation.py
new file mode 100644
index 0000000..122142c
--- /dev/null
+++ b/tests/test_string_interpolation.py
@@ -0,0 +1,131 @@
+import asyncio
+import operator
+from typing import Any
+from typing import NamedTuple
+
+import pytest
+
+from liquid2 import render
+from liquid2 import render_async
+
+
+class Case(NamedTuple):
+    description: str
+    template: str
+    context: dict[str, Any]
+    expect: str
+
+
+TEST_CASES: list[Case] = [
+    Case(
+        description="output, single quoted",
+        template="{{ 'Hello, ${you}!' }}",
+        context={"you": "World"},
+        expect="Hello, World!",
+    ),
+    Case(
+        description="output, double quoted",
+        template='{{ "Hello, ${you}!" }}',
+        context={"you": "World"},
+        expect="Hello, World!",
+    ),
+    Case(
+        description="output, expression at end",
+        template='{{ "Hello, ${you}" }}',
+        context={"you": "World"},
+        expect="Hello, World",
+    ),
+    Case(
+        description="output, expression at start",
+        template='{{ "${you}!" }}',
+        context={"you": "World"},
+        expect="World!",
+    ),
+    Case(
+        description="output, just expression",
+        template='{{ "${you}" }}',
+        context={"you": "World"},
+        expect="World",
+    ),
+    Case(
+        description="echo, single quoted",
+        template="{% echo 'Hello, ${you}!' %}",
+        context={"you": "World"},
+        expect="Hello, World!",
+    ),
+    Case(
+        description="echo, double quoted",
+        template='{% echo "Hello, ${you}!" %}',
+        context={"you": "World"},
+        expect="Hello, World!",
+    ),
+    Case(
+        description="echo, expression at end",
+        template='{% echo "Hello, ${you}" %}',
+        context={"you": "World"},
+        expect="Hello, World",
+    ),
+    Case(
+        description="echo, expression at start",
+        template='{% echo "${you}!" %}',
+        context={"you": "World"},
+        expect="World!",
+    ),
+    Case(
+        description="echo, just expression",
+        template='{% echo "${you}" %}',
+        context={"you": "World"},
+        expect="World",
+    ),
+    Case(
+        description="filtered",
+        template="{{ 'Hello, ${you}' | append: '!' }}",
+        context={"you": "World"},
+        expect="Hello, World!",
+    ),
+    Case(
+        description="filter argument",
+        template="{{ 'Hello ' | append: 'there, ${you}!' }}",
+        context={"you": "World"},
+        expect="Hello there, World!",
+    ),
+    Case(
+        description="ternary alternative",
+        template="{{ 'Hello' if not you else 'Hello there, ${you}!' }}",
+        context={"you": "World"},
+        expect="Hello there, World!",
+    ),
+    Case(
+        description="infix expression, left",
+        template="{% if 'Hello, ${you}' == 'Hello, World' %}true{% endif %}",
+        context={"you": "World"},
+        expect="true",
+    ),
+    Case(
+        description="infix expression, right",
+        template="{% if 'Hello, World' == 'Hello, ${you}' %}true{% endif %}",
+        context={"you": "World"},
+        expect="true",
+    ),
+    Case(
+        description="output, escaped",
+        template=r"{{ 'Hello, \${you}!' }}",
+        context={"you": "World"},
+        expect="Hello, ${you}!",
+    ),
+    Case(
+        description="output, nested",
+        template="{{ 'Hello, ${you | append: '${something}'}!' }}",
+        context={"you": "World", "something": " and Liquid"},
+        expect="Hello, World and Liquid!",
+    ),
+]
+
+
+@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description"))
+def test_string_interpolation(case: Case) -> None:
+    async def coro() -> str:
+        return await render_async(case.template, **case.context)
+
+    assert render(case.template, **case.context) == case.expect
+    assert asyncio.run(coro()) == case.expect
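Each case above is asserted through both the synchronous and asynchronous paths. In isolation, the async path looks like this (`render_async` imported exactly as in the test file above):

```python
import asyncio

from liquid2 import render_async


async def main() -> None:
    result = await render_async("{{ 'Hello, ${you}!' }}", you="World")
    assert result == "Hello, World!"


asyncio.run(main())
```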
}}", + context={"you": "World", "something": " and Liquid"}, + expect="Hello, World and Liquid!", + ), +] + + +@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) +def test_filter_auto_escape(case: Case) -> None: + async def coro() -> str: + return await render_async(case.template, **case.context) + + assert render(case.template, **case.context) == case.expect + assert asyncio.run(coro()) == case.expect diff --git a/tests/test_template_str.py b/tests/test_template_str.py index 98721e9..d4a4e68 100644 --- a/tests/test_template_str.py +++ b/tests/test_template_str.py @@ -430,3 +430,9 @@ def test_with_str_wc() -> None: ) template = parse(source) assert str(template) == source + + +def test_template_string_str() -> None: + source = "{{ 'Hello' if not you else 'Hello there, ${you}!' }}" + template = parse(source) + assert str(template) == source