From a3db7ab8d29ca2c288124733f9925990964abfcd Mon Sep 17 00:00:00 2001
From: Timothée Mazzucotelli
Date: Thu, 10 Oct 2024 17:45:46 +0200
Subject: [PATCH] Add option to preserve comments when parsing templates

---
 src/jinja2/environment.py | 19 +++++++++++++++----
 src/jinja2/lexer.py       | 25 +++++++++++++++++++++----
 src/jinja2/nodes.py       |  7 +++++++
 src/jinja2/parser.py      | 15 ++++++++++++++-
 tests/test_lexnparse.py   | 22 ++++++++++++++++++++++
 5 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py
index 0b303d597..672874269 100644
--- a/src/jinja2/environment.py
+++ b/src/jinja2/environment.py
@@ -600,6 +600,7 @@ def parse(
         source: str,
         name: t.Optional[str] = None,
         filename: t.Optional[str] = None,
+        preserve_comments: bool = False,
     ) -> nodes.Template:
         """Parse the sourcecode and return the abstract syntax tree. This
         tree of nodes is used by the compiler to convert the template into
@@ -610,15 +611,24 @@ def parse(
         this gives you a good overview of the node tree generated.
+
+        Set *preserve_comments* to ``True`` to keep template comments in
+        the tree as :class:`~jinja2.nodes.Comment` nodes.
         """
         try:
-            return self._parse(source, name, filename)
+            return self._parse(source, name, filename, preserve_comments)
         except TemplateSyntaxError:
             self.handle_exception(source=source)
 
     def _parse(
-        self, source: str, name: t.Optional[str], filename: t.Optional[str]
+        self,
+        source: str,
+        name: t.Optional[str],
+        filename: t.Optional[str],
+        preserve_comments: bool = False,
     ) -> nodes.Template:
         """Internal parsing function used by `parse` and `compile`."""
-        return Parser(self, source, name, filename).parse()
+        return Parser(
+            self, source, name, filename, preserve_comments=preserve_comments
+        ).parse()
 
     def lex(
         self,
@@ -663,12 +673,13 @@ def _tokenize(
         name: t.Optional[str],
         filename: t.Optional[str] = None,
         state: t.Optional[str] = None,
+        preserve_comments: bool = False,
     ) -> TokenStream:
         """Called by the parser to do the preprocessing and filtering
         for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`.
""" source = self.preprocess(source, name, filename) - stream = self.lexer.tokenize(source, name, filename, state) + stream = self.lexer.tokenize(source, name, filename, state, preserve_comments) for ext in self.iter_extensions(): stream = ext.filter_stream(stream) # type: ignore diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 6dc94b67d..1b30922a7 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -146,17 +146,22 @@ f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" ) -ignored_tokens = frozenset( +comment_tokens = frozenset( [ TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, TOKEN_COMMENT_END, - TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT, ] ) +ignored_tokens = frozenset( + [ + TOKEN_WHITESPACE, + *comment_tokens, + ] +) ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) @@ -607,22 +612,30 @@ def tokenize( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, + preserve_comments: bool = False, ) -> TokenStream: """Calls tokeniter + tokenize and wraps it in a token stream.""" stream = self.tokeniter(source, name, filename, state) - return TokenStream(self.wrap(stream, name, filename), name, filename) + return TokenStream( + self.wrap(stream, name, filename, preserve_comments), name, filename + ) def wrap( self, stream: t.Iterable[t.Tuple[int, str, str]], name: t.Optional[str] = None, filename: t.Optional[str] = None, + preserve_comments: bool = False, ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. """ + ignored = ignored_tokens + if preserve_comments: + ignored -= comment_tokens + for lineno, token, value_str in stream: - if token in ignored_tokens: + if token in ignored: continue value: t.Any = value_str @@ -631,6 +644,10 @@ def wrap( token = TOKEN_BLOCK_BEGIN elif token == TOKEN_LINESTATEMENT_END: token = TOKEN_BLOCK_END + elif token == TOKEN_LINECOMMENT_BEGIN: + token = TOKEN_COMMENT_BEGIN + elif token == TOKEN_LINECOMMENT_END: + token = TOKEN_COMMENT_END # we are not interested in those tokens in the parser elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): continue diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 2f93b90ec..9c81008ba 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -715,6 +715,13 @@ def as_const(self, eval_ctx: t.Optional[EvalContext] = None) -> t.Any: return self.expr2.as_const(eval_ctx) +class Comment(Stmt): + """A template comment.""" + + fields = ("data",) + data: str + + def args_as_const( node: t.Union["_FilterTestCommon", "Call"], eval_ctx: t.Optional[EvalContext] ) -> t.Tuple[t.List[t.Any], t.Dict[t.Any, t.Any]]: diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 817abeccf..a8ed5941c 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,9 +57,12 @@ def __init__( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, + preserve_comments: bool = False, ) -> None: self.environment = environment - self.stream = environment._tokenize(source, name, filename, state) + self.stream = environment._tokenize( + source, name, filename, state, preserve_comments + ) self.name = name self.filename = filename self.closed = False @@ -1025,6 +1028,11 @@ def flush_data() -> None: else: body.append(rv) self.stream.expect("block_end") + elif token.type == "comment_begin": + flush_data() + next(self.stream) + 
+                    # A comment with no text ({##}) produces no comment
+                    # token, so only consume one if it is present.
+                    comment = ""
+                    if self.stream.current.type != "comment_end":
+                        comment = next(self.stream).value
+                    body.append(nodes.Comment(comment, lineno=token.lineno))
+                    self.stream.expect("comment_end")
                 else:
                     raise AssertionError("internal parsing error")
 
diff --git a/tests/test_lexnparse.py b/tests/test_lexnparse.py
index c02adad5a..ca0708a75 100644
--- a/tests/test_lexnparse.py
+++ b/tests/test_lexnparse.py
@@ -314,6 +314,28 @@ def assert_error(code, expected):
         )
         assert_error("{% unknown_tag %}", "Encountered unknown tag 'unknown_tag'.")
 
+    def test_comment_preservation(self, env):
+        ast = env.parse("{# foo #}{{ bar }}", preserve_comments=True)
+        assert len(ast.body) == 2
+        assert isinstance(ast.body[0], nodes.Comment)
+        assert ast.body[0].data == " foo "
+
+        ast = env.parse("{# foo #}{{ bar }}", preserve_comments=False)
+        assert len(ast.body) == 1
+        assert not isinstance(ast.body[0], nodes.Comment)
+
+    def test_line_comment_preservation(self):
+        env = Environment(line_comment_prefix="#")
+
+        ast = env.parse("# foo\n{{ bar }}", preserve_comments=True)
+        assert len(ast.body) == 2
+        assert isinstance(ast.body[0], nodes.Comment)
+        assert ast.body[0].data == " foo"
+
+        ast = env.parse("# foo\n{{ bar }}", preserve_comments=False)
+        assert len(ast.body) == 1
+        assert not isinstance(ast.body[0], nodes.Comment)
+
 
 class TestSyntax:
     def test_call(self, env):
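
A quick sketch of how the new flag can be used, for illustration only (not
part of the patch; the template text and the TODO note are invented):

    from jinja2 import Environment, nodes

    env = Environment()
    ast = env.parse("{# TODO: localize #}Hello {{ name }}!", preserve_comments=True)

    # Comment nodes now appear in the AST like any other node type, so
    # tooling can collect them, e.g. to extract notes for translators.
    for comment in ast.find_all(nodes.Comment):
        print(comment.lineno, repr(comment.data))  # -> 1 ' TODO: localize '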