From 7b718193cb3292b27ae41a73a3d61c1d8e29a188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 10 Oct 2024 20:58:31 +0200 Subject: [PATCH] fixup! Add option to preserve comments when parsing templates --- CHANGES.rst | 2 +- src/jinja2/environment.py | 14 +++----------- src/jinja2/lexer.py | 38 ++++---------------------------------- src/jinja2/parser.py | 16 ++++++++-------- tests/test_lexnparse.py | 13 ++----------- 5 files changed, 18 insertions(+), 65 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a10fa652d..c8e357133 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,7 +9,7 @@ Unreleased - Use modern packaging metadata with ``pyproject.toml`` instead of ``setup.cfg``. :pr:`1793` - Use ``flit_core`` instead of ``setuptools`` as build backend. -- Add the ``preserve_comments`` parameter to ``Environment.parse`` to preserve comments in template ASTs. :pr:`2037` +- Preserve comments in ASTs when parsing templates with ``Environment.parse``. :pr:`2037` Version 3.1.5 diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index 7705913f2..531669eff 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -600,7 +600,6 @@ def parse( source: str, name: t.Optional[str] = None, filename: t.Optional[str] = None, - preserve_comments: bool = False, ) -> nodes.Template: """Parse the sourcecode and return the abstract syntax tree. This tree of nodes is used by the compiler to convert the template into @@ -609,12 +608,9 @@ def parse( If you are :ref:`developing Jinja extensions <writing-extensions>` this gives you a good overview of the node tree generated. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. 
""" try: - return self._parse(source, name, filename, preserve_comments) + return self._parse(source, name, filename) except TemplateSyntaxError: self.handle_exception(source=source) @@ -623,12 +619,9 @@ def _parse( source: str, name: t.Optional[str], filename: t.Optional[str], - preserve_comments: bool = False, ) -> nodes.Template: """Internal parsing function used by `parse` and `compile`.""" - return Parser( - self, source, name, filename, preserve_comments=preserve_comments - ).parse() + return Parser(self, source, name, filename).parse() def lex( self, @@ -673,13 +666,12 @@ def _tokenize( name: t.Optional[str], filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> TokenStream: """Called by the parser to do the preprocessing and filtering for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. """ source = self.preprocess(source, name, filename) - stream = self.lexer.tokenize(source, name, filename, state, preserve_comments) + stream = self.lexer.tokenize(source, name, filename, state) for ext in self.iter_extensions(): stream = ext.filter_stream(stream) # type: ignore diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 6b2980061..88f2fc3d6 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -146,22 +146,7 @@ f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" ) -comment_tokens = frozenset( - [ - TOKEN_COMMENT_BEGIN, - TOKEN_COMMENT, - TOKEN_COMMENT_END, - TOKEN_LINECOMMENT_BEGIN, - TOKEN_LINECOMMENT_END, - TOKEN_LINECOMMENT, - ] -) -ignored_tokens = frozenset( - [ - TOKEN_WHITESPACE, - *comment_tokens, - ] -) +ignored_tokens = frozenset([TOKEN_WHITESPACE]) ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) @@ -612,37 +597,22 @@ def tokenize( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> TokenStream: - """Calls 
tokeniter + tokenize and wraps it in a token stream. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. - """ + """Calls tokeniter + tokenize and wraps it in a token stream.""" stream = self.tokeniter(source, name, filename, state) - return TokenStream( - self.wrap(stream, name, filename, preserve_comments), name, filename - ) + return TokenStream(self.wrap(stream, name, filename), name, filename) def wrap( self, stream: t.Iterable[t.Tuple[int, str, str]], name: t.Optional[str] = None, filename: t.Optional[str] = None, - preserve_comments: bool = False, ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. """ - ignored = ignored_tokens - if preserve_comments: - ignored -= comment_tokens - for lineno, token, value_str in stream: - if token in ignored: + if token in ignored_tokens: continue value: t.Any = value_str diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index a8ed5941c..9c3a94df8 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,12 +57,9 @@ def __init__( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> None: self.environment = environment - self.stream = environment._tokenize( - source, name, filename, state, preserve_comments - ) + self.stream = environment._tokenize(source, name, filename, state) self.name = name self.filename = filename self.closed = False @@ -318,10 +315,13 @@ def parse_block(self) -> nodes.Block: # with whitespace data if node.required: for body_node in node.body: - if not isinstance(body_node, nodes.Output) or any( - not isinstance(output_node, nodes.TemplateData) - or not output_node.data.isspace() - for output_node in body_node.nodes + if not isinstance(body_node, (nodes.Output, nodes.Comment)) or ( + isinstance(body_node, nodes.Output) + and 
any( + not isinstance(output_node, nodes.TemplateData) + or not output_node.data.isspace() + for output_node in body_node.nodes + ) ): self.fail("Required blocks can only contain comments or whitespace") diff --git a/tests/test_lexnparse.py b/tests/test_lexnparse.py index ca0708a75..cac32cf71 100644 --- a/tests/test_lexnparse.py +++ b/tests/test_lexnparse.py @@ -315,27 +315,18 @@ def assert_error(code, expected): assert_error("{% unknown_tag %}", "Encountered unknown tag 'unknown_tag'.") def test_comment_preservation(self, env): - ast = env.parse("{# foo #}{{ bar }}", preserve_comments=True) + ast = env.parse("{# foo #}{{ bar }}") assert len(ast.body) == 2 assert isinstance(ast.body[0], nodes.Comment) assert ast.body[0].data == " foo " - ast = env.parse("{# foo #}{{ bar }}", preserve_comments=False) - assert len(ast.body) == 1 - assert not isinstance(ast.body[0], nodes.Comment) - def test_line_comment_preservation(self, env): env = Environment(line_comment_prefix="#") - - ast = env.parse("# foo\n{{ bar }}", preserve_comments=True) + ast = env.parse("# foo\n{{ bar }}") assert len(ast.body) == 2 assert isinstance(ast.body[0], nodes.Comment) assert ast.body[0].data == " foo" - ast = env.parse("# foo\n{{ bar }}", preserve_comments=False) - assert len(ast.body) == 1 - assert not isinstance(ast.body[0], nodes.Comment) - class TestSyntax: def test_call(self, env):