Skip to content

Commit

Permalink
Add option to preserve comments when parsing templates
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed Oct 10, 2024
1 parent eb0df04 commit a3db7ab
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 9 deletions.
16 changes: 12 additions & 4 deletions src/jinja2/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ def parse(
source: str,
name: t.Optional[str] = None,
filename: t.Optional[str] = None,
preserve_comments: bool = False,
) -> nodes.Template:
"""Parse the sourcecode and return the abstract syntax tree. This
tree of nodes is used by the compiler to convert the template into
Expand All @@ -610,15 +611,21 @@ def parse(
this gives you a good overview of the node tree generated.
"""
try:
return self._parse(source, name, filename)
return self._parse(source, name, filename, preserve_comments)
except TemplateSyntaxError:
self.handle_exception(source=source)

def _parse(
    self,
    source: str,
    name: t.Optional[str],
    filename: t.Optional[str],
    preserve_comments: bool = False,
) -> nodes.Template:
    """Internal parsing function used by `parse` and `compile`.

    :param source: the template source text.
    :param name: the load name of the template, or ``None`` for strings.
    :param filename: the filename used for error reporting, or ``None``.
    :param preserve_comments: when true, template comments are kept in the
        returned AST as ``nodes.Comment`` nodes instead of being discarded
        by the lexer.
    :return: the root :class:`nodes.Template` node of the parsed source.
    """
    # NOTE: the diff rendering interleaved the pre-change signature and
    # return statement with the new ones; this is the post-change version.
    return Parser(
        self, source, name, filename, preserve_comments=preserve_comments
    ).parse()

def lex(
self,
Expand Down Expand Up @@ -663,12 +670,13 @@ def _tokenize(
name: t.Optional[str],
filename: t.Optional[str] = None,
state: t.Optional[str] = None,
preserve_comments: bool = False,
) -> TokenStream:
"""Called by the parser to do the preprocessing and filtering
for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`.
"""
source = self.preprocess(source, name, filename)
stream = self.lexer.tokenize(source, name, filename, state)
stream = self.lexer.tokenize(source, name, filename, state, preserve_comments)

for ext in self.iter_extensions():
stream = ext.filter_stream(stream) # type: ignore
Expand Down
25 changes: 21 additions & 4 deletions src/jinja2/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,22 @@
f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)

# Tokens that together make up a template comment (both block comments
# `{# ... #}` and line comments when a line-comment prefix is configured).
comment_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
# Tokens dropped by ``Lexer.wrap`` by default. Comment tokens are included
# here so existing behavior is unchanged; ``preserve_comments`` subtracts
# ``comment_tokens`` from this set to keep them in the stream.
ignored_tokens = frozenset(
    [
        TOKEN_WHITESPACE,
        *comment_tokens,
    ]
)
# Tokens that are only dropped when their value is empty.
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)
Expand Down Expand Up @@ -607,22 +612,30 @@ def tokenize(
name: t.Optional[str] = None,
filename: t.Optional[str] = None,
state: t.Optional[str] = None,
preserve_comments: bool = False,
) -> TokenStream:
"""Calls tokeniter + tokenize and wraps it in a token stream."""
stream = self.tokeniter(source, name, filename, state)
return TokenStream(self.wrap(stream, name, filename), name, filename)
return TokenStream(
self.wrap(stream, name, filename, preserve_comments), name, filename
)

def wrap(
self,
stream: t.Iterable[t.Tuple[int, str, str]],
name: t.Optional[str] = None,
filename: t.Optional[str] = None,
preserve_comments: bool = False,
) -> t.Iterator[Token]:
"""This is called with the stream as returned by `tokenize` and wraps
every token in a :class:`Token` and converts the value.
"""
ignored = ignored_tokens
if preserve_comments:
ignored -= comment_tokens

for lineno, token, value_str in stream:
if token in ignored_tokens:
if token in ignored:
continue

value: t.Any = value_str
Expand All @@ -631,6 +644,10 @@ def wrap(
token = TOKEN_BLOCK_BEGIN
elif token == TOKEN_LINESTATEMENT_END:
token = TOKEN_BLOCK_END
elif token == TOKEN_LINECOMMENT_BEGIN:
token = TOKEN_COMMENT_BEGIN
elif token == TOKEN_LINECOMMENT_END:
token = TOKEN_COMMENT_END
# we are not interested in those tokens in the parser
elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
continue
Expand Down
7 changes: 7 additions & 0 deletions src/jinja2/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,13 @@ def as_const(self, eval_ctx: t.Optional[EvalContext] = None) -> t.Any:
return self.expr2.as_const(eval_ctx)


class Comment(Stmt):
    """A template comment.

    Only produced when the template is parsed with
    ``preserve_comments=True``; by default the lexer discards comments
    before they reach the parser.
    """

    # NOTE(review): node attributes are declared via the ``fields`` tuple;
    # presumably the Node metaclass turns these into constructor arguments —
    # confirm against nodes.Node.
    fields = ("data",)
    # The comment text between the delimiters, delimiters excluded
    # (e.g. "{# foo #}" yields data == " foo ").
    data: str


def args_as_const(
node: t.Union["_FilterTestCommon", "Call"], eval_ctx: t.Optional[EvalContext]
) -> t.Tuple[t.List[t.Any], t.Dict[t.Any, t.Any]]:
Expand Down
10 changes: 9 additions & 1 deletion src/jinja2/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ def __init__(
name: t.Optional[str] = None,
filename: t.Optional[str] = None,
state: t.Optional[str] = None,
preserve_comments: bool = False,
) -> None:
self.environment = environment
self.stream = environment._tokenize(source, name, filename, state)
self.stream = environment._tokenize(
source, name, filename, state, preserve_comments
)
self.name = name
self.filename = filename
self.closed = False
Expand Down Expand Up @@ -1025,6 +1028,11 @@ def flush_data() -> None:
else:
body.append(rv)
self.stream.expect("block_end")
elif token.type == "comment_begin":
flush_data()
next(self.stream)
body.append(nodes.Comment(next(self.stream).value))
self.stream.expect("comment_end")
else:
raise AssertionError("internal parsing error")

Expand Down
22 changes: 22 additions & 0 deletions tests/test_lexnparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,28 @@ def assert_error(code, expected):
)
assert_error("{% unknown_tag %}", "Encountered unknown tag 'unknown_tag'.")

def test_comment_preservation(self, env):
    """Block comments appear as Comment nodes only when requested."""
    preserved = env.parse("{# foo #}{{ bar }}", preserve_comments=True)
    assert len(preserved.body) == 2
    comment_node = preserved.body[0]
    assert isinstance(comment_node, nodes.Comment)
    assert comment_node.data == " foo "

    stripped = env.parse("{# foo #}{{ bar }}", preserve_comments=False)
    assert len(stripped.body) == 1
    assert not isinstance(stripped.body[0], nodes.Comment)

def test_line_comment_preservation(self, env):
    """Line comments are preserved as Comment nodes only when requested."""
    env = Environment(line_comment_prefix="#")
    template_source = "# foo\n{{ bar }}"

    preserved = env.parse(template_source, preserve_comments=True)
    assert len(preserved.body) == 2
    comment_node = preserved.body[0]
    assert isinstance(comment_node, nodes.Comment)
    assert comment_node.data == " foo"

    stripped = env.parse(template_source, preserve_comments=False)
    assert len(stripped.body) == 1
    assert not isinstance(stripped.body[0], nodes.Comment)


class TestSyntax:
def test_call(self, env):
Expand Down

0 comments on commit a3db7ab

Please sign in to comment.