From 95166c8d65dbc4af4604aa826bf3c42ca82b438f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 09:46:24 -0700 Subject: [PATCH 01/11] Simpler code for unescaping text --- Cheetah/legacy_parser.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index 0b2c484e..ee35b1d6 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -96,16 +96,6 @@ def makeTripleQuoteRe(start, end): DIRECTIVE_END_RE = re.compile(escCharLookBehind + re.escape('#')) -def _unescapeCheetahVars(s): - r"""Unescape any escaped Cheetah \$vars in the string.""" - return s.replace('\\$', '$') - - -def _unescapeDirectives(s): - """Unescape any escaped Cheetah directives in the string.""" - return s.replace('\\#', '#') - - directiveNamesAndParsers = { # Python directives 'import': None, @@ -813,7 +803,7 @@ def eatPlainText(self): while not self.atEnd() and not self.matchTopLevelToken(): self.advance() text = self.readTo(self.pos(), start=start) - text = _unescapeDirectives(_unescapeCheetahVars(text)) + text = text.replace('\\$', '$').replace('\\#', '#') self._compiler.addStrConst(text) def eatComment(self): From afd3f5bb4dd72a33d8e3cec567765e66d943f1dd Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 09:46:39 -0700 Subject: [PATCH 02/11] Use elif for readability --- Cheetah/legacy_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index ee35b1d6..064f4c02 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -1013,8 +1013,7 @@ def _eatDefOrBlock(self, directiveName): raise ParseError( self, '#block must not have an argspec, did you mean #def?', ) - - if directiveName == 'def' and self.peek() != '(': + elif directiveName == 'def' and self.peek() != '(': raise ParseError(self, '#def must contain an argspec (at least ())') if directiveName == 'def': From ea612a517877b1f67e9ebc379b25281c42ddaf7c Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 10:09:46 -0700 Subject: [PATCH 03/11] Simplify triple quoted string stuff --- Cheetah/legacy_parser.py | 47 ++++++++-------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index 064f4c02..2672b3ec 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -12,54 +12,25 @@ import re import string import sys -from tokenize import PseudoToken +import tokenize import six from Cheetah.SourceReader import SourceReader -python_token_re = re.compile(PseudoToken) +python_token_re = re.compile(tokenize.PseudoToken) identchars = string.ascii_letters + '_' namechars = identchars + string.digits -single3 = "'''" -double3 = '"""' - -tripleQuotedStringStarts = ( - "'''", '"""', - "r'''", 'r"""', "R'''", 'R"""', - "u'''", 'u"""', "U'''", 'U"""', - "ur'''", 'ur"""', "Ur'''", 'Ur"""', - "uR'''", 'uR"""', "UR'''", 'UR"""', -) - -tripleQuotedStringPairs = { - "'''": single3, '"""': double3, - "r'''": single3, 'r"""': double3, - "u'''": single3, 'u"""': double3, - "ur'''": single3, 'ur"""': double3, - "R'''": single3, 'R"""': double3, - "U'''": single3, 'U"""': double3, - "uR'''": single3, 'uR"""': double3, - "Ur'''": single3, 'Ur"""': double3, - "UR'''": single3, 'UR"""': double3, +triple_quoted_pairs = {k: k[-3:] for k in tokenize.triple_quoted} +triple_quoted_res = { + k: re.compile('(?:{}).*?(?:{})'.format(k, v), re.DOTALL) + for k, v in triple_quoted_pairs.items() } closurePairs = {')': '(', ']': '[', '}': '{'} closurePairsRev = {'(': ')', '[': ']', '{': '}'} - -tripleQuotedStringREs = {} - - -def makeTripleQuoteRe(start, end): - start = re.escape(start) - end = re.escape(end) - return re.compile(r'(?:' + start + r').*?' + r'(?:' + end + r')', re.DOTALL) - -for start_part, end_part in tripleQuotedStringPairs.items(): - tripleQuotedStringREs[start_part] = makeTripleQuoteRe(start_part, end_part) - escCharLookBehind = r'(?:(?<=\A)|(? Date: Thu, 2 Jun 2016 10:18:58 -0700 Subject: [PATCH 04/11] Clean up a regex that had stuff we eliminated in 0.1.0 --- Cheetah/legacy_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index 2672b3ec..bb5d3192 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -49,7 +49,7 @@ VAR_START_TOKEN_RE = re.compile( escCharLookBehind + VAR_START_ESC + - r'(?=[A-Za-z_\*!\{\(\[])' + r'(?=[A-Za-z_\{\(\[])' ) VAR_IN_EXPRESSION_START_TOKEN_RE = re.compile( VAR_START_ESC + r'(?=[A-Za-z_])' From 2c798e71e81fd020c3af40e4951f2ed8d2db0f3c Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 10:23:01 -0700 Subject: [PATCH 05/11] Simplify a regex --- Cheetah/legacy_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index bb5d3192..e03fe8ba 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -43,7 +43,7 @@ VAR_START_RE = re.compile( escCharLookBehind + r'(?P' + VAR_START_ESC + ')' + - r'(?P|(?:(?:\{|\(|\[)[ \t]*))' + # allow WS after + r'(?P|(?:\{|\(|\[))' + r'(?=[A-Za-z_])', ) VAR_START_TOKEN_RE = re.compile( From 9cda7cc247bc9d06699a487dbdb93f5dd6eb1d2f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 10:33:56 -0700 Subject: [PATCH 06/11] Some minor simplifications --- Cheetah/legacy_parser.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index e03fe8ba..a1b44100 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -171,11 +171,8 @@ def inner(self, *args, **kwargs): except Exception as e: six.reraise( ParseError, - ParseError( - self, - '{}: {}\n'.format(type(e).__name__, e) - ), - sys.exc_info()[2] + ParseError(self, '{}: {}\n'.format(type(e).__name__, e)), + sys.exc_info()[2], ) return inner @@ -206,8 +203,8 @@ def add_default(self, token): self.defaults[count] += token def merge(self): - defaults = (isinstance(d, six.text_type) and d.strip() or None for d in self.defaults) - return list(six.moves.zip_longest((a.strip() for a in self.arguments), defaults)) + defaults = [d.strip() if d is not None else None for d in self.defaults] + return list(zip((a.strip() for a in self.arguments), defaults)) class _LowLevelParser(SourceReader): From 9fec4eb5c2a7a325a3ae2ff4da5d733801bbe639 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 10:39:15 -0700 Subject: [PATCH 07/11] Simplify triple quote error case --- Cheetah/legacy_parser.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index a1b44100..087f1e79 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -235,21 +235,16 @@ def matchTopLevelToken(self): break return match - def matchPyToken(self): + def getPyToken(self): match = python_token_re.match(self.src(), self.pos()) if match and match.group() in triple_quoted_pairs: - TQSmatch = triple_quoted_res[match.group()].match(self.src(), self.pos()) - if TQSmatch: - return TQSmatch - return match - - def getPyToken(self): - match = self.matchPyToken() - if match is None: + match = triple_quoted_res[match.group()].match(self.src(), self.pos()) + if not match: + raise ParseError(self, msg='Malformed triple-quoted string') + elif not match: raise ParseError(self) - elif match.group() in triple_quoted_pairs: - raise ParseError(self, msg='Malformed triple-quoted string') + return self.readTo(match.end()) def matchCommentStartToken(self): From b176e69d6f2c06f6d7d90fdaca1ed9ad52264168 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 11:00:48 -0700 Subject: [PATCH 08/11] Simplify reading a dotted name --- Cheetah/legacy_parser.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index 087f1e79..f5f48c0d 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -255,25 +255,15 @@ def getCommentStartToken(self): return self.readTo(match.end()) def getDottedName(self): - srcLen = len(self) - nameChunks = [] - assert self.peek() in identchars - - while self.pos() < srcLen: - c = self.peek() - if c in namechars: - nameChunk = self.getIdentifier() - nameChunks.append(nameChunk) - elif c == '.': - if self.pos() + 1 < srcLen and self.peek(1) in identchars: - nameChunks.append(self.getc()) - else: - break - else: - break - - return ''.join(nameChunks) + name = self.getIdentifier() + while ( + self.pos() + 1 < len(self) and + self.peek() == '.' and + self.peek(1) in identchars + ): + name += self.getc() + self.getIdentifier() + return name def matchIdentifier(self): return identRE.match(self.src(), self.pos()) From f9a8d18f26b5314631bfa539ab143886b264271f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 11:08:24 -0700 Subject: [PATCH 09/11] Fix misparsing of decorators --- Cheetah/legacy_parser.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index f5f48c0d..0ccab6c7 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -33,7 +33,6 @@ escCharLookBehind = r'(?:(?<=\A)|(? Date: Thu, 2 Jun 2016 11:40:14 -0700 Subject: [PATCH 10/11] Minor fixes --- Cheetah/legacy_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Cheetah/legacy_parser.py b/Cheetah/legacy_parser.py index 0ccab6c7..97b7a3b8 100644 --- a/Cheetah/legacy_parser.py +++ b/Cheetah/legacy_parser.py @@ -222,7 +222,6 @@ def matchTopLevelToken(self): Returns None if no match. """ - match = None if self.peek() in '#$': for matcher in ( self.matchCommentStartToken, @@ -232,8 +231,8 @@ def matchTopLevelToken(self): ): match = matcher() if match: - break - return match + return match + return None def getPyToken(self): match = python_token_re.match(self.src(), self.pos()) @@ -847,7 +846,7 @@ def eatSimpleIndentingDirective(self, directiveName, callback): expr = self.getExpression(pyTokensToBreakAt=[':']) if self.matchColonForSingleLineShortFormDirective(): self.advance() # skip over : - if directiveName in 'else elif except finally'.split(): + if directiveName in {'else', 'elif', 'except', 'finally'}: callback(expr, lineCol, dedent=False) else: callback(expr, lineCol) From 9a75b10e92dff6cf6dd4f914ce9806c464af1cc0 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 2 Jun 2016 11:50:16 -0700 Subject: [PATCH 11/11] Minor cleanups --- Cheetah/SettingsManager.py | 6 +++--- Cheetah/legacy_compiler.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cheetah/SettingsManager.py b/Cheetah/SettingsManager.py index ad0609a1..9ee465c8 100644 --- a/Cheetah/SettingsManager.py +++ b/Cheetah/SettingsManager.py @@ -38,8 +38,8 @@ def updateSettings(self, new_settings): def updateSettingsFromConfigStr(self, config_str): values = [line.split('=', 1) for line in config_str.strip().splitlines()] - settings = dict( - (key.strip(), convert_value(value.strip())) + settings = { + key.strip(): convert_value(value.strip()) for key, value in values - ) + } self.updateSettings(settings) diff --git a/Cheetah/legacy_compiler.py b/Cheetah/legacy_compiler.py index c9be01af..e86e749a 100644 --- a/Cheetah/legacy_compiler.py +++ b/Cheetah/legacy_compiler.py @@ -475,7 +475,7 @@ def addImportedVarNames(self, varNames, raw_statement=None): if not varNames: return if not self.setting('useLegacyImportMode'): - if raw_statement and getattr(self, '_methodBodyChunks'): + if raw_statement and self._methodBodyChunks: self.addChunk(raw_statement) else: self._global_vars.update(varNames)