1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
2is used to do some preprocessing. It filters out invalid operators like
3the bitshift operators we don't allow in templates. It separates
4template code and python code in expressions.
5"""
6import re
7from ast import literal_eval
8from collections import deque
9from operator import itemgetter
10from sys import intern
11
12from ._identifier import pattern as name_re
13from .exceptions import TemplateSyntaxError
14from .utils import LRUCache
15
16# cache for the lexers. Exists in order to be able to have multiple
17# environments with the same lexer
18_lexer_cache = LRUCache(50)
19
20# static regular expressions
21whitespace_re = re.compile(r"\s+")
22newline_re = re.compile(r"(\r\n|\r|\n)")
23string_re = re.compile(
24    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
25)
26integer_re = re.compile(r"(\d+_)*\d+")
27float_re = re.compile(
28    r"""
29    (?<!\.)  # doesn't start with a .
30    (\d+_)*\d+  # digits, possibly _ separated
31    (
32        (\.(\d+_)*\d+)?  # optional fractional part
33        e[+\-]?(\d+_)*\d+  # exponent part
34    |
35        \.(\d+_)*\d+  # required fractional part
36    )
37    """,
38    re.IGNORECASE | re.VERBOSE,
39)
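
# Illustrative examples (not part of the original source) of what the number
# patterns above accept; "_" separators follow Python literal syntax:
#
#     integer_re.fullmatch("1_000")  # matches
#     float_re.fullmatch("12.5")     # matches (fractional part)
#     float_re.fullmatch("1e10")     # matches (exponent part)
#     float_re.fullmatch("42")       # None; plain digits are integers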

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)
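
# Illustrative note (not part of the original source): operator_re sorts the
# operators by descending length, so multi-character operators win over their
# single-character prefixes:
#
#     operator_re.match("**").group()  # "**", not "*"
#     operator_re.match("//").group()  # "//", not "/"
#     operator_re.match(">=").group()  # ">=", not ">"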

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)
        if type == TOKEN_NAME:
            return value
    else:
        type = expr
    return _describe_token_type(type)
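
# Illustrative examples (not part of the original source):
#
#     describe_token_expr("name:endfor")  # "endfor"
#     describe_token_expr("block_end")    # "end of statement block"
#     describe_token_expr("eq")           # "=="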


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))
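
# Illustrative example (not part of the original source): all three newline
# conventions are counted.
#
#     count_newlines("a\r\nb\rc\nd")  # 3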


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
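
# Illustrative note (not part of the original source): with Jinja's default
# delimiters all three start strings ("{#", "{%", "{{") have the same length,
# so the longest-first sort falls back to the token name and yields the
# variable_begin, comment_begin and block_begin rules in that order.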


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""

    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == "name":
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return f"Token({self.lineno!r}, {self.type!r}, {self.value!r})"
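
# Illustrative examples (not part of the original source): Token is a
# (lineno, type, value) tuple with attribute access and expression tests.
#
#     tok = Token(1, TOKEN_NAME, "foo")
#     tok.type, tok.value                # ("name", "foo")
#     tok.test("name")                   # True
#     tok.test("name:foo")               # True
#     tok.test_any("string", "integer")  # False
#     str(tok)                           # "foo"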


class TokenStreamIterator:
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    __nonzero__ = __bool__  # py2

    @property
    def eos(self):
        """Are we at the end of the stream?"""
        return not self

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )
            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )
        try:
            return self.current
        finally:
            next(self)
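
# Illustrative usage (not part of the original source): the parser drives a
# TokenStream one token at a time instead of iterating it.
#
#     stream = TokenStream(iter([Token(1, TOKEN_NAME, "foo"),
#                                Token(1, TOKEN_EOF, "")]), None, None)
#     stream.current              # Token(1, 'name', 'foo')
#     stream.skip_if("name:foo")  # True, advances to the eof token
#     stream.eos                  # True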


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer
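
# Illustrative note (not part of the original source): environments whose
# lexer-relevant settings match share a single Lexer instance via _lexer_cache.
# For two hypothetical Environment objects env_a and env_b configured with the
# same delimiters and whitespace options:
#
#     get_lexer(env_a) is get_lexer(env_b)  # True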


class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):
        return super().__new__(cls, members)


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        e = re.escape

        def c(x):
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        # If lstrip is enabled, it should not be applied if there is any
        # non-whitespace between the newline and block.
        self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            fr"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            fr"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [fr"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules = {
            "root": [
                # directives
                (
                    c(fr"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),
                    "#bygroup",
                ),
                # data
                (c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (
                    c(
                        fr"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        fr"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                (c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (
                    c(
                        fr"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        fr"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (
                    c(fr"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (
                    c(
                        fr"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        fr"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        fr"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),
                    "#pop",
                ),
                (c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value):
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter + wrap and wraps the result in a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)
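
    # Illustrative usage (not part of the original source), assuming a default
    # jinja2.Environment instance named env:
    #
    #     for tok in env.lexer.tokenize("Hello {{ name }}!"):
    #         print(tok.lineno, tok.type, tok.value)
    #
    # yields roughly: data "Hello ", variable_begin, name, variable_end and
    # data "!"; the whitespace inside the tag is filtered out by wrap().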

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value)
            elif token == "keyword":
                token = value
            elif token == TOKEN_NAME:
                value = str(value)
                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value.replace("_", ""))
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            if source.endswith(("\r\n", "\r", "\n")):
                lines.append("")
        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]
        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")
        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack = []
        lstrip_unless_re = self.lstrip_unless_re
        newlines_stripped = 0
        line_starting = True

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]

                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = (stripped,) + groups[1:]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and lstrip_unless_re is not None
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1
                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if not lstrip_unless_re.search(text, l_pos):
                                    groups = (text[:l_pos],) + groups[1:]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # strings as token are just yielded as-is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )
                # publish the new position and start again
                pos = pos2
                break
            # if the loop terminated without break we haven't found a single
            # match; either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
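
# Illustrative end-to-end sketch (not part of the original source), assuming a
# default jinja2.Environment instance named env:
#
#     lexer = get_lexer(env)
#     for lineno, tok, value in lexer.tokeniter("{% if x %}hi{% endif %}", "t"):
#         print(lineno, tok, value)
#
# prints the raw (lineno, token, value) triples, including the whitespace
# tokens that wrap() would later filter out.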