1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
2is used to do some preprocessing. It filters out invalid operators like
3the bitshift operators we don't allow in templates. It separates
4template code and python code in expressions.
5"""
6import re
7import typing as t
8from ast import literal_eval
9from collections import deque
10from sys import intern
11
12from ._identifier import pattern as name_re
13from .exceptions import TemplateSyntaxError
14from .utils import LRUCache
15
16if t.TYPE_CHECKING:
17    import typing_extensions as te
18    from .environment import Environment
19
20# cache for the lexers. Exists in order to be able to have multiple
21# environments with the same lexer
22_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50)  # type: ignore
23
24# static regular expressions
25whitespace_re = re.compile(r"\s+")
26newline_re = re.compile(r"(\r\n|\r|\n)")
27string_re = re.compile(
28    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
29)
30integer_re = re.compile(
31    r"""
32    (
33        0b(_?[0-1])+ # binary
34    |
35        0o(_?[0-7])+ # octal
36    |
37        0x(_?[\da-f])+ # hex
38    |
39        [1-9](_?\d)* # decimal
40    |
41        0(_?0)* # decimal zero
42    )
43    """,
44    re.IGNORECASE | re.VERBOSE,
45)
46float_re = re.compile(
47    r"""
48    (?<!\.)  # doesn't start with a .
49    (\d+_)*\d+  # digits, possibly _ separated
50    (
51        (\.(\d+_)*\d+)?  # optional fractional part
52        e[+\-]?(\d+_)*\d+  # exponent part
53    |
54        \.(\d+_)*\d+  # required fractional part
55    )
56    """,
57    re.IGNORECASE | re.VERBOSE,
58)
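
# Illustrative sketch (not part of the original module): a few literal
# spellings these patterns accept when used with match():
#
#   integer_re.match("1_000").group()  -> "1_000"
#   integer_re.match("0x_1f").group()  -> "0x_1f"
#   float_re.match("2.5e10").group()   -> "2.5e10"
#   float_re.match("12_34.5").group()  -> "12_34.5"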

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)
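
# Illustrative sketch (not part of the original module): because the
# alternatives above are sorted longest-first, multi-character operators
# win over their single-character prefixes:
#
#   operator_re.match("**").group()  -> "**"  (not "*")
#   operator_re.match("//").group()  -> "//"  (not "/")
#   operator_re.match(">=").group()  -> ">="  (not ">")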

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type: str) -> str:
    if token_type in reverse_operators:
        return reverse_operators[token_type]

    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value

    return _describe_token_type(token.type)


def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)

        if type == TOKEN_NAME:
            return value
    else:
        type = expr

    return _describe_token_type(type)
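
# Illustrative sketch (not part of the original module): how token
# expressions end up being described in error messages:
#
#   describe_token_expr("name:index")    -> "index"
#   describe_token_expr("variable_end")  -> "end of print statement"
#   describe_token_expr("rparen")        -> ")"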


def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
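
# Illustrative sketch (not part of the original module), assuming an
# Environment configured with ASP-style delimiters ("<%" for blocks,
# "<%=" for variables): sorting by length first guarantees that the
# longer "<%=" delimiter is tried before "<%", so compile_rules() places
# the variable_begin rule ahead of the block_begin rule in the result.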


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.message = message
        self.error_class = cls

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True

        if ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)
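
    # Illustrative sketch (not part of the original module): token
    # expressions in practice:
    #
    #   Token(1, "name", "foo").test("name")          -> True
    #   Token(1, "name", "foo").test("name:foo")      -> True
    #   Token(1, "name", "foo").test_any("integer", "name:foo")  -> True
    #   str(Token(1, "add", "+"))                     -> "+"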


class TokenStreamIterator:
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        token = self.stream.current

        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
361        """Got n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)
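
    # Illustrative sketch (not part of the original module): driving a
    # stream built from a plain list of tokens.
    #
    #   ts = TokenStream([Token(1, "name", "foo"), Token(1, "assign", "=")],
    #                    None, None)
    #   ts.current           -> Token(1, "name", "foo")
    #   ts.look()            -> Token(1, "assign", "=")  (current is unchanged)
    #   ts.expect("name")    -> Token(1, "name", "foo"), advances the stream
    #   ts.skip_if("assign") -> True; the stream is now exhausted (ts.eos)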


def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)

    if lexer is None:
        _lexer_cache[key] = lexer = Lexer(environment)

    return lexer
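
# Illustrative sketch (not part of the original module): two environments
# that share every setting in the cache key above also share a lexer, e.g.
#
#   get_lexer(Environment()) is get_lexer(Environment())  -> True
#
# (assuming both use the default delimiter and whitespace settings)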


class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)


class _Rule(t.NamedTuple):
    pattern: t.Pattern[str]
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    command: t.Optional[str]


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to create one
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is not greedy (the
        # leftmost alternative that matches wins) we have to sort by length
        # so that the lexer continues working as expected when we have
        # parsing rules like <% for blocks and <%= for variables (if
        # someone wants ASP-like syntax). variables are just part of the
        # rules if variable processing is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        self.lstrip_blocks = environment.lstrip_blocks

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(rf"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        rf"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }
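
    # Illustrative sketch (not part of the original module): how the rule
    # table above drives tokeniter(). The "root" rules match plain text up
    # to the next begin tag; "#bygroup" then pushes the state named after
    # the matched group (e.g. "variable_begin"), whose rules are the
    # end-tag rule plus the shared tag_rules, and "#pop" returns to "root":
    #
    #   "a {{ b }}"  ->  root:           data "a ", then push variable_begin
    #                    variable_begin: whitespace, name "b", whitespace,
    #                                    variable_end "}}", then pop to root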

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls tokeniter and wrap, and returns the result as a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)
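
    # Illustrative sketch (not part of the original module): a few value
    # conversions wrap() performs on the raw (lineno, type, value) tuples:
    #
    #   ("string",  "'foo'")  -> value "foo" (quotes stripped, escapes and
    #                            newlines resolved)
    #   ("integer", "1_000")  -> value 1000
    #   ("float",   "1.5e3")  -> value 1500.0
    #   ("operator", "+")     -> token type becomes "add"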

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups: t.Sequence[str] = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and self.lstrip_blocks
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if whitespace_re.fullmatch(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # a plain string token type: the whole match is yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without a break condition, so avoid that
                # and raise an error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # publish the new position and start again
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )

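
# Illustrative usage sketch (not part of the original module), assuming a
# default jinja2 Environment:
#
#   from jinja2 import Environment
#
#   lexer = get_lexer(Environment())
#   stream = lexer.tokenize("Hello {{ name }}!")
#   [(tok.type, tok.value) for tok in stream]
#   # -> [("data", "Hello "), ("variable_begin", "{{"), ("name", "name"),
#   #     ("variable_end", "}}"), ("data", "!")]
#
# Whitespace inside the tag is filtered out by wrap(); after the last token
# the stream reports eof.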