# -*- coding: utf-8 -*-
"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
is used to do some preprocessing. It filters out invalid operators like
the bitshift operators we don't allow in templates. It separates
template code and python code in expressions.
"""
import re
from ast import literal_eval
from collections import deque
from operator import itemgetter

from ._compat import implements_iterator
from ._compat import intern
from ._compat import iteritems
from ._compat import text_type
from .exceptions import TemplateSyntaxError
from .utils import LRUCache

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r"\s+", re.U)
newline_re = re.compile(r"(\r\n|\r|\n)")
string_re = re.compile(
    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
)
integer_re = re.compile(r"(\d+_)*\d+")
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
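
# For illustration, the number patterns above behave like this:
#
#   float_re.match("42.0")     # matches
#   float_re.match("2.5e-3")   # matches
#   float_re.match("42")       # None -- plain integers are left for
#                              # integer_re in the tag rules further below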

try:
    # check if this Python supports Unicode identifiers
    compile("föö", "<unknown>", "eval")
except SyntaxError:
    # Python 2, no Unicode support, use ASCII identifiers
    name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
    check_ident = False
else:
    # Unicode support, import generated re pattern and set flag to use
    # str.isidentifier to validate during lexing.
    from ._identifier import pattern as name_re

    check_ident = True

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    "(%s)" % "|".join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))
)
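
# For illustration: because the alternation is built from operators sorted
# by descending length, a multi-character operator always wins over its
# one-character prefix:
#
#   operator_re.match("**").group()  # '**', not '*'
#   operator_re.match("<=").group()  # '<=', not '<'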

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)
        if type == TOKEN_NAME:
            return value
    else:
        type = expr
    return _describe_token_type(type)


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""

    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == "name":
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return "Token(%r, %r, %r)" % (self.lineno, self.type, self.value)

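# For illustration, Token.test and Token.test_any accept either a bare
# token type or a "type:value" expression:
#
#   tok = Token(1, TOKEN_NAME, "foo")
#   tok.test("name")                     # True
#   tok.test("name:foo")                 # True
#   tok.test_any("string", "name:bar")   # False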

@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    __nonzero__ = __bool__  # py2

    @property
    def eos(self):
        """Are we at the end of the stream?"""
        return not self

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    "unexpected end of template, expected %r." % expr,
                    self.current.lineno,
                    self.name,
                    self.filename,
                )
            raise TemplateSyntaxError(
                "expected token %r, got %r" % (expr, describe_token(self.current)),
                self.current.lineno,
                self.name,
                self.filename,
            )
        try:
            return self.current
        finally:
            next(self)

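# Illustrative parser-style use of TokenStream (``Environment`` here refers
# to the regular jinja2 environment class, which is not imported in this
# module):
#
#   stream = get_lexer(Environment()).tokenize("{{ foo }}")
#   stream.current                   # Token(1, 'variable_begin', '{{')
#   stream.expect("variable_begin")  # returns the token and advances
#   stream.expect("name").value      # 'foo'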

def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer


class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):
        return super(OptionalLStrip, cls).__new__(cls, members)


class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        e = re.escape

        def c(x):
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" in a regex tries
        # alternatives from left to right, we have to sort by length so
        # that the lexer keeps working as expected with parsing rules
        # like <% for blocks and <%= for variables (if someone wants
        # ASP-like syntax). variables are just part of the rules if
        # variable processing is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and "\\n?" or ""

        # If lstrip is enabled, it should not be applied if there is any
        # non-whitespace between the newline and block.
        self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            "root": [
                # directives
                (
                    c(
                        "(.*?)(?:%s)"
                        % "|".join(
                            [
                                r"(?P<raw_begin>%s(\-|\+|)\s*raw\s*(?:\-%s\s*|%s))"
                                % (
                                    e(environment.block_start_string),
                                    e(environment.block_end_string),
                                    e(environment.block_end_string),
                                )
                            ]
                            + [
                                r"(?P<%s>%s(\-|\+|))" % (n, r)
                                for n, r in root_tag_rules
                            ]
                        )
                    ),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),
                    "#bygroup",
                ),
                # data
                (c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (
                    c(
                        r"(.*?)((?:\-%s\s*|%s)%s)"
                        % (
                            e(environment.comment_end_string),
                            e(environment.comment_end_string),
                            block_suffix_re,
                        )
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                (c("(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (
                    c(
                        r"(?:\-%s\s*|%s)%s"
                        % (
                            e(environment.block_end_string),
                            e(environment.block_end_string),
                            block_suffix_re,
                        )
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (
                    c(
                        r"\-%s\s*|%s"
                        % (
                            e(environment.variable_end_string),
                            e(environment.variable_end_string),
                        )
                    ),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (
                    c(
                        r"(.*?)((?:%s(\-|\+|))\s*endraw\s*(?:\-%s\s*|%s%s))"
                        % (
                            e(environment.block_start_string),
                            e(environment.block_end_string),
                            e(environment.block_end_string),
                            block_suffix_re,
                        )
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),
                    "#pop",
                ),
                (c("(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize newlines."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter and wrap and returns a :class:`TokenStream`."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

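    # Illustrative output of ``tokenize`` for a small template, assuming a
    # default jinja2 Environment (whitespace tokens are filtered out by
    # ``wrap`` below):
    #
    #   stream = get_lexer(Environment()).tokenize("Hello {{ name }}!")
    #   [t.type for t in stream]
    #   # ['data', 'variable_begin', 'name', 'variable_end', 'data']
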
    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value)
            elif token == "keyword":
                token = value
            elif token == TOKEN_NAME:
                value = str(value)
                if check_ident and not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
            elif token == TOKEN_INTEGER:
                value = int(value.replace("_", ""))
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ("\r\n", "\r", "\n"):
                if source.endswith(newline):
                    lines.append("")
                    break
        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]
        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")
        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack = []
        lstrip_unless_re = self.lstrip_unless_re
        newlines_stripped = 0
        line_starting = True

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]

                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = (stripped,) + groups[1:]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and lstrip_unless_re is not None
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1
                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if not lstrip_unless_re.search(text, l_pos):
                                    groups = (text[:l_pos],) + groups[1:]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    "%r wanted to resolve "
                                    "the token dynamically"
                                    " but no group matched" % regex
                                )
                        # normal group
                        else:
                            data = groups[idx]
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # plain string token types are just yielded as-is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    "unexpected '%s'" % data, lineno, name, filename
                                )
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    "unexpected '%s', "
                                    "expected '%s'" % (data, expected_op),
                                    lineno,
                                    name,
                                    filename,
                                )
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                "%r wanted to resolve the "
                                "new state dynamically but"
                                " no group matched" % regex
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        "%r yielded empty string without stack change" % regex
                    )
                # publish the new position and start again
                pos = pos2
                break
            # if the loop terminated without break, we haven't found a single
            # match: either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError(
                    "unexpected char %r at %d" % (source[pos], pos),
                    lineno,
                    name,
                    filename,
                )

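# For illustration, ``tokeniter`` yields plain ``(lineno, token, value)``
# tuples, including whitespace, while ``tokenize``/``wrap`` turn them into
# :class:`Token` instances and drop ignored tokens (``Environment`` again
# refers to the regular jinja2 environment class, not imported here):
#
#   list(get_lexer(Environment()).tokeniter("{{ x }}", "tmpl"))
#   # [(1, 'variable_begin', '{{'), (1, 'whitespace', ' '),
#   #  (1, 'name', 'x'), (1, 'whitespace', ' '), (1, 'variable_end', '}}')]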