1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class 2is used to do some preprocessing. It filters out invalid operators like 3the bitshift operators we don't allow in templates. It separates 4template code and python code in expressions. 5""" 6import re 7from ast import literal_eval 8from collections import deque 9from operator import itemgetter 10from sys import intern 11 12from ._identifier import pattern as name_re 13from .exceptions import TemplateSyntaxError 14from .utils import LRUCache 15 16# cache for the lexers. Exists in order to be able to have multiple 17# environments with the same lexer 18_lexer_cache = LRUCache(50) 19 20# static regular expressions 21whitespace_re = re.compile(r"\s+") 22newline_re = re.compile(r"(\r\n|\r|\n)") 23string_re = re.compile( 24 r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S 25) 26integer_re = re.compile(r"(\d+_)*\d+") 27float_re = re.compile( 28 r""" 29 (?<!\.) # doesn't start with a . 30 (\d+_)*\d+ # digits, possibly _ separated 31 ( 32 (\.(\d+_)*\d+)? # optional fractional part 33 e[+\-]?(\d+_)*\d+ # exponent part 34 | 35 \.(\d+_)*\d+ # required fractional part 36 ) 37 """, 38 re.IGNORECASE | re.VERBOSE, 39) 40 41# internal the tokens and keep references to them 42TOKEN_ADD = intern("add") 43TOKEN_ASSIGN = intern("assign") 44TOKEN_COLON = intern("colon") 45TOKEN_COMMA = intern("comma") 46TOKEN_DIV = intern("div") 47TOKEN_DOT = intern("dot") 48TOKEN_EQ = intern("eq") 49TOKEN_FLOORDIV = intern("floordiv") 50TOKEN_GT = intern("gt") 51TOKEN_GTEQ = intern("gteq") 52TOKEN_LBRACE = intern("lbrace") 53TOKEN_LBRACKET = intern("lbracket") 54TOKEN_LPAREN = intern("lparen") 55TOKEN_LT = intern("lt") 56TOKEN_LTEQ = intern("lteq") 57TOKEN_MOD = intern("mod") 58TOKEN_MUL = intern("mul") 59TOKEN_NE = intern("ne") 60TOKEN_PIPE = intern("pipe") 61TOKEN_POW = intern("pow") 62TOKEN_RBRACE = intern("rbrace") 63TOKEN_RBRACKET = intern("rbracket") 64TOKEN_RPAREN = intern("rparen") 65TOKEN_SEMICOLON = intern("semicolon") 66TOKEN_SUB = intern("sub") 67TOKEN_TILDE = intern("tilde") 68TOKEN_WHITESPACE = intern("whitespace") 69TOKEN_FLOAT = intern("float") 70TOKEN_INTEGER = intern("integer") 71TOKEN_NAME = intern("name") 72TOKEN_STRING = intern("string") 73TOKEN_OPERATOR = intern("operator") 74TOKEN_BLOCK_BEGIN = intern("block_begin") 75TOKEN_BLOCK_END = intern("block_end") 76TOKEN_VARIABLE_BEGIN = intern("variable_begin") 77TOKEN_VARIABLE_END = intern("variable_end") 78TOKEN_RAW_BEGIN = intern("raw_begin") 79TOKEN_RAW_END = intern("raw_end") 80TOKEN_COMMENT_BEGIN = intern("comment_begin") 81TOKEN_COMMENT_END = intern("comment_end") 82TOKEN_COMMENT = intern("comment") 83TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin") 84TOKEN_LINESTATEMENT_END = intern("linestatement_end") 85TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin") 86TOKEN_LINECOMMENT_END = intern("linecomment_end") 87TOKEN_LINECOMMENT = intern("linecomment") 88TOKEN_DATA = intern("data") 89TOKEN_INITIAL = intern("initial") 90TOKEN_EOF = intern("eof") 91 92# bind operators to token types 93operators = { 94 "+": TOKEN_ADD, 95 "-": TOKEN_SUB, 96 "/": TOKEN_DIV, 97 "//": TOKEN_FLOORDIV, 98 "*": TOKEN_MUL, 99 "%": TOKEN_MOD, 100 "**": TOKEN_POW, 101 "~": TOKEN_TILDE, 102 "[": TOKEN_LBRACKET, 103 "]": TOKEN_RBRACKET, 104 "(": TOKEN_LPAREN, 105 ")": TOKEN_RPAREN, 106 "{": TOKEN_LBRACE, 107 "}": TOKEN_RBRACE, 108 "==": TOKEN_EQ, 109 "!=": TOKEN_NE, 110 ">": TOKEN_GT, 111 ">=": TOKEN_GTEQ, 112 "<": TOKEN_LT, 113 "<=": TOKEN_LTEQ, 114 "=": TOKEN_ASSIGN, 115 ".": TOKEN_DOT, 116 ":": TOKEN_COLON, 117 "|": TOKEN_PIPE, 118 ",": TOKEN_COMMA, 119 ";": TOKEN_SEMICOLON, 120} 121 122reverse_operators = {v: k for k, v in operators.items()} 123assert len(operators) == len(reverse_operators), "operators dropped" 124operator_re = re.compile( 125 f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" 126) 127 128ignored_tokens = frozenset( 129 [ 130 TOKEN_COMMENT_BEGIN, 131 TOKEN_COMMENT, 132 TOKEN_COMMENT_END, 133 TOKEN_WHITESPACE, 134 TOKEN_LINECOMMENT_BEGIN, 135 TOKEN_LINECOMMENT_END, 136 TOKEN_LINECOMMENT, 137 ] 138) 139ignore_if_empty = frozenset( 140 [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] 141) 142 143 144def _describe_token_type(token_type): 145 if token_type in reverse_operators: 146 return reverse_operators[token_type] 147 return { 148 TOKEN_COMMENT_BEGIN: "begin of comment", 149 TOKEN_COMMENT_END: "end of comment", 150 TOKEN_COMMENT: "comment", 151 TOKEN_LINECOMMENT: "comment", 152 TOKEN_BLOCK_BEGIN: "begin of statement block", 153 TOKEN_BLOCK_END: "end of statement block", 154 TOKEN_VARIABLE_BEGIN: "begin of print statement", 155 TOKEN_VARIABLE_END: "end of print statement", 156 TOKEN_LINESTATEMENT_BEGIN: "begin of line statement", 157 TOKEN_LINESTATEMENT_END: "end of line statement", 158 TOKEN_DATA: "template data / text", 159 TOKEN_EOF: "end of template", 160 }.get(token_type, token_type) 161 162 163def describe_token(token): 164 """Returns a description of the token.""" 165 if token.type == TOKEN_NAME: 166 return token.value 167 return _describe_token_type(token.type) 168 169 170def describe_token_expr(expr): 171 """Like `describe_token` but for token expressions.""" 172 if ":" in expr: 173 type, value = expr.split(":", 1) 174 if type == TOKEN_NAME: 175 return value 176 else: 177 type = expr 178 return _describe_token_type(type) 179 180 181def count_newlines(value): 182 """Count the number of newline characters in the string. This is 183 useful for extensions that filter a stream. 184 """ 185 return len(newline_re.findall(value)) 186 187 188def compile_rules(environment): 189 """Compiles all the rules from the environment into a list of rules.""" 190 e = re.escape 191 rules = [ 192 ( 193 len(environment.comment_start_string), 194 TOKEN_COMMENT_BEGIN, 195 e(environment.comment_start_string), 196 ), 197 ( 198 len(environment.block_start_string), 199 TOKEN_BLOCK_BEGIN, 200 e(environment.block_start_string), 201 ), 202 ( 203 len(environment.variable_start_string), 204 TOKEN_VARIABLE_BEGIN, 205 e(environment.variable_start_string), 206 ), 207 ] 208 209 if environment.line_statement_prefix is not None: 210 rules.append( 211 ( 212 len(environment.line_statement_prefix), 213 TOKEN_LINESTATEMENT_BEGIN, 214 r"^[ \t\v]*" + e(environment.line_statement_prefix), 215 ) 216 ) 217 if environment.line_comment_prefix is not None: 218 rules.append( 219 ( 220 len(environment.line_comment_prefix), 221 TOKEN_LINECOMMENT_BEGIN, 222 r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix), 223 ) 224 ) 225 226 return [x[1:] for x in sorted(rules, reverse=True)] 227 228 229class Failure: 230 """Class that raises a `TemplateSyntaxError` if called. 231 Used by the `Lexer` to specify known errors. 232 """ 233 234 def __init__(self, message, cls=TemplateSyntaxError): 235 self.message = message 236 self.error_class = cls 237 238 def __call__(self, lineno, filename): 239 raise self.error_class(self.message, lineno, filename) 240 241 242class Token(tuple): 243 """Token class.""" 244 245 __slots__ = () 246 lineno, type, value = (property(itemgetter(x)) for x in range(3)) 247 248 def __new__(cls, lineno, type, value): 249 return tuple.__new__(cls, (lineno, intern(str(type)), value)) 250 251 def __str__(self): 252 if self.type in reverse_operators: 253 return reverse_operators[self.type] 254 elif self.type == "name": 255 return self.value 256 return self.type 257 258 def test(self, expr): 259 """Test a token against a token expression. This can either be a 260 token type or ``'token_type:token_value'``. This can only test 261 against string values and types. 262 """ 263 # here we do a regular string equality check as test_any is usually 264 # passed an iterable of not interned strings. 265 if self.type == expr: 266 return True 267 elif ":" in expr: 268 return expr.split(":", 1) == [self.type, self.value] 269 return False 270 271 def test_any(self, *iterable): 272 """Test against multiple token expressions.""" 273 for expr in iterable: 274 if self.test(expr): 275 return True 276 return False 277 278 def __repr__(self): 279 return f"Token({self.lineno!r}, {self.type!r}, {self.value!r})" 280 281 282class TokenStreamIterator: 283 """The iterator for tokenstreams. Iterate over the stream 284 until the eof token is reached. 285 """ 286 287 def __init__(self, stream): 288 self.stream = stream 289 290 def __iter__(self): 291 return self 292 293 def __next__(self): 294 token = self.stream.current 295 if token.type is TOKEN_EOF: 296 self.stream.close() 297 raise StopIteration() 298 next(self.stream) 299 return token 300 301 302class TokenStream: 303 """A token stream is an iterable that yields :class:`Token`\\s. The 304 parser however does not iterate over it but calls :meth:`next` to go 305 one token ahead. The current active token is stored as :attr:`current`. 306 """ 307 308 def __init__(self, generator, name, filename): 309 self._iter = iter(generator) 310 self._pushed = deque() 311 self.name = name 312 self.filename = filename 313 self.closed = False 314 self.current = Token(1, TOKEN_INITIAL, "") 315 next(self) 316 317 def __iter__(self): 318 return TokenStreamIterator(self) 319 320 def __bool__(self): 321 return bool(self._pushed) or self.current.type is not TOKEN_EOF 322 323 __nonzero__ = __bool__ # py2 324 325 @property 326 def eos(self): 327 """Are we at the end of the stream?""" 328 return not self 329 330 def push(self, token): 331 """Push a token back to the stream.""" 332 self._pushed.append(token) 333 334 def look(self): 335 """Look at the next token.""" 336 old_token = next(self) 337 result = self.current 338 self.push(result) 339 self.current = old_token 340 return result 341 342 def skip(self, n=1): 343 """Got n tokens ahead.""" 344 for _ in range(n): 345 next(self) 346 347 def next_if(self, expr): 348 """Perform the token test and return the token if it matched. 349 Otherwise the return value is `None`. 350 """ 351 if self.current.test(expr): 352 return next(self) 353 354 def skip_if(self, expr): 355 """Like :meth:`next_if` but only returns `True` or `False`.""" 356 return self.next_if(expr) is not None 357 358 def __next__(self): 359 """Go one token ahead and return the old one. 360 361 Use the built-in :func:`next` instead of calling this directly. 362 """ 363 rv = self.current 364 if self._pushed: 365 self.current = self._pushed.popleft() 366 elif self.current.type is not TOKEN_EOF: 367 try: 368 self.current = next(self._iter) 369 except StopIteration: 370 self.close() 371 return rv 372 373 def close(self): 374 """Close the stream.""" 375 self.current = Token(self.current.lineno, TOKEN_EOF, "") 376 self._iter = None 377 self.closed = True 378 379 def expect(self, expr): 380 """Expect a given token type and return it. This accepts the same 381 argument as :meth:`jinja2.lexer.Token.test`. 382 """ 383 if not self.current.test(expr): 384 expr = describe_token_expr(expr) 385 if self.current.type is TOKEN_EOF: 386 raise TemplateSyntaxError( 387 f"unexpected end of template, expected {expr!r}.", 388 self.current.lineno, 389 self.name, 390 self.filename, 391 ) 392 raise TemplateSyntaxError( 393 f"expected token {expr!r}, got {describe_token(self.current)!r}", 394 self.current.lineno, 395 self.name, 396 self.filename, 397 ) 398 try: 399 return self.current 400 finally: 401 next(self) 402 403 404def get_lexer(environment): 405 """Return a lexer which is probably cached.""" 406 key = ( 407 environment.block_start_string, 408 environment.block_end_string, 409 environment.variable_start_string, 410 environment.variable_end_string, 411 environment.comment_start_string, 412 environment.comment_end_string, 413 environment.line_statement_prefix, 414 environment.line_comment_prefix, 415 environment.trim_blocks, 416 environment.lstrip_blocks, 417 environment.newline_sequence, 418 environment.keep_trailing_newline, 419 ) 420 lexer = _lexer_cache.get(key) 421 if lexer is None: 422 lexer = Lexer(environment) 423 _lexer_cache[key] = lexer 424 return lexer 425 426 427class OptionalLStrip(tuple): 428 """A special tuple for marking a point in the state that can have 429 lstrip applied. 430 """ 431 432 __slots__ = () 433 434 # Even though it looks like a no-op, creating instances fails 435 # without this. 436 def __new__(cls, *members, **kwargs): 437 return super().__new__(cls, members) 438 439 440class Lexer: 441 """Class that implements a lexer for a given environment. Automatically 442 created by the environment class, usually you don't have to do that. 443 444 Note that the lexer is not automatically bound to an environment. 445 Multiple environments can share the same lexer. 446 """ 447 448 def __init__(self, environment): 449 # shortcuts 450 e = re.escape 451 452 def c(x): 453 return re.compile(x, re.M | re.S) 454 455 # lexing rules for tags 456 tag_rules = [ 457 (whitespace_re, TOKEN_WHITESPACE, None), 458 (float_re, TOKEN_FLOAT, None), 459 (integer_re, TOKEN_INTEGER, None), 460 (name_re, TOKEN_NAME, None), 461 (string_re, TOKEN_STRING, None), 462 (operator_re, TOKEN_OPERATOR, None), 463 ] 464 465 # assemble the root lexing rule. because "|" is ungreedy 466 # we have to sort by length so that the lexer continues working 467 # as expected when we have parsing rules like <% for block and 468 # <%= for variables. (if someone wants asp like syntax) 469 # variables are just part of the rules if variable processing 470 # is required. 471 root_tag_rules = compile_rules(environment) 472 473 block_start_re = e(environment.block_start_string) 474 block_end_re = e(environment.block_end_string) 475 comment_end_re = e(environment.comment_end_string) 476 variable_end_re = e(environment.variable_end_string) 477 478 # block suffix if trimming is enabled 479 block_suffix_re = "\\n?" if environment.trim_blocks else "" 480 481 # If lstrip is enabled, it should not be applied if there is any 482 # non-whitespace between the newline and block. 483 self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None 484 485 self.newline_sequence = environment.newline_sequence 486 self.keep_trailing_newline = environment.keep_trailing_newline 487 488 root_raw_re = ( 489 fr"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*" 490 fr"(?:\-{block_end_re}\s*|{block_end_re}))" 491 ) 492 root_parts_re = "|".join( 493 [root_raw_re] + [fr"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules] 494 ) 495 496 # global lexing rules 497 self.rules = { 498 "root": [ 499 # directives 500 ( 501 c(fr"(.*?)(?:{root_parts_re})"), 502 OptionalLStrip(TOKEN_DATA, "#bygroup"), 503 "#bygroup", 504 ), 505 # data 506 (c(".+"), TOKEN_DATA, None), 507 ], 508 # comments 509 TOKEN_COMMENT_BEGIN: [ 510 ( 511 c( 512 fr"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*" 513 fr"|{comment_end_re}{block_suffix_re}))" 514 ), 515 (TOKEN_COMMENT, TOKEN_COMMENT_END), 516 "#pop", 517 ), 518 (c(r"(.)"), (Failure("Missing end of comment tag"),), None), 519 ], 520 # blocks 521 TOKEN_BLOCK_BEGIN: [ 522 ( 523 c( 524 fr"(?:\+{block_end_re}|\-{block_end_re}\s*" 525 fr"|{block_end_re}{block_suffix_re})" 526 ), 527 TOKEN_BLOCK_END, 528 "#pop", 529 ), 530 ] 531 + tag_rules, 532 # variables 533 TOKEN_VARIABLE_BEGIN: [ 534 ( 535 c(fr"\-{variable_end_re}\s*|{variable_end_re}"), 536 TOKEN_VARIABLE_END, 537 "#pop", 538 ) 539 ] 540 + tag_rules, 541 # raw block 542 TOKEN_RAW_BEGIN: [ 543 ( 544 c( 545 fr"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*" 546 fr"(?:\+{block_end_re}|\-{block_end_re}\s*" 547 fr"|{block_end_re}{block_suffix_re}))" 548 ), 549 OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END), 550 "#pop", 551 ), 552 (c(r"(.)"), (Failure("Missing end of raw directive"),), None), 553 ], 554 # line statements 555 TOKEN_LINESTATEMENT_BEGIN: [ 556 (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop") 557 ] 558 + tag_rules, 559 # line comments 560 TOKEN_LINECOMMENT_BEGIN: [ 561 ( 562 c(r"(.*?)()(?=\n|$)"), 563 (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END), 564 "#pop", 565 ) 566 ], 567 } 568 569 def _normalize_newlines(self, value): 570 """Replace all newlines with the configured sequence in strings 571 and template data. 572 """ 573 return newline_re.sub(self.newline_sequence, value) 574 575 def tokenize(self, source, name=None, filename=None, state=None): 576 """Calls tokeniter + tokenize and wraps it in a token stream.""" 577 stream = self.tokeniter(source, name, filename, state) 578 return TokenStream(self.wrap(stream, name, filename), name, filename) 579 580 def wrap(self, stream, name=None, filename=None): 581 """This is called with the stream as returned by `tokenize` and wraps 582 every token in a :class:`Token` and converts the value. 583 """ 584 for lineno, token, value in stream: 585 if token in ignored_tokens: 586 continue 587 elif token == TOKEN_LINESTATEMENT_BEGIN: 588 token = TOKEN_BLOCK_BEGIN 589 elif token == TOKEN_LINESTATEMENT_END: 590 token = TOKEN_BLOCK_END 591 # we are not interested in those tokens in the parser 592 elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): 593 continue 594 elif token == TOKEN_DATA: 595 value = self._normalize_newlines(value) 596 elif token == "keyword": 597 token = value 598 elif token == TOKEN_NAME: 599 value = str(value) 600 if not value.isidentifier(): 601 raise TemplateSyntaxError( 602 "Invalid character in identifier", lineno, name, filename 603 ) 604 elif token == TOKEN_STRING: 605 # try to unescape string 606 try: 607 value = ( 608 self._normalize_newlines(value[1:-1]) 609 .encode("ascii", "backslashreplace") 610 .decode("unicode-escape") 611 ) 612 except Exception as e: 613 msg = str(e).split(":")[-1].strip() 614 raise TemplateSyntaxError(msg, lineno, name, filename) 615 elif token == TOKEN_INTEGER: 616 value = int(value.replace("_", "")) 617 elif token == TOKEN_FLOAT: 618 # remove all "_" first to support more Python versions 619 value = literal_eval(value.replace("_", "")) 620 elif token == TOKEN_OPERATOR: 621 token = operators[value] 622 yield Token(lineno, token, value) 623 624 def tokeniter(self, source, name, filename=None, state=None): 625 """This method tokenizes the text and returns the tokens in a 626 generator. Use this method if you just want to tokenize a template. 627 """ 628 lines = source.splitlines() 629 if self.keep_trailing_newline and source: 630 if source.endswith(("\r\n", "\r", "\n")): 631 lines.append("") 632 source = "\n".join(lines) 633 pos = 0 634 lineno = 1 635 stack = ["root"] 636 if state is not None and state != "root": 637 assert state in ("variable", "block"), "invalid state" 638 stack.append(state + "_begin") 639 statetokens = self.rules[stack[-1]] 640 source_length = len(source) 641 balancing_stack = [] 642 lstrip_unless_re = self.lstrip_unless_re 643 newlines_stripped = 0 644 line_starting = True 645 646 while 1: 647 # tokenizer loop 648 for regex, tokens, new_state in statetokens: 649 m = regex.match(source, pos) 650 # if no match we try again with the next rule 651 if m is None: 652 continue 653 654 # we only match blocks and variables if braces / parentheses 655 # are balanced. continue parsing with the lower rule which 656 # is the operator rule. do this only if the end tags look 657 # like operators 658 if balancing_stack and tokens in ( 659 TOKEN_VARIABLE_END, 660 TOKEN_BLOCK_END, 661 TOKEN_LINESTATEMENT_END, 662 ): 663 continue 664 665 # tuples support more options 666 if isinstance(tokens, tuple): 667 groups = m.groups() 668 669 if isinstance(tokens, OptionalLStrip): 670 # Rule supports lstrip. Match will look like 671 # text, block type, whitespace control, type, control, ... 672 text = groups[0] 673 674 # Skipping the text and first type, every other group is the 675 # whitespace control for each type. One of the groups will be 676 # -, +, or empty string instead of None. 677 strip_sign = next(g for g in groups[2::2] if g is not None) 678 679 if strip_sign == "-": 680 # Strip all whitespace between the text and the tag. 681 stripped = text.rstrip() 682 newlines_stripped = text[len(stripped) :].count("\n") 683 groups = (stripped,) + groups[1:] 684 elif ( 685 # Not marked for preserving whitespace. 686 strip_sign != "+" 687 # lstrip is enabled. 688 and lstrip_unless_re is not None 689 # Not a variable expression. 690 and not m.groupdict().get(TOKEN_VARIABLE_BEGIN) 691 ): 692 # The start of text between the last newline and the tag. 693 l_pos = text.rfind("\n") + 1 694 if l_pos > 0 or line_starting: 695 # If there's only whitespace between the newline and the 696 # tag, strip it. 697 if not lstrip_unless_re.search(text, l_pos): 698 groups = (text[:l_pos],) + groups[1:] 699 700 for idx, token in enumerate(tokens): 701 # failure group 702 if token.__class__ is Failure: 703 raise token(lineno, filename) 704 # bygroup is a bit more complex, in that case we 705 # yield for the current token the first named 706 # group that matched 707 elif token == "#bygroup": 708 for key, value in m.groupdict().items(): 709 if value is not None: 710 yield lineno, key, value 711 lineno += value.count("\n") 712 break 713 else: 714 raise RuntimeError( 715 f"{regex!r} wanted to resolve the token dynamically" 716 " but no group matched" 717 ) 718 # normal group 719 else: 720 data = groups[idx] 721 if data or token not in ignore_if_empty: 722 yield lineno, token, data 723 lineno += data.count("\n") + newlines_stripped 724 newlines_stripped = 0 725 726 # strings as token just are yielded as it. 727 else: 728 data = m.group() 729 # update brace/parentheses balance 730 if tokens == TOKEN_OPERATOR: 731 if data == "{": 732 balancing_stack.append("}") 733 elif data == "(": 734 balancing_stack.append(")") 735 elif data == "[": 736 balancing_stack.append("]") 737 elif data in ("}", ")", "]"): 738 if not balancing_stack: 739 raise TemplateSyntaxError( 740 f"unexpected '{data}'", lineno, name, filename 741 ) 742 expected_op = balancing_stack.pop() 743 if expected_op != data: 744 raise TemplateSyntaxError( 745 f"unexpected '{data}', expected '{expected_op}'", 746 lineno, 747 name, 748 filename, 749 ) 750 # yield items 751 if data or tokens not in ignore_if_empty: 752 yield lineno, tokens, data 753 lineno += data.count("\n") 754 755 line_starting = m.group()[-1:] == "\n" 756 757 # fetch new position into new variable so that we can check 758 # if there is a internal parsing error which would result 759 # in an infinite loop 760 pos2 = m.end() 761 762 # handle state changes 763 if new_state is not None: 764 # remove the uppermost state 765 if new_state == "#pop": 766 stack.pop() 767 # resolve the new state by group checking 768 elif new_state == "#bygroup": 769 for key, value in m.groupdict().items(): 770 if value is not None: 771 stack.append(key) 772 break 773 else: 774 raise RuntimeError( 775 f"{regex!r} wanted to resolve the new state dynamically" 776 f" but no group matched" 777 ) 778 # direct state name given 779 else: 780 stack.append(new_state) 781 statetokens = self.rules[stack[-1]] 782 # we are still at the same position and no stack change. 783 # this means a loop without break condition, avoid that and 784 # raise error 785 elif pos2 == pos: 786 raise RuntimeError( 787 f"{regex!r} yielded empty string without stack change" 788 ) 789 # publish new function and start again 790 pos = pos2 791 break 792 # if loop terminated without break we haven't found a single match 793 # either we are at the end of the file or we have a problem 794 else: 795 # end of text 796 if pos >= source_length: 797 return 798 # something went wrong 799 raise TemplateSyntaxError( 800 f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename 801 ) 802