1# -*- coding: utf-8 -*- 2""" 3 jinja2.lexer 4 ~~~~~~~~~~~~ 5 6 This module implements a Jinja / Python combination lexer. The 7 `Lexer` class provided by this module is used to do some preprocessing 8 for Jinja. 9 10 On the one hand it filters out invalid operators like the bitshift 11 operators we don't allow in templates. On the other hand it separates 12 template code and python code in expressions. 13 14 :copyright: (c) 2017 by the Jinja Team. 15 :license: BSD, see LICENSE for more details. 16""" 17import re 18from collections import deque 19from operator import itemgetter 20 21from jinja2._compat import implements_iterator, intern, iteritems, text_type 22from jinja2.exceptions import TemplateSyntaxError 23from jinja2.utils import LRUCache 24 25# cache for the lexers. Exists in order to be able to have multiple 26# environments with the same lexer 27_lexer_cache = LRUCache(50) 28 29# static regular expressions 30whitespace_re = re.compile(r'\s+', re.U) 31string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" 32 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) 33integer_re = re.compile(r'\d+') 34 35try: 36 # check if this Python supports Unicode identifiers 37 compile('föö', '<unknown>', 'eval') 38except SyntaxError: 39 # no Unicode support, use ASCII identifiers 40 name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*') 41 check_ident = False 42else: 43 # Unicode support, build a pattern to match valid characters, and set flag 44 # to use str.isidentifier to validate during lexing 45 from jinja2 import _identifier 46 name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern)) 47 check_ident = True 48 # remove the pattern from memory after building the regex 49 import sys 50 del sys.modules['jinja2._identifier'] 51 import jinja2 52 del jinja2._identifier 53 del _identifier 54 55float_re = re.compile(r'(?<!\.)\d+\.\d+') 56newline_re = re.compile(r'(\r\n|\r|\n)') 57 58# internal the tokens and keep references to them 59TOKEN_ADD = intern('add') 60TOKEN_ASSIGN = intern('assign') 61TOKEN_COLON = intern('colon') 62TOKEN_COMMA = intern('comma') 63TOKEN_DIV = intern('div') 64TOKEN_DOT = intern('dot') 65TOKEN_EQ = intern('eq') 66TOKEN_FLOORDIV = intern('floordiv') 67TOKEN_GT = intern('gt') 68TOKEN_GTEQ = intern('gteq') 69TOKEN_LBRACE = intern('lbrace') 70TOKEN_LBRACKET = intern('lbracket') 71TOKEN_LPAREN = intern('lparen') 72TOKEN_LT = intern('lt') 73TOKEN_LTEQ = intern('lteq') 74TOKEN_MOD = intern('mod') 75TOKEN_MUL = intern('mul') 76TOKEN_NE = intern('ne') 77TOKEN_PIPE = intern('pipe') 78TOKEN_POW = intern('pow') 79TOKEN_RBRACE = intern('rbrace') 80TOKEN_RBRACKET = intern('rbracket') 81TOKEN_RPAREN = intern('rparen') 82TOKEN_SEMICOLON = intern('semicolon') 83TOKEN_SUB = intern('sub') 84TOKEN_TILDE = intern('tilde') 85TOKEN_WHITESPACE = intern('whitespace') 86TOKEN_FLOAT = intern('float') 87TOKEN_INTEGER = intern('integer') 88TOKEN_NAME = intern('name') 89TOKEN_STRING = intern('string') 90TOKEN_OPERATOR = intern('operator') 91TOKEN_BLOCK_BEGIN = intern('block_begin') 92TOKEN_BLOCK_END = intern('block_end') 93TOKEN_VARIABLE_BEGIN = intern('variable_begin') 94TOKEN_VARIABLE_END = intern('variable_end') 95TOKEN_RAW_BEGIN = intern('raw_begin') 96TOKEN_RAW_END = intern('raw_end') 97TOKEN_COMMENT_BEGIN = intern('comment_begin') 98TOKEN_COMMENT_END = intern('comment_end') 99TOKEN_COMMENT = intern('comment') 100TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin') 101TOKEN_LINESTATEMENT_END = intern('linestatement_end') 102TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin') 103TOKEN_LINECOMMENT_END = intern('linecomment_end') 104TOKEN_LINECOMMENT = intern('linecomment') 105TOKEN_DATA = intern('data') 106TOKEN_INITIAL = intern('initial') 107TOKEN_EOF = intern('eof') 108 109# bind operators to token types 110operators = { 111 '+': TOKEN_ADD, 112 '-': TOKEN_SUB, 113 '/': TOKEN_DIV, 114 '//': TOKEN_FLOORDIV, 115 '*': TOKEN_MUL, 116 '%': TOKEN_MOD, 117 '**': TOKEN_POW, 118 '~': TOKEN_TILDE, 119 '[': TOKEN_LBRACKET, 120 ']': TOKEN_RBRACKET, 121 '(': TOKEN_LPAREN, 122 ')': TOKEN_RPAREN, 123 '{': TOKEN_LBRACE, 124 '}': TOKEN_RBRACE, 125 '==': TOKEN_EQ, 126 '!=': TOKEN_NE, 127 '>': TOKEN_GT, 128 '>=': TOKEN_GTEQ, 129 '<': TOKEN_LT, 130 '<=': TOKEN_LTEQ, 131 '=': TOKEN_ASSIGN, 132 '.': TOKEN_DOT, 133 ':': TOKEN_COLON, 134 '|': TOKEN_PIPE, 135 ',': TOKEN_COMMA, 136 ';': TOKEN_SEMICOLON 137} 138 139reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) 140assert len(operators) == len(reverse_operators), 'operators dropped' 141operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in 142 sorted(operators, key=lambda x: -len(x)))) 143 144ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, 145 TOKEN_COMMENT_END, TOKEN_WHITESPACE, 146 TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, 147 TOKEN_LINECOMMENT]) 148ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, 149 TOKEN_COMMENT, TOKEN_LINECOMMENT]) 150 151 152def _describe_token_type(token_type): 153 if token_type in reverse_operators: 154 return reverse_operators[token_type] 155 return { 156 TOKEN_COMMENT_BEGIN: 'begin of comment', 157 TOKEN_COMMENT_END: 'end of comment', 158 TOKEN_COMMENT: 'comment', 159 TOKEN_LINECOMMENT: 'comment', 160 TOKEN_BLOCK_BEGIN: 'begin of statement block', 161 TOKEN_BLOCK_END: 'end of statement block', 162 TOKEN_VARIABLE_BEGIN: 'begin of print statement', 163 TOKEN_VARIABLE_END: 'end of print statement', 164 TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement', 165 TOKEN_LINESTATEMENT_END: 'end of line statement', 166 TOKEN_DATA: 'template data / text', 167 TOKEN_EOF: 'end of template' 168 }.get(token_type, token_type) 169 170 171def describe_token(token): 172 """Returns a description of the token.""" 173 if token.type == 'name': 174 return token.value 175 return _describe_token_type(token.type) 176 177 178def describe_token_expr(expr): 179 """Like `describe_token` but for token expressions.""" 180 if ':' in expr: 181 type, value = expr.split(':', 1) 182 if type == 'name': 183 return value 184 else: 185 type = expr 186 return _describe_token_type(type) 187 188 189def count_newlines(value): 190 """Count the number of newline characters in the string. This is 191 useful for extensions that filter a stream. 192 """ 193 return len(newline_re.findall(value)) 194 195 196def compile_rules(environment): 197 """Compiles all the rules from the environment into a list of rules.""" 198 e = re.escape 199 rules = [ 200 (len(environment.comment_start_string), 'comment', 201 e(environment.comment_start_string)), 202 (len(environment.block_start_string), 'block', 203 e(environment.block_start_string)), 204 (len(environment.variable_start_string), 'variable', 205 e(environment.variable_start_string)) 206 ] 207 208 if environment.line_statement_prefix is not None: 209 rules.append((len(environment.line_statement_prefix), 'linestatement', 210 r'^[ \t\v]*' + e(environment.line_statement_prefix))) 211 if environment.line_comment_prefix is not None: 212 rules.append((len(environment.line_comment_prefix), 'linecomment', 213 r'(?:^|(?<=\S))[^\S\r\n]*' + 214 e(environment.line_comment_prefix))) 215 216 return [x[1:] for x in sorted(rules, reverse=True)] 217 218 219class Failure(object): 220 """Class that raises a `TemplateSyntaxError` if called. 221 Used by the `Lexer` to specify known errors. 222 """ 223 224 def __init__(self, message, cls=TemplateSyntaxError): 225 self.message = message 226 self.error_class = cls 227 228 def __call__(self, lineno, filename): 229 raise self.error_class(self.message, lineno, filename) 230 231 232class Token(tuple): 233 """Token class.""" 234 __slots__ = () 235 lineno, type, value = (property(itemgetter(x)) for x in range(3)) 236 237 def __new__(cls, lineno, type, value): 238 return tuple.__new__(cls, (lineno, intern(str(type)), value)) 239 240 def __str__(self): 241 if self.type in reverse_operators: 242 return reverse_operators[self.type] 243 elif self.type == 'name': 244 return self.value 245 return self.type 246 247 def test(self, expr): 248 """Test a token against a token expression. This can either be a 249 token type or ``'token_type:token_value'``. This can only test 250 against string values and types. 251 """ 252 # here we do a regular string equality check as test_any is usually 253 # passed an iterable of not interned strings. 254 if self.type == expr: 255 return True 256 elif ':' in expr: 257 return expr.split(':', 1) == [self.type, self.value] 258 return False 259 260 def test_any(self, *iterable): 261 """Test against multiple token expressions.""" 262 for expr in iterable: 263 if self.test(expr): 264 return True 265 return False 266 267 def __repr__(self): 268 return 'Token(%r, %r, %r)' % ( 269 self.lineno, 270 self.type, 271 self.value 272 ) 273 274 275@implements_iterator 276class TokenStreamIterator(object): 277 """The iterator for tokenstreams. Iterate over the stream 278 until the eof token is reached. 279 """ 280 281 def __init__(self, stream): 282 self.stream = stream 283 284 def __iter__(self): 285 return self 286 287 def __next__(self): 288 token = self.stream.current 289 if token.type is TOKEN_EOF: 290 self.stream.close() 291 raise StopIteration() 292 next(self.stream) 293 return token 294 295 296@implements_iterator 297class TokenStream(object): 298 """A token stream is an iterable that yields :class:`Token`\\s. The 299 parser however does not iterate over it but calls :meth:`next` to go 300 one token ahead. The current active token is stored as :attr:`current`. 301 """ 302 303 def __init__(self, generator, name, filename): 304 self._iter = iter(generator) 305 self._pushed = deque() 306 self.name = name 307 self.filename = filename 308 self.closed = False 309 self.current = Token(1, TOKEN_INITIAL, '') 310 next(self) 311 312 def __iter__(self): 313 return TokenStreamIterator(self) 314 315 def __bool__(self): 316 return bool(self._pushed) or self.current.type is not TOKEN_EOF 317 __nonzero__ = __bool__ # py2 318 319 eos = property(lambda x: not x, doc="Are we at the end of the stream?") 320 321 def push(self, token): 322 """Push a token back to the stream.""" 323 self._pushed.append(token) 324 325 def look(self): 326 """Look at the next token.""" 327 old_token = next(self) 328 result = self.current 329 self.push(result) 330 self.current = old_token 331 return result 332 333 def skip(self, n=1): 334 """Got n tokens ahead.""" 335 for x in range(n): 336 next(self) 337 338 def next_if(self, expr): 339 """Perform the token test and return the token if it matched. 340 Otherwise the return value is `None`. 341 """ 342 if self.current.test(expr): 343 return next(self) 344 345 def skip_if(self, expr): 346 """Like :meth:`next_if` but only returns `True` or `False`.""" 347 return self.next_if(expr) is not None 348 349 def __next__(self): 350 """Go one token ahead and return the old one. 351 352 Use the built-in :func:`next` instead of calling this directly. 353 """ 354 rv = self.current 355 if self._pushed: 356 self.current = self._pushed.popleft() 357 elif self.current.type is not TOKEN_EOF: 358 try: 359 self.current = next(self._iter) 360 except StopIteration: 361 self.close() 362 return rv 363 364 def close(self): 365 """Close the stream.""" 366 self.current = Token(self.current.lineno, TOKEN_EOF, '') 367 self._iter = None 368 self.closed = True 369 370 def expect(self, expr): 371 """Expect a given token type and return it. This accepts the same 372 argument as :meth:`jinja2.lexer.Token.test`. 373 """ 374 if not self.current.test(expr): 375 expr = describe_token_expr(expr) 376 if self.current.type is TOKEN_EOF: 377 raise TemplateSyntaxError('unexpected end of template, ' 378 'expected %r.' % expr, 379 self.current.lineno, 380 self.name, self.filename) 381 raise TemplateSyntaxError("expected token %r, got %r" % 382 (expr, describe_token(self.current)), 383 self.current.lineno, 384 self.name, self.filename) 385 try: 386 return self.current 387 finally: 388 next(self) 389 390 391def get_lexer(environment): 392 """Return a lexer which is probably cached.""" 393 key = (environment.block_start_string, 394 environment.block_end_string, 395 environment.variable_start_string, 396 environment.variable_end_string, 397 environment.comment_start_string, 398 environment.comment_end_string, 399 environment.line_statement_prefix, 400 environment.line_comment_prefix, 401 environment.trim_blocks, 402 environment.lstrip_blocks, 403 environment.newline_sequence, 404 environment.keep_trailing_newline) 405 lexer = _lexer_cache.get(key) 406 if lexer is None: 407 lexer = Lexer(environment) 408 _lexer_cache[key] = lexer 409 return lexer 410 411 412class Lexer(object): 413 """Class that implements a lexer for a given environment. Automatically 414 created by the environment class, usually you don't have to do that. 415 416 Note that the lexer is not automatically bound to an environment. 417 Multiple environments can share the same lexer. 418 """ 419 420 def __init__(self, environment): 421 # shortcuts 422 c = lambda x: re.compile(x, re.M | re.S) 423 e = re.escape 424 425 # lexing rules for tags 426 tag_rules = [ 427 (whitespace_re, TOKEN_WHITESPACE, None), 428 (float_re, TOKEN_FLOAT, None), 429 (integer_re, TOKEN_INTEGER, None), 430 (name_re, TOKEN_NAME, None), 431 (string_re, TOKEN_STRING, None), 432 (operator_re, TOKEN_OPERATOR, None) 433 ] 434 435 # assemble the root lexing rule. because "|" is ungreedy 436 # we have to sort by length so that the lexer continues working 437 # as expected when we have parsing rules like <% for block and 438 # <%= for variables. (if someone wants asp like syntax) 439 # variables are just part of the rules if variable processing 440 # is required. 441 root_tag_rules = compile_rules(environment) 442 443 # block suffix if trimming is enabled 444 block_suffix_re = environment.trim_blocks and '\\n?' or '' 445 446 # strip leading spaces if lstrip_blocks is enabled 447 prefix_re = {} 448 if environment.lstrip_blocks: 449 # use '{%+' to manually disable lstrip_blocks behavior 450 no_lstrip_re = e('+') 451 # detect overlap between block and variable or comment strings 452 block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) 453 # make sure we don't mistake a block for a variable or a comment 454 m = block_diff.match(environment.comment_start_string) 455 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 456 m = block_diff.match(environment.variable_start_string) 457 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 458 459 # detect overlap between comment and variable strings 460 comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) 461 m = comment_diff.match(environment.variable_start_string) 462 no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' 463 464 lstrip_re = r'^[ \t]*' 465 block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( 466 lstrip_re, 467 e(environment.block_start_string), 468 no_lstrip_re, 469 e(environment.block_start_string), 470 ) 471 comment_prefix_re = r'%s%s%s|%s\+?' % ( 472 lstrip_re, 473 e(environment.comment_start_string), 474 no_variable_re, 475 e(environment.comment_start_string), 476 ) 477 prefix_re['block'] = block_prefix_re 478 prefix_re['comment'] = comment_prefix_re 479 else: 480 block_prefix_re = '%s' % e(environment.block_start_string) 481 482 self.newline_sequence = environment.newline_sequence 483 self.keep_trailing_newline = environment.keep_trailing_newline 484 485 # global lexing rules 486 self.rules = { 487 'root': [ 488 # directives 489 (c('(.*?)(?:%s)' % '|'.join( 490 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( 491 e(environment.block_start_string), 492 block_prefix_re, 493 e(environment.block_end_string), 494 e(environment.block_end_string) 495 )] + [ 496 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) 497 for n, r in root_tag_rules 498 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), 499 # data 500 (c('.+'), TOKEN_DATA, None) 501 ], 502 # comments 503 TOKEN_COMMENT_BEGIN: [ 504 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( 505 e(environment.comment_end_string), 506 e(environment.comment_end_string), 507 block_suffix_re 508 )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'), 509 (c('(.)'), (Failure('Missing end of comment tag'),), None) 510 ], 511 # blocks 512 TOKEN_BLOCK_BEGIN: [ 513 (c(r'(?:\-%s\s*|%s)%s' % ( 514 e(environment.block_end_string), 515 e(environment.block_end_string), 516 block_suffix_re 517 )), TOKEN_BLOCK_END, '#pop'), 518 ] + tag_rules, 519 # variables 520 TOKEN_VARIABLE_BEGIN: [ 521 (c(r'\-%s\s*|%s' % ( 522 e(environment.variable_end_string), 523 e(environment.variable_end_string) 524 )), TOKEN_VARIABLE_END, '#pop') 525 ] + tag_rules, 526 # raw block 527 TOKEN_RAW_BEGIN: [ 528 (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( 529 e(environment.block_start_string), 530 block_prefix_re, 531 e(environment.block_end_string), 532 e(environment.block_end_string), 533 block_suffix_re 534 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), 535 (c('(.)'), (Failure('Missing end of raw directive'),), None) 536 ], 537 # line statements 538 TOKEN_LINESTATEMENT_BEGIN: [ 539 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') 540 ] + tag_rules, 541 # line comments 542 TOKEN_LINECOMMENT_BEGIN: [ 543 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, 544 TOKEN_LINECOMMENT_END), '#pop') 545 ] 546 } 547 548 def _normalize_newlines(self, value): 549 """Called for strings and template data to normalize it to unicode.""" 550 return newline_re.sub(self.newline_sequence, value) 551 552 def tokenize(self, source, name=None, filename=None, state=None): 553 """Calls tokeniter + tokenize and wraps it in a token stream. 554 """ 555 stream = self.tokeniter(source, name, filename, state) 556 return TokenStream(self.wrap(stream, name, filename), name, filename) 557 558 def wrap(self, stream, name=None, filename=None): 559 """This is called with the stream as returned by `tokenize` and wraps 560 every token in a :class:`Token` and converts the value. 561 """ 562 for lineno, token, value in stream: 563 if token in ignored_tokens: 564 continue 565 elif token == 'linestatement_begin': 566 token = 'block_begin' 567 elif token == 'linestatement_end': 568 token = 'block_end' 569 # we are not interested in those tokens in the parser 570 elif token in ('raw_begin', 'raw_end'): 571 continue 572 elif token == 'data': 573 value = self._normalize_newlines(value) 574 elif token == 'keyword': 575 token = value 576 elif token == 'name': 577 value = str(value) 578 if check_ident and not value.isidentifier(): 579 raise TemplateSyntaxError( 580 'Invalid character in identifier', 581 lineno, name, filename) 582 elif token == 'string': 583 # try to unescape string 584 try: 585 value = self._normalize_newlines(value[1:-1]) \ 586 .encode('ascii', 'backslashreplace') \ 587 .decode('unicode-escape') 588 except Exception as e: 589 msg = str(e).split(':')[-1].strip() 590 raise TemplateSyntaxError(msg, lineno, name, filename) 591 elif token == 'integer': 592 value = int(value) 593 elif token == 'float': 594 value = float(value) 595 elif token == 'operator': 596 token = operators[value] 597 yield Token(lineno, token, value) 598 599 def tokeniter(self, source, name, filename=None, state=None): 600 """This method tokenizes the text and returns the tokens in a 601 generator. Use this method if you just want to tokenize a template. 602 """ 603 source = text_type(source) 604 lines = source.splitlines() 605 if self.keep_trailing_newline and source: 606 for newline in ('\r\n', '\r', '\n'): 607 if source.endswith(newline): 608 lines.append('') 609 break 610 source = '\n'.join(lines) 611 pos = 0 612 lineno = 1 613 stack = ['root'] 614 if state is not None and state != 'root': 615 assert state in ('variable', 'block'), 'invalid state' 616 stack.append(state + '_begin') 617 else: 618 state = 'root' 619 statetokens = self.rules[stack[-1]] 620 source_length = len(source) 621 622 balancing_stack = [] 623 624 while 1: 625 # tokenizer loop 626 for regex, tokens, new_state in statetokens: 627 m = regex.match(source, pos) 628 # if no match we try again with the next rule 629 if m is None: 630 continue 631 632 # we only match blocks and variables if braces / parentheses 633 # are balanced. continue parsing with the lower rule which 634 # is the operator rule. do this only if the end tags look 635 # like operators 636 if balancing_stack and \ 637 tokens in ('variable_end', 'block_end', 638 'linestatement_end'): 639 continue 640 641 # tuples support more options 642 if isinstance(tokens, tuple): 643 for idx, token in enumerate(tokens): 644 # failure group 645 if token.__class__ is Failure: 646 raise token(lineno, filename) 647 # bygroup is a bit more complex, in that case we 648 # yield for the current token the first named 649 # group that matched 650 elif token == '#bygroup': 651 for key, value in iteritems(m.groupdict()): 652 if value is not None: 653 yield lineno, key, value 654 lineno += value.count('\n') 655 break 656 else: 657 raise RuntimeError('%r wanted to resolve ' 658 'the token dynamically' 659 ' but no group matched' 660 % regex) 661 # normal group 662 else: 663 data = m.group(idx + 1) 664 if data or token not in ignore_if_empty: 665 yield lineno, token, data 666 lineno += data.count('\n') 667 668 # strings as token just are yielded as it. 669 else: 670 data = m.group() 671 # update brace/parentheses balance 672 if tokens == 'operator': 673 if data == '{': 674 balancing_stack.append('}') 675 elif data == '(': 676 balancing_stack.append(')') 677 elif data == '[': 678 balancing_stack.append(']') 679 elif data in ('}', ')', ']'): 680 if not balancing_stack: 681 raise TemplateSyntaxError('unexpected \'%s\'' % 682 data, lineno, name, 683 filename) 684 expected_op = balancing_stack.pop() 685 if expected_op != data: 686 raise TemplateSyntaxError('unexpected \'%s\', ' 687 'expected \'%s\'' % 688 (data, expected_op), 689 lineno, name, 690 filename) 691 # yield items 692 if data or tokens not in ignore_if_empty: 693 yield lineno, tokens, data 694 lineno += data.count('\n') 695 696 # fetch new position into new variable so that we can check 697 # if there is a internal parsing error which would result 698 # in an infinite loop 699 pos2 = m.end() 700 701 # handle state changes 702 if new_state is not None: 703 # remove the uppermost state 704 if new_state == '#pop': 705 stack.pop() 706 # resolve the new state by group checking 707 elif new_state == '#bygroup': 708 for key, value in iteritems(m.groupdict()): 709 if value is not None: 710 stack.append(key) 711 break 712 else: 713 raise RuntimeError('%r wanted to resolve the ' 714 'new state dynamically but' 715 ' no group matched' % 716 regex) 717 # direct state name given 718 else: 719 stack.append(new_state) 720 statetokens = self.rules[stack[-1]] 721 # we are still at the same position and no stack change. 722 # this means a loop without break condition, avoid that and 723 # raise error 724 elif pos2 == pos: 725 raise RuntimeError('%r yielded empty string without ' 726 'stack change' % regex) 727 # publish new function and start again 728 pos = pos2 729 break 730 # if loop terminated without break we haven't found a single match 731 # either we are at the end of the file or we have a problem 732 else: 733 # end of text 734 if pos >= source_length: 735 return 736 # something went wrong 737 raise TemplateSyntaxError('unexpected char %r at %d' % 738 (source[pos], pos), lineno, 739 name, filename) 740