# -*- coding: utf-8 -*-
"""
    jinja2.ext
    ~~~~~~~~~~

    Jinja extensions allow adding custom tags, similar to the way Django
    custom tags work.  This module defines the extension base class and
    the ``i18n``, ``do``, ``loopcontrols``, ``with_`` and ``autoescape``
    extensions, plus the Babel extraction entry point.

    :copyright: (c) 2017 by the Jinja Team.
    :license: BSD.
"""
import re

from jinja2 import nodes
from jinja2.defaults import (
    BLOCK_START_STRING, BLOCK_END_STRING, VARIABLE_START_STRING,
    VARIABLE_END_STRING, COMMENT_START_STRING, COMMENT_END_STRING,
    LINE_STATEMENT_PREFIX, LINE_COMMENT_PREFIX, TRIM_BLOCKS,
    NEWLINE_SEQUENCE, KEEP_TRAILING_NEWLINE, LSTRIP_BLOCKS
)
from jinja2.environment import Environment
from jinja2.runtime import concat
from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
from jinja2.utils import contextfunction, import_string, Markup
from jinja2._compat import with_metaclass, string_types, iteritems


# the only real useful gettext functions for a Jinja template.  Note
# that ugettext must be assigned to gettext as Jinja doesn't support
# non unicode strings.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')


class ExtensionRegistry(type):
    """Gives the extension a unique identifier."""

    def __new__(cls, name, bases, d):
        rv = type.__new__(cls, name, bases, d)
        # the identifier is the dotted import path of the class
        rv.identifier = rv.__module__ + '.' + rv.__name__
        return rv


class Extension(with_metaclass(ExtensionRegistry, object)):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment."""
        # bypass __init__ so that subclass constructors with side effects
        # on the environment are not run again for the copy
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.

        In the `ext` folder of the Jinja2 source distribution there is a file
        called `inlinegettext.py` which implements a filter that utilizes this
        method.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(self, name, args=None, kwargs=None, dyn_args=None,
                    dyn_kwargs=None, lineno=None):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to fresh empty lists when omitted.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
                          dyn_args, dyn_kwargs, lineno=lineno)


@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    """Forward a ``_()`` call to whatever ``gettext`` resolves to in the
    current context.
    """
    return __context.call(__context.resolve('gettext'), *args, **kwargs)


def _make_new_gettext(func):
    """Wrap `func` into a newstyle ``gettext`` that interpolates the
    keyword arguments into the translated string itself.
    """
    @contextfunction
    def gettext(__context, __string, **variables):
        rv = __context.call(func, __string)
        # wrap before interpolating so the ``%`` below operates on Markup
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        return rv % variables
    return gettext


def _make_new_ngettext(func):
    """Wrap `func` into a newstyle ``ngettext`` that interpolates the
    keyword arguments into the translated string itself.
    """
    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        # expose the count as ``num`` unless the caller passed its own
        variables.setdefault('num', __num)
        rv = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        return rv % variables
    return ngettext


class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2."""
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        Extension.__init__(self, environment)
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False
        )

    def _install(self, translations, newstyle=None):
        """Install the callables of a translations object, preferring the
        ``ugettext``/``ungettext`` variants when the object has them.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install pass-through callables that return the message
        unchanged (the plural form whenever ``n != 1``).
        """
        self._install_callables(
            lambda x: x,
            lambda s, p, n: p if n != 1 else s,
            newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Register `gettext` and `ngettext` as environment globals.

        If `newstyle` is not `None` it overrides the environment's
        ``newstyle_gettext`` flag; when that flag is set the callables
        are wrapped with the newstyle helpers first.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle
        if self.environment.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        self.environment.globals.update(
            gettext=gettext,
            ngettext=ngettext
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals.  The
        `translations` argument is accepted but not used.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may be a
        template source string (parsed first) or an already parsed node.
        """
        if isinstance(source, string_types):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        lineno = next(parser.stream).lineno
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        trimmed = None
        while parser.stream.current.type != 'block_end':
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            elif trimmed is None and name.value in ('trimmed', 'notrimmed'):
                # a bare ``trimmed``/``notrimmed`` is a modifier, not a
                # variable; only the first occurrence is treated this way
                trimmed = name.value == 'trimmed'
                continue
            else:
                variables[name.value] = var = nodes.Name(name.value, 'load')

            # the first variable becomes the default pluralization count
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # store the call result in ``_trans`` and refer to that
                    # name instead of repeating the call expression
                    plural_expr = nodes.Name('_trans', 'load')
                    variables[name.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name('_trans', 'store'), var)
                else:
                    plural_expr = var
                num_called_num = name.value == 'num'

        parser.stream.expect('block_end')

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')
                num_called_num = singular_names[0] == 'num'

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
                num_called_num = name.value == 'num'
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        # no explicit modifier on the tag: fall back to the policy
        if trimmed is None:
            trimmed = self.environment.policies['ext.i18n.trimmed']
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(singular, plural, variables, plural_expr,
                               bool(referenced),
                               num_called_num and have_plural)
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _trim_whitespace(self, string, _ws_re=re.compile(r'\s*\n\s*')):
        """Strip `string` and collapse every line break, together with
        the whitespace around it, into a single space.
        """
        return _ws_re.sub(' ', string.strip())

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns a ``(referenced, message)`` tuple: the list of variable
        names used in the block, and the message text in which literal
        ``%`` is doubled and each variable is written as ``%(name)s``.
        The stream is left on the ``endtrans``/``pluralize`` name token.
        """
        referenced = []
        buf = []
        while True:
            if parser.stream.current.type == 'data':
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                # raise instead of ``assert False``: asserts are stripped
                # under ``python -O``, which would leave this loop spinning
                # forever on a token it never consumes.
                raise RuntimeError('internal parser error')

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr,
                   vars_referenced, num_called_num):
        """Generates a useful node from the data provided."""
        # no variables referenced?  no need to escape for old style
        # gettext invocations only if there are vars.
        if not vars_referenced and not self.environment.newstyle_gettext:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if self.environment.newstyle_gettext:
            for key, value in iteritems(variables):
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == 'num':
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(node, nodes.Dict([
                    nodes.Pair(nodes.Const(key), value)
                    for key, value in iteritems(variables)
                ]))
        return nodes.Output([node])


class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja2 that works like the print statement just
    that it doesn't print the return value.
    """
    tags = set(['do'])

    def parse(self, parser):
        node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        node.node = parser.parse_tuple()
        return node


class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""
    tags = set(['break', 'continue'])

    def parse(self, parser):
        token = next(parser.stream)
        if token.value == 'break':
            return nodes.Break(lineno=token.lineno)
        return nodes.Continue(lineno=token.lineno)


class WithExtension(Extension):
    """Extension that registers no tags and adds no behavior.

    NOTE(review): presumably kept so the ``with_`` import name keeps
    resolving -- confirm against the environment/parser.
    """


class AutoEscapeExtension(Extension):
    """Extension that registers no tags and adds no behavior.

    NOTE(review): presumably kept so the ``autoescape`` import name keeps
    resolving -- confirm against the environment/parser.
    """


def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
                     babel_style=True):
    """Extract localizable strings from the given template node.  Per
    default this function returns matches in babel style that means non string
    parameters as well as keyword arguments are returned as `None`.  This
    allows Babel to figure out what you really meant if you are using
    gettext functions that allow keyword arguments for placeholder expansion.
    If you don't want that behavior set the `babel_style` parameter to `False`
    which causes only strings to be returned and parameters are always stored
    in tuples.  As a consequence invalid gettext calls (calls without a single
    string parameter or string parameters after non-string parameters) are
    skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every string found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the number of the line on which the string was found,
    * ``function`` is the name of the ``gettext`` function used (if the
      string was extracted from embedded Python code), and
    *  ``message`` is the string itself (a ``unicode`` object, or a tuple
       of ``unicode`` objects for functions with multiple string arguments).

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    # a separate loop name avoids shadowing the ``node`` parameter
    for call in node.find_all(nodes.Call):
        if not isinstance(call.node, nodes.Name) or \
           call.node.name not in gettext_functions:
            continue

        strings = []
        for arg in call.args:
            if isinstance(arg, nodes.Const) and \
               isinstance(arg.value, string_types):
                strings.append(arg.value)
            else:
                strings.append(None)

        # keyword and dynamic arguments only ever contribute placeholders
        for _ in call.kwargs:
            strings.append(None)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if not babel_style:
            strings = tuple(x for x in strings if x is not None)
            if not strings:
                continue
        else:
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
        yield call.lineno, call.node.name, strings


class _CommentFinder(object):
    """Helper class to find comments in a token stream.  Can only
    find comments for gettext calls forwards.  Once the comment
    from line 4 is found, a comment for line 1 will not return a
    usable value.
    """

    def __init__(self, tokens, comment_tags):
        self.tokens = tokens
        self.comment_tags = comment_tags
        # index of the first token not yet consumed by a search
        self.offset = 0
        # NOTE(review): nothing in this file ever advances ``last_lineno``,
        # so the comparison in ``find_comments`` always sees 0 -- confirm
        # whether it was meant to be updated.
        self.last_lineno = 0

    def find_backwards(self, offset):
        """Search the tokens between the current offset and `offset` in
        reverse order for a comment whose first word is one of the comment
        tags.  Returns a one element list with the rest of that comment,
        or an empty list.  The current offset is always moved to `offset`.
        """
        try:
            for _, token_type, token_value in \
                    reversed(self.tokens[self.offset:offset]):
                if token_type in ('comment', 'linecomment'):
                    try:
                        prefix, comment = token_value.split(None, 1)
                    except ValueError:
                        # comment without a tag and text part
                        continue
                    if prefix in self.comment_tags:
                        return [comment.rstrip()]
            return []
        finally:
            self.offset = offset

    def find_comments(self, lineno):
        """Return the translator comment for the call on `lineno` as a
        list (empty if there are no comment tags or nothing matched).
        """
        if not self.comment_tags or self.last_lineno > lineno:
            return []
        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
            if token_lineno > lineno:
                return self.find_backwards(self.offset + idx)
        return self.find_backwards(len(self.tokens))


def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted from;
                    its ``read()`` result is decoded with the ``encoding``
                    option (default ``utf-8``)
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             ``comments`` is a list that is empty when no comment tags were
             given or no matching comment was found.
    """
    extensions = set()
    for extension in options.get('extensions', '').split(','):
        extension = extension.strip()
        if not extension:
            continue
        extensions.add(import_string(extension))
    # the i18n extension is always needed so ``trans`` blocks parse
    if InternationalizationExtension not in extensions:
        extensions.add(InternationalizationExtension)

    def getbool(options, key, default=False):
        # option values are strings; accept the usual truthy spellings
        return options.get(key, str(default)).lower() in \
            ('1', 'on', 'yes', 'true')

    silent = getbool(options, 'silent', True)
    environment = Environment(
        options.get('block_start_string', BLOCK_START_STRING),
        options.get('block_end_string', BLOCK_END_STRING),
        options.get('variable_start_string', VARIABLE_START_STRING),
        options.get('variable_end_string', VARIABLE_END_STRING),
        options.get('comment_start_string', COMMENT_START_STRING),
        options.get('comment_end_string', COMMENT_END_STRING),
        options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
        options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
        getbool(options, 'trim_blocks', TRIM_BLOCKS),
        getbool(options, 'lstrip_blocks', LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, 'keep_trailing_newline', KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False
    )

    if getbool(options, 'trimmed'):
        environment.policies['ext.i18n.trimmed'] = True
    if getbool(options, 'newstyle_gettext'):
        environment.newstyle_gettext = True

    source = fileobj.read().decode(options.get('encoding', 'utf-8'))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if not silent:
            raise
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)


#: nicer import names
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension