# -*- coding: utf-8 -*-
"""Extension API for adding custom tags and behavior."""
import pprint
import re
from sys import version_info

from markupsafe import Markup

from . import nodes
from ._compat import iteritems
from ._compat import string_types
from ._compat import with_metaclass
from .defaults import BLOCK_END_STRING
from .defaults import BLOCK_START_STRING
from .defaults import COMMENT_END_STRING
from .defaults import COMMENT_START_STRING
from .defaults import KEEP_TRAILING_NEWLINE
from .defaults import LINE_COMMENT_PREFIX
from .defaults import LINE_STATEMENT_PREFIX
from .defaults import LSTRIP_BLOCKS
from .defaults import NEWLINE_SEQUENCE
from .defaults import TRIM_BLOCKS
from .defaults import VARIABLE_END_STRING
from .defaults import VARIABLE_START_STRING
from .environment import Environment
from .exceptions import TemplateAssertionError
from .exceptions import TemplateSyntaxError
from .nodes import ContextReference
from .runtime import concat
from .utils import contextfunction
from .utils import import_string

# the only real useful gettext functions for a Jinja template.  Note
# that ugettext must be assigned to gettext as Jinja doesn't support
# non unicode strings.
GETTEXT_FUNCTIONS = ("_", "gettext", "ngettext")

# Matches a newline together with all whitespace surrounding it; used to
# collapse line breaks in "trimmed" translation blocks into single spaces.
_ws_re = re.compile(r"\s*\n\s*")


class ExtensionRegistry(type):
    """Gives the extension a unique identifier."""

    def __new__(mcs, name, bases, d):
        """Create the class and set ``identifier`` to its dotted
        ``<module>.<class name>`` path.
        """
        rv = type.__new__(mcs, name, bases, d)
        rv.identifier = rv.__module__ + "." + rv.__name__
        return rv


class Extension(with_metaclass(ExtensionRegistry, object)):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        """Remember the environment this extension instance is bound to."""
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment."""
        # Bypass __init__ on purpose: subclasses may do environment setup
        # there, and only the instance state has to be carried over.
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(
        self, name, args=None, kwargs=None, dyn_args=None, dyn_kwargs=None, lineno=None
    ):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to fresh empty lists when omitted.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(
            self.attr(name, lineno=lineno),
            args,
            kwargs,
            dyn_args,
            dyn_kwargs,
            lineno=lineno,
        )


@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    """Forward a ``_(...)`` call to whatever ``gettext`` resolves to in the
    given context.
    """
    return __context.call(__context.resolve("gettext"), *args, **kwargs)


def _make_new_gettext(func):
    """Wrap the gettext callable `func` in a "newstyle" ``gettext`` that
    formats the translated string with the keyword arguments it receives.
    """

    @contextfunction
    def gettext(__context, __string, **variables):
        rv = __context.call(func, __string)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, even if there are no
        # variables.  This makes translation strings more consistent
        # and predictable.  This requires literal percent signs in the
        # message to be escaped as "%%".
        return rv % variables

    return gettext


def _make_new_ngettext(func):
    """Wrap the ngettext callable `func` in a "newstyle" ``ngettext`` that
    formats the translated string with the keyword arguments it receives.
    The count is made available as ``num`` unless the caller passed its own
    ``num`` keyword argument.
    """

    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        variables.setdefault("num", __num)
        rv = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, see gettext comment above.
        return rv % variables

    return ngettext


class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja."""

    tags = {"trans"}

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Bind to `environment`, register ``_`` as a global and hand the
        install/uninstall/extract helpers plus the ``newstyle_gettext`` flag
        (initially ``False``) to ``environment.extend``.
        """
        Extension.__init__(self, environment)
        environment.globals["_"] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False,
        )

    def _install(self, translations, newstyle=None):
        """Install the gettext/ngettext callables of a translations object.

        ``ugettext`` / ``ungettext`` are preferred when the object has them;
        otherwise ``gettext`` / ``ngettext`` are used.
        """
        gettext = getattr(translations, "ugettext", None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, "ungettext", None)
        if ngettext is None:
            ngettext = translations.ngettext
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install no-op translations: ``gettext`` returns its argument and
        ``ngettext`` returns the singular when ``n == 1``, else the plural.
        """
        self._install_callables(
            lambda x: x, lambda s, p, n: s if n == 1 else p, newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Register `gettext` and `ngettext` as environment globals.

        If `newstyle` is not ``None`` it first overwrites the environment's
        ``newstyle_gettext`` flag.  When that flag is set, both callables are
        wrapped so that they perform the variable formatting themselves.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle
        if self.environment.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        self.environment.globals.update(gettext=gettext, ngettext=ngettext)

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.

        `translations` is accepted but not used by this method.
        """
        for key in "gettext", "ngettext":
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may be template
        source text (parsed with the bound environment first) or an already
        parsed node.  See :func:`extract_from_ast` for the result format.
        """
        if isinstance(source, string_types):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        lineno = next(parser.stream).lineno
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        trimmed = None
        while parser.stream.current.type != "block_end":
            if variables:
                parser.stream.expect("comma")

            # skip colon for python compatibility
            if parser.stream.skip_if("colon"):
                break

            name = parser.stream.expect("name")
            if name.value in variables:
                parser.fail(
                    "translatable variable %r defined twice." % name.value,
                    name.lineno,
                    exc=TemplateAssertionError,
                )

            # expressions
            if parser.stream.current.type == "assign":
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            elif trimmed is None and name.value in ("trimmed", "notrimmed"):
                # a bare trimmed/notrimmed name is a flag, not a variable
                trimmed = name.value == "trimmed"
                continue
            else:
                variables[name.value] = var = nodes.Name(name.value, "load")

            # the first variable seen is the default pluralization count
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # store the call result in ``_trans`` and reference that
                    # name instead of repeating the call expression
                    plural_expr = nodes.Name("_trans", "load")
                    variables[name.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = name.value == "num"

        parser.stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], "load")
                num_called_num = singular_names[0] == "num"

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test("name:pluralize"):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != "block_end":
                name = parser.stream.expect("name")
                if name.value not in variables:
                    parser.fail(
                        "unknown variable %r for pluralization" % name.value,
                        name.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[name.value]
                num_called_num = name.value == "num"
            parser.stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, "load")

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail("pluralize without variables", lineno)

        # fall back to the environment policy when the tag gave no flag
        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _trim_whitespace(self, string, _ws_re=_ws_re):
        """Strip `string` and replace every newline, together with the
        whitespace around it, by a single space.
        """
        return _ws_re.sub(" ", string.strip())

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns a ``(referenced, text)`` tuple: the list of variable names
        used in the block (in order, duplicates kept) and the block text as
        a ``%``-format string, with literal ``%`` doubled and each variable
        rendered as ``%(name)s``.  Stops at ``endtrans``, or at ``pluralize``
        when `allow_pluralize` is true; any other block tag is reported
        through ``parser.fail``.
        """
        referenced = []
        buf = []
        while True:
            if parser.stream.current.type == "data":
                buf.append(parser.stream.current.value.replace("%", "%%"))
                next(parser.stream)
            elif parser.stream.current.type == "variable_begin":
                next(parser.stream)
                name = parser.stream.expect("name").value
                referenced.append(name)
                buf.append("%%(%s)s" % name)
                parser.stream.expect("variable_end")
            elif parser.stream.current.type == "block_begin":
                next(parser.stream)
                if parser.stream.current.test("name:endtrans"):
                    break
                elif parser.stream.current.test("name:pluralize"):
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                parser.fail(
                    "control structures in translatable sections are not allowed"
                )
            elif parser.stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self, singular, plural, variables, plural_expr, vars_referenced, num_called_num
    ):
        """Generates a useful node from the data provided.

        Builds a ``gettext`` call (or ``ngettext`` when `plural_expr` is not
        ``None``) wrapped in an output node.  With newstyle gettext the
        variables are passed as keyword arguments; otherwise the call result
        is marked safe under autoescape and ``%``-formatted with a dict of
        the variables.
        """
        # For old style gettext the string is only %-formatted when there
        # are variables, so without referenced variables the doubled
        # percent signs are collapsed again here.
        if not vars_referenced and not self.environment.newstyle_gettext:
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name("gettext", "load")
            node = nodes.Call(gettext, [nodes.Const(singular)], [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name("ngettext", "load")
            node = nodes.Call(
                ngettext,
                [nodes.Const(singular), nodes.Const(plural), plural_expr],
                [],
                None,
                None,
            )

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if self.environment.newstyle_gettext:
            for key, value in iteritems(variables):
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == "num":
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(
                    node,
                    nodes.Dict(
                        [
                            nodes.Pair(nodes.Const(key), value)
                            for key, value in iteritems(variables)
                        ]
                    ),
                )
        return nodes.Output([node])


class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja that works like the print statement just
    that it doesn't print the return value.
    """

    tags = {"do"}

    def parse(self, parser):
        """Parse the tuple expression after ``do`` into an expression
        statement node.
        """
        node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        node.node = parser.parse_tuple()
        return node


class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""

    tags = {"break", "continue"}

    def parse(self, parser):
        """Return a break node for ``break`` and a continue node otherwise."""
        token = next(parser.stream)
        if token.value == "break":
            return nodes.Break(lineno=token.lineno)
        return nodes.Continue(lineno=token.lineno)


class WithExtension(Extension):
    """Empty extension: it defines no tags and no behavior of its own.

    NOTE(review): presumably kept so that loading ``jinja2.ext.with_`` keeps
    working -- confirm before removing.
    """


class AutoEscapeExtension(Extension):
    """Empty extension: it defines no tags and no behavior of its own.

    NOTE(review): presumably kept so that loading ``jinja2.ext.autoescape``
    keeps working -- confirm before removing.
    """


class DebugExtension(Extension):
    """A ``{% debug %}`` tag that dumps the available variables,
    filters, and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser):
        """Emit an output node that calls :meth:`_render` with a reference
        to the template context.
        """
        lineno = parser.stream.expect("name:debug").lineno
        context = ContextReference()
        result = self.call_method("_render", [context], lineno=lineno)
        return nodes.Output([result], lineno=lineno)

    def _render(self, context):
        """Return a pretty-printed dump of the context variables and the
        sorted names of the environment's filters and tests.
        """
        result = {
            "context": context.get_all(),
            "filters": sorted(self.environment.filters.keys()),
            "tests": sorted(self.environment.tests.keys()),
        }

        # Set the depth since the intent is to show the top few names.
        # ``compact`` is only passed on Python 3.4 and newer.
        if version_info[:2] >= (3, 4):
            return pprint.pformat(result, depth=3, compact=True)
        else:
            return pprint.pformat(result, depth=3)


def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS, babel_style=True):
    """Extract localizable strings from the given template node.  Per
    default this function returns matches in babel style that means non string
    parameters as well as keyword arguments are returned as `None`.  This
    allows Babel to figure out what you really meant if you are using
    gettext functions that allow keyword arguments for placeholder expansion.
    If you don't want that behavior set the `babel_style` parameter to `False`
    which causes only strings to be returned and parameters are always stored
    in tuples.  As a consequence invalid gettext calls (calls without a single
    string parameter or string parameters after non-string parameters) are
    skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every string found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the number of the line on which the string was found,
    * ``function`` is the name of the ``gettext`` function used (if the
      string was extracted from embedded Python code), and
    * ``message`` is the string itself (a ``unicode`` object, or a tuple
      of ``unicode`` objects for functions with multiple string arguments).

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    for call in node.find_all(nodes.Call):
        # only plain-name calls to one of the requested functions count
        if (
            not isinstance(call.node, nodes.Name)
            or call.node.name not in gettext_functions
        ):
            continue

        strings = []
        for arg in call.args:
            if isinstance(arg, nodes.Const) and isinstance(arg.value, string_types):
                strings.append(arg.value)
            else:
                strings.append(None)

        # keyword and dynamic arguments are recorded as ``None`` placeholders
        for _ in call.kwargs:
            strings.append(None)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if not babel_style:
            strings = tuple(x for x in strings if x is not None)
            if not strings:
                continue
        else:
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
        yield call.lineno, call.node.name, strings


class _CommentFinder(object):
    """Helper class to find comments in a token stream.  Can only
    find comments for gettext calls forwards.  Once the comment
    from line 4 is found, a comment for line 1 will not return a
    usable value.
    """

    def __init__(self, tokens, comment_tags):
        """Store the ``(lineno, type, value)`` token list and the accepted
        comment tags; scanning starts at token offset 0.
        """
        self.tokens = tokens
        self.comment_tags = comment_tags
        self.offset = 0
        # NOTE: compared in find_comments but never advanced in this class.
        self.last_lineno = 0

    def find_backwards(self, offset):
        """Search the tokens between the current offset and `offset` from
        the end for a comment whose first word is one of the comment tags.

        Returns a one-element list with the rest of that comment (right
        stripped), or an empty list.  The current offset is always moved to
        `offset` afterwards.
        """
        try:
            for _, token_type, token_value in reversed(
                self.tokens[self.offset : offset]
            ):
                if token_type in ("comment", "linecomment"):
                    try:
                        prefix, comment = token_value.split(None, 1)
                    except ValueError:
                        # comment without text after its first word
                        continue
                    if prefix in self.comment_tags:
                        return [comment.rstrip()]
            return []
        finally:
            self.offset = offset

    def find_comments(self, lineno):
        """Return the tagged comment found in the not yet consumed tokens up
        to and including line `lineno`, as a list (empty when there are no
        comment tags or nothing matches).
        """
        if not self.comment_tags or self.last_lineno > lineno:
            return []
        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
            if token_lineno > lineno:
                return self.find_backwards(self.offset + idx)
        return self.find_backwards(len(self.tokens))


def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             ``comments`` is a list holding the matching translator comment,
             or an empty list if none was found.
    """
    extensions = set()
    for extension in options.get("extensions", "").split(","):
        extension = extension.strip()
        if not extension:
            continue
        extensions.add(import_string(extension))
    # the i18n extension is always loaded; adding to a set is idempotent
    extensions.add(InternationalizationExtension)

    def getbool(options, key, default=False):
        """Read a boolean option given as text: "1", "on", "yes" and "true"
        (case-insensitive) count as true.
        """
        return options.get(key, str(default)).lower() in ("1", "on", "yes", "true")

    silent = getbool(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", BLOCK_START_STRING),
        options.get("block_end_string", BLOCK_END_STRING),
        options.get("variable_start_string", VARIABLE_START_STRING),
        options.get("variable_end_string", VARIABLE_END_STRING),
        options.get("comment_start_string", COMMENT_START_STRING),
        options.get("comment_end_string", COMMENT_END_STRING),
        options.get("line_statement_prefix") or LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or LINE_COMMENT_PREFIX,
        getbool(options, "trim_blocks", TRIM_BLOCKS),
        getbool(options, "lstrip_blocks", LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, "keep_trailing_newline", KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if getbool(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if getbool(options, "newstyle_gettext"):
        environment.newstyle_gettext = True

    source = fileobj.read().decode(options.get("encoding", "utf-8"))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if not silent:
            raise
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)


#: nicer import names
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
debug = DebugExtension