• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2"""Extension API for adding custom tags and behavior."""
3import pprint
4import re
5from sys import version_info
6
7from markupsafe import Markup
8
9from . import nodes
10from ._compat import iteritems
11from ._compat import string_types
12from ._compat import with_metaclass
13from .defaults import BLOCK_END_STRING
14from .defaults import BLOCK_START_STRING
15from .defaults import COMMENT_END_STRING
16from .defaults import COMMENT_START_STRING
17from .defaults import KEEP_TRAILING_NEWLINE
18from .defaults import LINE_COMMENT_PREFIX
19from .defaults import LINE_STATEMENT_PREFIX
20from .defaults import LSTRIP_BLOCKS
21from .defaults import NEWLINE_SEQUENCE
22from .defaults import TRIM_BLOCKS
23from .defaults import VARIABLE_END_STRING
24from .defaults import VARIABLE_START_STRING
25from .environment import Environment
26from .exceptions import TemplateAssertionError
27from .exceptions import TemplateSyntaxError
28from .nodes import ContextReference
29from .runtime import concat
30from .utils import contextfunction
31from .utils import import_string
32
# The only gettext functions that are really useful in a Jinja template.
# Note that ugettext must be assigned to gettext, as Jinja doesn't
# support non-unicode strings.
GETTEXT_FUNCTIONS = ("_", "gettext", "ngettext")

# Matches a line break together with the whitespace around it.  Used by
# the i18n extension to collapse such runs into a single space when a
# translatable block is trimmed.
_ws_re = re.compile(r"\s*\n\s*")
39
40
class ExtensionRegistry(type):
    """Metaclass that gives every extension class a unique identifier."""

    def __new__(mcs, name, bases, d):
        # Build the class as usual, then tag it with its dotted path
        # (defining module plus class name).
        cls = type.__new__(mcs, name, bases, d)
        cls.identifier = ".".join((cls.__module__, cls.__name__))
        return cls
48
49
class Extension(with_metaclass(ExtensionRegistry, object)):
    """Base class for extensions, which add extra functionality to the
    Jinja template system at the parser level.

    Custom extensions are bound to an environment but may not store
    environment specific data on `self`.  The reason is that an extension
    can be bound to another environment (for overlays) by creating a copy
    and reassigning the `environment` attribute.

    Because extensions are created by the environment they cannot accept
    any arguments for configuration.  A factory function is not a way
    around that, as extensions are identified by their import name.  The
    correct way to configure an extension is to store the configuration
    values on the environment.  Since the environment then acts as the
    central configuration storage, attribute names may clash, so
    extensions have to make sure the names they choose are not too
    generic.  ``prefix`` for example is a terrible name, whereas
    ``fragment_cache_prefix`` is a good one because it includes the name
    of the extension (fragment cache).
    """

    #: if this extension parses, this is the set of tags it's listening to.
    tags = set()

    #: the priority of the extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        self.environment = environment

    def bind(self, environment):
        """Return a copy of this extension bound to another environment."""
        # Bypass ``__init__`` on purpose: the copy takes over the
        # instance state as-is and only swaps the environment.
        clone = object.__new__(self.__class__)
        vars(clone).update(vars(self))
        clone.environment = environment
        return clone

    def preprocess(self, source, name, filename=None):
        """Hook that is called before the actual lexing and can be used
        to preprocess the source.  The `filename` is optional.  The
        return value must be the preprocessed source; this default
        implementation returns `source` unchanged.
        """
        return source

    def filter_stream(self, stream):
        """Hook that receives a :class:`~jinja2.lexer.TokenStream` and
        can be used to filter the tokens returned.  It has to return an
        iterable of :class:`~jinja2.lexer.Token`\\s, but it doesn't have
        to return a :class:`~jinja2.lexer.TokenStream`.  This default
        implementation returns `stream` unchanged.
        """
        return stream

    def parse(self, parser):
        """Called with the parser as first argument when any of the
        :attr:`tags` matched.  The token the parser stream is pointing at
        is the name token that matched.  Implementations have to return
        one node or a list of multiple nodes.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is
        useful to pass constants on extensions to generated template
        code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(
        self, name, args=None, kwargs=None, dyn_args=None, dyn_kwargs=None, lineno=None
    ):
        """Build a node that calls a method of the extension.  This is a
        shortcut for :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to empty lists when not given.
        """
        positional = [] if args is None else args
        keywords = [] if kwargs is None else kwargs
        method = self.attr(name, lineno=lineno)
        return nodes.Call(
            method, positional, keywords, dyn_args, dyn_kwargs, lineno=lineno
        )
140
141
@contextfunction
def _gettext_alias(__context, *args, **kwargs):
    # Resolve "gettext" from the context on every call and forward all
    # arguments to it.
    gettext = __context.resolve("gettext")
    return __context.call(gettext, *args, **kwargs)
145
146
def _make_new_gettext(func):
    """Wrap the gettext callable `func` in a context function that also
    performs the ``%`` placeholder expansion with the keyword arguments
    it receives.
    """

    @contextfunction
    def gettext(__context, __string, **variables):
        translated = __context.call(func, __string)
        # With autoescaping on, the translated text is wrapped in
        # Markup before it is formatted.
        if __context.eval_ctx.autoescape:
            translated = Markup(translated)
        # Always treat the result as a format string, even if there
        # are no variables.  This makes translation strings more
        # consistent and predictable, at the price of requiring
        # escaping.
        return translated % variables

    return gettext
159
160
def _make_new_ngettext(func):
    """Wrap the ngettext callable `func` in a context function that also
    performs the ``%`` placeholder expansion with the keyword arguments
    it receives.
    """

    @contextfunction
    def ngettext(__context, __singular, __plural, __num, **variables):
        # Expose the count as ``num`` unless the caller passed its own.
        variables.setdefault("num", __num)
        translated = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            translated = Markup(translated)
        # Always treat the result as a format string, even if there
        # are no variables beyond ``num``.
        return translated % variables

    return ngettext
172
173
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja."""

    tags = {"trans"}

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Register the ``_`` alias and the install/extract helpers on
        `environment`.
        """
        Extension.__init__(self, environment)
        environment.globals["_"] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False,
        )

    def _install(self, translations, newstyle=None):
        """Install the callables of a translations object, preferring
        its ``ugettext``/``ungettext`` attributes over
        ``gettext``/``ngettext`` when they exist.
        """

        def pick(preferred, fallback):
            func = getattr(translations, preferred, None)
            if func is None:
                func = getattr(translations, fallback)
            return func

        gettext = pick("ugettext", "gettext")
        ngettext = pick("ungettext", "ngettext")
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install callables that do not translate: gettext returns its
        argument, ngettext returns the plural form unless ``n`` is 1.
        """
        self._install_callables(
            lambda x: x, lambda s, p, n: p if n != 1 else s, newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Store `gettext` and `ngettext` in the environment globals.

        If `newstyle` is not `None` it first overrides the environment's
        ``newstyle_gettext`` flag; when that flag is set the callables
        are wrapped with the newstyle helpers before being stored.
        """
        env = self.environment
        if newstyle is not None:
            env.newstyle_gettext = newstyle
        if env.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        env.globals.update(gettext=gettext, ngettext=ngettext)

    def _uninstall(self, translations):
        """Remove ``gettext`` and ``ngettext`` from the environment
        globals.  `translations` is accepted but not used.
        """
        env_globals = self.environment.globals
        env_globals.pop("gettext", None)
        env_globals.pop("ngettext", None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may be
        template source (parsed first) or an already parsed node.
        """
        tree = source
        if isinstance(tree, string_types):
            tree = self.environment.parse(tree)
        return extract_from_ast(tree, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        stream = parser.stream
        lineno = next(stream).lineno
        num_called_num = False

        # Collect the variables given in the tag itself.  A variable can
        # additionally be used in the body of the trans block without
        # being listed here; those are picked up further down.
        plural_expr = None
        plural_expr_assignment = None
        variables = {}
        trimmed = None
        while stream.current.type != "block_end":
            if variables:
                stream.expect("comma")

            # skip colon for python compatibility
            if stream.skip_if("colon"):
                break

            token = stream.expect("name")
            key = token.value
            if key in variables:
                parser.fail(
                    "translatable variable %r defined twice." % key,
                    token.lineno,
                    exc=TemplateAssertionError,
                )

            if stream.current.type == "assign":
                # ``name=expression``
                next(stream)
                var = parser.parse_expression()
                variables[key] = var
            elif trimmed is None and key in ("trimmed", "notrimmed"):
                # the first trimmed/notrimmed keyword is a modifier,
                # not a variable
                trimmed = key == "trimmed"
                continue
            else:
                # a bare name refers to the variable of that name
                var = nodes.Name(key, "load")
                variables[key] = var

            # the first variable doubles as the pluralization count
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # bind the call result to ``_trans`` and refer to
                    # that name from here on
                    plural_expr = nodes.Name("_trans", "load")
                    variables[key] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = key == "num"

        stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                first_name = singular_names[0]
                plural_expr = nodes.Name(first_name, "load")
                num_called_num = first_name == "num"

        # if we have a pluralize block, we parse that too
        if stream.current.test("name:pluralize"):
            have_plural = True
            next(stream)
            if stream.current.type != "block_end":
                # explicit count variable: {% pluralize name %}
                token = stream.expect("name")
                if token.value not in variables:
                    parser.fail(
                        "unknown variable %r for pluralization" % token.value,
                        token.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[token.value]
                num_called_num = token.value == "num"
            stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            next(stream)
            referenced.update(plural_names)
        else:
            next(stream)

        # register free names as simple name expressions
        for free_name in referenced:
            if free_name not in variables:
                variables[free_name] = nodes.Name(free_name, "load")

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail("pluralize without variables", lineno)

        # without an explicit modifier the environment policy decides
        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is None:
            return node
        return [plural_expr_assignment, node]

    def _trim_whitespace(self, string, _ws_re=_ws_re):
        """Strip `string` and replace every match of `_ws_re` with a
        single space.
        """
        stripped = string.strip()
        return _ws_re.sub(" ", stripped)

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns the list of variable names referenced in the block and
        the block's text as a ``%``-style format string.
        """
        stream = parser.stream
        referenced = []
        buf = []
        while True:
            kind = stream.current.type
            if kind == "data":
                # literal text; percent signs are doubled
                buf.append(stream.current.value.replace("%", "%%"))
                next(stream)
            elif kind == "variable_begin":
                # only a plain name is accepted between the variable
                # delimiters; it becomes a ``%(name)s`` placeholder
                next(stream)
                name = stream.expect("name").value
                referenced.append(name)
                buf.append("%%(%s)s" % name)
                stream.expect("variable_end")
            elif kind == "block_begin":
                next(stream)
                if stream.current.test("name:endtrans"):
                    break
                elif stream.current.test("name:pluralize"):
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                parser.fail(
                    "control structures in translatable sections are not allowed"
                )
            elif stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self, singular, plural, variables, plural_expr, vars_referenced, num_called_num
    ):
        """Generates a useful node from the data provided."""
        newstyle = self.environment.newstyle_gettext

        # For old style gettext with no variables referenced in the
        # body, the doubled percent signs are turned back into single
        # ones.
        # NOTE(review): this is keyed on `vars_referenced` while the
        # ``%`` formatting below is keyed on `variables` -- confirm the
        # two cannot disagree.
        if not (vars_referenced or newstyle):
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        if plural_expr is None:
            # singular only
            func_name = "gettext"
            call_args = [nodes.Const(singular)]
        else:
            # singular and plural
            func_name = "ngettext"
            call_args = [nodes.Const(singular), nodes.Const(plural), plural_expr]
        node = nodes.Call(nodes.Name(func_name, "load"), call_args, [], None, None)

        if newstyle:
            # in case newstyle gettext is used, the method is powerful
            # enough to handle the variable expansion and autoescape
            # handling itself.  ``num`` is skipped when the count
            # variable is called num, as the function adds it anyway.
            node.kwargs.extend(
                nodes.Keyword(key, value)
                for key, value in iteritems(variables)
                if not (num_called_num and key == "num")
            )
        else:
            # otherwise do that here: mark the return value as safe if
            # we are in an environment with autoescaping turned on and
            # apply the variables with the ``%`` operator
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                pairs = [
                    nodes.Pair(nodes.Const(key), value)
                    for key, value in variables.items()
                ]
                node = nodes.Mod(node, nodes.Dict(pairs))
        return nodes.Output([node])
435
436
class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja that evaluates an expression like the
    print statement does, except that the return value is not printed.
    """

    tags = {"do"}

    def parse(self, parser):
        # Consume the ``do`` token, then parse the expression after it.
        tag = next(parser.stream)
        stmt = nodes.ExprStmt(lineno=tag.lineno)
        stmt.node = parser.parse_tuple()
        return stmt
448
449
class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""

    tags = {"break", "continue"}

    def parse(self, parser):
        # The matched tag name decides which node is produced.
        tag = next(parser.stream)
        node_class = nodes.Break if tag.value == "break" else nodes.Continue
        return node_class(lineno=tag.lineno)
460
461
class WithExtension(Extension):
    """Extension that adds no tags or behavior of its own.

    NOTE(review): presumably kept so that existing references to this
    extension keep working -- confirm.
    """
464
465
class AutoEscapeExtension(Extension):
    """Extension that adds no tags or behavior of its own.

    NOTE(review): presumably kept so that existing references to this
    extension keep working -- confirm.
    """
468
469
class DebugExtension(Extension):
    """A ``{% debug %}`` tag that dumps the available variables,
    filters, and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser):
        """Turn the tag into an output node that calls :meth:`_render`
        with a reference to the template context.
        """
        lineno = parser.stream.expect("name:debug").lineno
        render_call = self.call_method("_render", [ContextReference()], lineno=lineno)
        return nodes.Output([render_call], lineno=lineno)

    def _render(self, context):
        """Return the pretty-printed dump of the context variables and
        of the sorted filter and test names of the environment.
        """
        env = self.environment
        snapshot = {
            "context": context.get_all(),
            "filters": sorted(env.filters.keys()),
            "tests": sorted(env.tests.keys()),
        }

        # Set the depth since the intent is to show the top few names.
        # ``compact`` is only passed on Python 3.4 and later.
        options = {"depth": 3}
        if version_info[:2] >= (3, 4):
            options["compact"] = True
        return pprint.pformat(snapshot, **options)
511
512
def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS, babel_style=True):
    """Extract localizable strings from the given template node.

    By default matches are returned in babel style, which means that non
    string parameters as well as keyword arguments are returned as
    `None`.  This allows Babel to figure out what you really meant if
    you are using gettext functions that allow keyword arguments for
    placeholder expansion.  If you don't want that behavior set the
    `babel_style` parameter to `False`, which causes only strings to be
    returned and parameters to always be stored in tuples.  As a
    consequence, gettext calls without a single string parameter are
    skipped in that mode.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every call found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the line number of the call node,
    * ``function`` is the name of the ``gettext`` function used, and
    * ``message`` is the string itself, or a tuple of the arguments for
      calls that do not have exactly one argument.

    This extraction function operates on the AST and is because of that
    unable to extract any comments.  For comment support you have to use
    the babel extraction interface or extract comments yourself.
    """
    for call in node.find_all(nodes.Call):
        target = call.node
        # only calls to a plain name listed in `gettext_functions` count
        if not (isinstance(target, nodes.Name) and target.name in gettext_functions):
            continue

        # constant string arguments are kept, anything else becomes None
        strings = [
            arg.value
            if isinstance(arg, nodes.Const) and isinstance(arg.value, string_types)
            else None
            for arg in call.args
        ]
        # one None for every keyword argument and for each of the
        # dynamic ``*args`` / ``**kwargs`` expressions
        strings.extend(None for _ in call.kwargs)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if babel_style:
            message = strings[0] if len(strings) == 1 else tuple(strings)
        else:
            message = tuple(x for x in strings if x is not None)
            if not message:
                continue
        yield call.lineno, target.name, message
579
580
581class _CommentFinder(object):
582    """Helper class to find comments in a token stream.  Can only
583    find comments for gettext calls forwards.  Once the comment
584    from line 4 is found, a comment for line 1 will not return a
585    usable value.
586    """
587
588    def __init__(self, tokens, comment_tags):
589        self.tokens = tokens
590        self.comment_tags = comment_tags
591        self.offset = 0
592        self.last_lineno = 0
593
594    def find_backwards(self, offset):
595        try:
596            for _, token_type, token_value in reversed(
597                self.tokens[self.offset : offset]
598            ):
599                if token_type in ("comment", "linecomment"):
600                    try:
601                        prefix, comment = token_value.split(None, 1)
602                    except ValueError:
603                        continue
604                    if prefix in self.comment_tags:
605                        return [comment.rstrip()]
606            return []
607        finally:
608            self.offset = offset
609
610    def find_comments(self, lineno):
611        if not self.comment_tags or self.last_lineno > lineno:
612            return []
613        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
614            if token_lineno > lineno:
615                return self.find_backwards(self.offset + idx)
616        return self.find_backwards(len(self.tokens))
617
618
def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted
                    from; its content is read and decoded with the
                    ``encoding`` option (``utf-8`` by default)
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
    """
    # The i18n extension is always loaded, next to the extensions that
    # are listed by import name in the comma separated option.
    requested = (part.strip() for part in options.get("extensions", "").split(","))
    extensions = {import_string(import_name) for import_name in requested if import_name}
    extensions.add(InternationalizationExtension)

    def getbool(options, key, default=False):
        # option values are strings; only these spellings count as true
        raw = options.get(key, str(default))
        return raw.lower() in ("1", "on", "yes", "true")

    silent = getbool(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", BLOCK_START_STRING),
        options.get("block_end_string", BLOCK_END_STRING),
        options.get("variable_start_string", VARIABLE_START_STRING),
        options.get("variable_end_string", VARIABLE_END_STRING),
        options.get("comment_start_string", COMMENT_START_STRING),
        options.get("comment_end_string", COMMENT_END_STRING),
        options.get("line_statement_prefix") or LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or LINE_COMMENT_PREFIX,
        getbool(options, "trim_blocks", TRIM_BLOCKS),
        getbool(options, "lstrip_blocks", LSTRIP_BLOCKS),
        NEWLINE_SEQUENCE,
        getbool(options, "keep_trailing_newline", KEEP_TRAILING_NEWLINE),
        frozenset(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if getbool(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if getbool(options, "newstyle_gettext"):
        environment.newstyle_gettext = True

    encoding = options.get("encoding", "utf-8")
    source = fileobj.read().decode(encoding)
    try:
        # the AST provides the messages, the token list the comments
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if silent:
            # skip templates with syntax errors
            return
        raise

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        comments = finder.find_comments(lineno)
        yield lineno, func, message, comments
696
697
#: Nicer import names: short module-level aliases for the extension
#: classes defined above.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
debug = DebugExtension
705