• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# ElementTree
3# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
4#
5# limited xinclude support for element trees
6#
7# history:
8# 2003-08-15 fl   created
9# 2003-11-14 fl   fixed default loader
10#
11# Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
12#
13# fredrik@pythonware.com
14# http://www.pythonware.com
15#
16# --------------------------------------------------------------------
17# The ElementTree toolkit is
18#
19# Copyright (c) 1999-2008 by Fredrik Lundh
20#
21# By obtaining, using, and/or copying this software and/or its
22# associated documentation, you agree that you have read, understood,
23# and will comply with the following terms and conditions:
24#
25# Permission to use, copy, modify, and distribute this software and
26# its associated documentation for any purpose and without fee is
27# hereby granted, provided that the above copyright notice appears in
28# all copies, and that both that copyright notice and this permission
29# notice appear in supporting documentation, and that the name of
30# Secret Labs AB or the author not be used in advertising or publicity
31# pertaining to distribution of the software without specific, written
32# prior permission.
33#
34# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41# OF THIS SOFTWARE.
42# --------------------------------------------------------------------
43
44# Licensed to PSF under a Contributor Agreement.
45# See https://www.python.org/psf/license for licensing details.
46
47##
48# Limited XInclude support for the ElementTree package.
49##
50
51import copy
52from . import ElementTree
53from urllib.parse import urljoin
54
# XInclude namespace URI wrapped in braces, i.e. the form in which it
# prefixes the tag of a namespaced element.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tags of the xi:include and xi:fallback elements.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6
62
63
64##
65# Fatal include error.
66
class FatalIncludeError(SyntaxError):
    """Raised when an XInclude directive cannot be processed.

    Derives from SyntaxError, so handlers written for SyntaxError also
    catch this exception.
    """
69
70
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum inclusion depth is reached.

    A specialisation of FatalIncludeError, so handlers for the base class
    also catch this exception.
    """
73
74
75##
76# Default loader.  This loader reads an included resource from disk.
77#
78# @param href Resource reference.
79# @param parse Parse mode.  Either "xml" or "text".
80# @param encoding Optional text encoding (UTF-8 by default for "text").
81# @return The expanded resource.  If the parse mode is "xml", this
82#    is an Element instance.  If the parse mode is "text", this
83#    is a string.  If the loader fails, it can return None
84#    or raise an OSError exception.
85# @throws OSError If the loader fails to load the resource.
86
def default_loader(href, parse, encoding=None):
    """Load an included resource from the local file system.

    href is passed to open() as the file to read.  When parse is "xml" the
    file is parsed and the root element of the resulting tree is returned.
    For any other parse value the file is read as text, decoded with
    encoding (UTF-8 when encoding is empty or None), and returned as a
    string.

    Errors raised by open() or by the XML parser propagate to the caller.
    """
    if parse == "xml":
        # Binary mode: the XML parser handles the decoding itself.
        with open(href, 'rb') as stream:
            return ElementTree.parse(stream).getroot()

    text_encoding = encoding or 'UTF-8'
    with open(href, 'r', encoding=text_encoding) as stream:
        return stream.read()
97
98##
99# Expand XInclude directives.
100#
101# @param elem Root Element, or an ElementTree instance, of the tree to be expanded.
102# @param loader Optional resource loader.  If omitted, it defaults
103#     to {@link default_loader}.  If given, it should be a callable
104#     that implements the same interface as <b>default_loader</b>.
105# @param base_url The base URL of the original file, to resolve
106#     relative include file references.
107# @param max_depth The maximum number of recursive inclusions.
108#     Limited to reduce the risk of malicious content explosion.
109#     Pass None to disable the limitation.
110# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
111# @throws FatalIncludeError If the function fails to include a given
112#     resource, or if the tree contains malformed XInclude elements.
113# @throws OSError If the function fails to load a given resource.
114# @throws ValueError If negative {@link max_depth} is passed.
115# @returns None.  Modifies the tree referenced by {@link elem} in place.
116
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found below elem, in place.

    elem is either an element or an object with a getroot() method, in
    which case the element returned by getroot() is processed.  loader
    defaults to default_loader.  base_url, when given, is the URL against
    which include references are resolved.

    max_depth limits how deeply inclusions may nest.  None disables the
    limit; a negative value raises ValueError.  Returns None.
    """
    if max_depth is None:
        # The depth counter is only ever decremented and compared with 0,
        # so starting below zero means the limit is never hit.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError(
            "expected non-negative depth or None for 'max_depth', got %r"
            % max_depth)

    # Accept a whole tree as well as a bare element.
    root = elem.getroot() if hasattr(elem, 'getroot') else elem
    resolver = default_loader if loader is None else loader

    # A fresh set per top-level call tracks the hrefs currently being expanded.
    _include(root, resolver, base_url, max_depth, set())
130
131
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    """Expand the xi:include directives among the descendants of elem.

    elem is modified in place.  loader is called as loader(href, "xml") or
    loader(href, "text", encoding).  base_url, when truthy, is joined with
    each href before loading.  max_depth is the number of nested XML
    inclusions still allowed; reaching 0 raises
    LimitedRecursiveIncludeError.  _parent_hrefs is the set of hrefs whose
    XML inclusion is currently in progress; meeting one of them again
    raises FatalIncludeError.
    """
    pos = 0
    # Walk by index: a text inclusion deletes the current child, so the
    # child list can shrink while it is being scanned.
    while pos < len(elem):
        child = elem[pos]

        if child.tag != XINCLUDE_INCLUDE:
            if child.tag == XINCLUDE_FALLBACK:
                # A fallback is only meaningful inside an xi:include.
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % child.tag
                    )
            # Ordinary element: look for directives further down.
            _include(child, loader, base_url, max_depth, _parent_hrefs)
            pos += 1
            continue

        # process xinclude directive
        target = child.get("href")
        if base_url:
            target = urljoin(base_url, target)
        mode = child.get("parse", "xml")

        if mode == "xml":
            if target in _parent_hrefs:
                raise FatalIncludeError("recursive include of %s" % target)
            if max_depth == 0:
                raise LimitedRecursiveIncludeError(
                    "maximum xinclude depth reached when including file %s" % target)
            _parent_hrefs.add(target)
            included = loader(target, mode)
            if included is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (target, mode)
                    )
            included = copy.copy(included)  # FIXME: this makes little sense with recursive includes
            # Directives inside the included document are resolved
            # against the URL it was loaded from.
            _include(included, loader, target, max_depth - 1, _parent_hrefs)
            _parent_hrefs.remove(target)
            # Keep the text that followed the directive.
            if child.tail:
                included.tail = (included.tail or "") + child.tail
            elem[pos] = included
            pos += 1
        elif mode == "text":
            content = loader(target, mode, child.get("encoding"))
            if content is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (target, mode)
                    )
            if child.tail:
                content += child.tail
            # The directive is replaced by plain text, which is attached
            # to the previous sibling's tail or, for a first child, to
            # the parent's text.
            if pos:
                previous = elem[pos-1]
                previous.tail = (previous.tail or "") + content
            else:
                elem.text = (elem.text or "") + content
            # Removing the directive shifts the next child into this
            # slot, so the index is deliberately not advanced.
            del elem[pos]
        else:
            raise FatalIncludeError(
                "unknown parse type in xi:include tag (%r)" % mode
            )
187