1# 2# ElementTree 3# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ 4# 5# limited xinclude support for element trees 6# 7# history: 8# 2003-08-15 fl created 9# 2003-11-14 fl fixed default loader 10# 11# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 12# 13# fredrik@pythonware.com 14# http://www.pythonware.com 15# 16# -------------------------------------------------------------------- 17# The ElementTree toolkit is 18# 19# Copyright (c) 1999-2008 by Fredrik Lundh 20# 21# By obtaining, using, and/or copying this software and/or its 22# associated documentation, you agree that you have read, understood, 23# and will comply with the following terms and conditions: 24# 25# Permission to use, copy, modify, and distribute this software and 26# its associated documentation for any purpose and without fee is 27# hereby granted, provided that the above copyright notice appears in 28# all copies, and that both that copyright notice and this permission 29# notice appear in supporting documentation, and that the name of 30# Secret Labs AB or the author not be used in advertising or publicity 31# pertaining to distribution of the software without specific, written 32# prior permission. 33# 34# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 35# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 36# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 37# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 38# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 39# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 40# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 41# OF THIS SOFTWARE. 42# -------------------------------------------------------------------- 43 44# Licensed to PSF under a Contributor Agreement. 45# See https://www.python.org/psf/license for licensing details. 46 47## 48# Limited XInclude support for the ElementTree package. 
##

import copy
from . import ElementTree
from urllib.parse import urljoin

XINCLUDE = "{http://www.w3.org/2001/XInclude}"

XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6


##
# Raised when an inclusion cannot be carried out (fatal include error).

class FatalIncludeError(SyntaxError):
    pass


##
# Raised when the permitted inclusion depth is used up.

class LimitedRecursiveIncludeError(FatalIncludeError):
    pass


##
# Default loader.  Reads the included resource from the local file
# system by passing the reference straight to open().
#
# @param href Resource reference.
# @param parse Parse mode.  "xml" parses the file; any other value is
#     handled as "text".
# @param encoding Optional text encoding, used for "text" only
#     (UTF-8 when omitted or empty).
# @return The expanded resource.  For parse mode "xml" this is the
#     root Element of the parsed file; for "text" it is the file
#     content as a string.  A loader is allowed to signal failure
#     either by returning None or by raising OSError.
# @throws OSError If the resource cannot be opened or read.

def default_loader(href, parse, encoding=None):
    if parse != "xml":
        # Text mode: decode the whole file, falling back to UTF-8.
        with open(href, 'r', encoding=encoding or 'UTF-8') as stream:
            return stream.read()
    # XML mode: parse from a binary stream so that the parser can apply
    # the encoding declared by the document itself.
    with open(href, 'rb') as stream:
        return ElementTree.parse(stream).getroot()

##
# Expand XInclude directives.
#
# @param elem Root Element or any ElementTree of a tree to be expanded
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements
#     (unknown parse mode, stray xi:fallback, or an xi:include that has
#     no href and no base URL to fall back on).
# @throws OSError If the function fails to load a given resource.
# @throws ValueError If negative {@link max_depth} is passed.
# @returns None. Modifies tree pointed by {@link elem}

def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    if max_depth is None:
        # -1 only ever gets decremented further, so the "== 0" depth
        # check in _include() never fires: the limit is disabled.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    # Accept an ElementTree as well as a bare Element.
    if hasattr(elem, 'getroot'):
        elem = elem.getroot()
    if loader is None:
        loader = default_loader

    # The set tracks the hrefs currently being expanded on the path from
    # the root, which is what lets _include() detect include cycles.
    _include(elem, loader, base_url, max_depth, set())


##
# Recursive worker behind {@link include}.  Walks the children of
# <b>elem</b> and expands every xi:include directive in place.
#
# @param elem Element whose subtree is expanded (modified in place).
# @param loader Callable with the {@link default_loader} interface.
# @param base_url URL used to resolve hrefs found below <b>elem</b>;
#     may be None or empty, in which case hrefs are used as written.
# @param max_depth Remaining number of nested XML inclusions allowed;
#     a negative value means "unlimited".
# @param _parent_hrefs Set of hrefs of the XML documents currently
#     being expanded; used to reject recursive includes.
# @throws LimitedRecursiveIncludeError If <b>max_depth</b> is exhausted.
# @throws FatalIncludeError On recursive includes, unloadable resources,
#     unknown parse modes, misplaced xi:fallback elements, or an
#     xi:include without any resolvable href.

def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    # look for xinclude elements
    i = 0
    while i < len(elem):
        e = elem[i]
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = e.get("href")
            if base_url:
                href = urljoin(base_url, href)
            if href is None:
                # No href attribute and no base URL to stand in for it:
                # there is nothing to hand to the loader.  Report this as
                # a malformed directive instead of letting the loader
                # fail on None (TypeError from open() in the default one).
                raise FatalIncludeError(
                    "xi:include tag has no href attribute"
                    )
            parse = e.get("parse", "xml")
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError("recursive include of %s" % href)
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                _parent_hrefs.add(href)
                node = loader(href, parse)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = copy.copy(node)  # FIXME: this makes little sense with recursive includes
                # Expand the included document itself; its own href becomes
                # the base for the relative references it contains.
                _include(node, loader, href, max_depth - 1, _parent_hrefs)
                _parent_hrefs.remove(href)
                if e.tail:
                    # Keep the text that followed the xi:include element.
                    node.tail = (node.tail or "") + e.tail
                elem[i] = node
            elif parse == "text":
                text = loader(href, parse, e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if e.tail:
                    text += e.tail
                # Splice the text where the directive was: onto the tail of
                # the previous sibling, or onto the parent's text if the
                # directive was the first child.
                if i:
                    node = elem[i-1]
                    node.tail = (node.tail or "") + text
                else:
                    elem.text = (elem.text or "") + text
                del elem[i]
                # The next sibling has moved into slot i; do not advance.
                continue
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            raise FatalIncludeError(
                "xi:fallback tag must be child of xi:include (%r)" % e.tag
                )
        else:
            _include(e, loader, base_url, max_depth, _parent_hrefs)
        i += 1