• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Lightweight XML support for Python.
2
3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree.  This module has two classes for this purpose:
5
6    1. ElementTree represents the whole XML document as a tree and
7
8    2. Element represents a single node in this tree.
9
10 Interactions with the whole document (reading and writing to/from files) are
11 usually done on the ElementTree level.  Interactions with a single XML element
12 and its sub-elements are done on the Element level.
13
14 Element is a flexible container object designed to store hierarchical data
15 structures in memory. It can be described as a cross between a list and a
16 dictionary.  Each Element has a number of properties associated with it:
17
18    'tag' - a string containing the element's name.
19
20    'attributes' - a Python dictionary storing the element's attributes.
21
22    'text' - a string containing the element's text content.
23
24    'tail' - an optional string containing text after the element's end tag.
25
26    And a number of child elements stored in a Python sequence.
27
28 To create an element instance, use the Element constructor,
29 or the SubElement factory function.
30
31 You can also use the ElementTree class to wrap an element structure
32 and convert it to and from XML.
33
34"""
35
36#---------------------------------------------------------------------
37# Licensed to PSF under a Contributor Agreement.
38# See http://www.python.org/psf/license for licensing details.
39#
40# ElementTree
41# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
42#
43# fredrik@pythonware.com
44# http://www.pythonware.com
45# --------------------------------------------------------------------
46# The ElementTree toolkit is
47#
48# Copyright (c) 1999-2008 by Fredrik Lundh
49#
50# By obtaining, using, and/or copying this software and/or its
51# associated documentation, you agree that you have read, understood,
52# and will comply with the following terms and conditions:
53#
54# Permission to use, copy, modify, and distribute this software and
55# its associated documentation for any purpose and without fee is
56# hereby granted, provided that the above copyright notice appears in
57# all copies, and that both that copyright notice and this permission
58# notice appear in supporting documentation, and that the name of
59# Secret Labs AB or the author not be used in advertising or publicity
60# pertaining to distribution of the software without specific, written
61# prior permission.
62#
63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70# OF THIS SOFTWARE.
71# --------------------------------------------------------------------
72
# Names exported by ``from ... import *``; this is the module's public API.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLPullParser",
    "register_namespace",
]

# Version string of the ElementTree toolkit.
VERSION = "1.3.0"
93
94import sys
95import re
96import warnings
97import io
98import collections
99import contextlib
100
101from . import ElementPath
102
103
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, code raising a ParseError attaches
    two extra attributes to the instance:
        'code'     - the specific exception code
        'position' - the line and column of the error

    """
114
115# --------------------------------------------------------------------
116
117
def iselement(element):
    """Tell whether *element* looks like an Element.

    The check is duck-typed: any object exposing a 'tag' attribute counts.

    """
    try:
        element.tag
    except AttributeError:
        return False
    return True
121
122
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement. This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag.  This is either a string or the value None.  Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        *attrib* must be a dict (TypeError otherwise); it is copied, so
        neither the caller's dictionary nor the shared default is ever
        mutated.  *extra* keyword arguments are merged in on top of it.

        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of direct subelements."""
        return len(self._children)

    def __bool__(self):
        """Return True if the element has subelements (emits FutureWarning)."""
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement (or list of subelements) at *index*."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index*; no type check is performed."""
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*."""
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        TypeError is raised if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is any iterable yielding zero or more elements.  Nothing
        is appended if any item is not an Element (TypeError is raised).

        """
        # Materialise first: a generator or other one-shot iterator would be
        # exhausted by the validation pass and then contribute nothing.  The
        # snapshot also keeps elem.extend(elem) finite.
        pending = list(elements)
        for element in pending:
            self._assert_is_element(element)
        self._children.extend(pending)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*.

        TypeError is raised if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        """Raise TypeError unless *e* is an instance of the Python Element."""
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.  The returned list is the
        element's own child list, not a copy.

        """
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is whatever the attribute
        dictionary returns for that call.

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is whatever the attribute
        dictionary returns for that call.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Pending deprecation) Return list(self.iter(tag))."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.  Elements whose tag is neither a
        string nor None yield nothing.

        """
        tag = self.tag
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
440
441
def SubElement(parent, tag, attrib={}, **extra):
    """Create a child element and attach it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    The child is built with parent.makeelement(), appended to *parent*, and
    returned.  *attrib* itself is left untouched; a merged copy is used.

    """
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
459
460
def Comment(text=None):
    """Build a comment node.

    The returned object is an Element whose tag is the Comment function
    itself; the standard serializer writes such elements as XML comments.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
473
474
def ProcessingInstruction(target, text=None):
    """Build a processing instruction node.

    The returned object is an Element whose tag is the
    ProcessingInstruction function itself, which is how the serializer
    recognises it.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.  When
    *text* is non-empty it is joined to *target* with a single space and the
    result stored as the element's text.

    """
    node = Element(ProcessingInstruction)
    node.text = target + " " + text if text else target
    return node

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction
492
493
class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value so that namespaces are handled properly
    on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Instances hash and compare by their 'text' attribute, both against other
    QName objects and against plain values.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    @staticmethod
    def _comparable(other):
        # Compare against the wrapped text of another QName, or against the
        # other operand itself when it is not a QName.
        return other.text if isinstance(other, QName) else other

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        return self.text == self._comparable(other)

    def __lt__(self, other):
        return self.text < self._comparable(other)

    def __le__(self, other):
        return self.text <= self._comparable(other)

    def __gt__(self, other):
        return self.text > self._comparable(other)

    def __ge__(self, other):
        return self.text >= self._comparable(other)
538
539# --------------------------------------------------------------------
540
541
class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """
    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element.  Use with care!

        """
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        close_source = False
        if not hasattr(source, "read"):
            # Only files opened here are closed here; caller-supplied file
            # objects are left open.
            source = open(source, "rb")
            close_source = True
        try:
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        """
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Pending deprecation) Return list(self.iter(tag))."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    @staticmethod
    def _compat_path(path):
        """Return *path* rewritten for the legacy absolute-path behaviour.

        A path starting with "/" is prefixed with "." and a FutureWarning is
        issued; any other path is returned unchanged.

        """
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3: skip this helper and the public find* method so
            # the warning is attributed to the code that called find*().
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version.  If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return self._root.find(self._compat_path(path), namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path),  which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return whatever the root element's findtext() returns for the
        (possibly rewritten) path.

        """
        return self._root.findtext(self._compat_path(path), default,
                                   namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        return self._root.findall(self._compat_path(path), namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return self._root.iterfind(self._compat_path(path), namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                        when method is "c14n")

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        ValueError is raised for an unknown *method*.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        enc_lower = encoding.lower()
        with _get_writer(file_or_filename, enc_lower) as write:
            if method == "xml" and (xml_declaration or
                    (xml_declaration is None and
                     enc_lower not in ("utf-8", "us-ascii", "unicode"))):
                declared_encoding = encoding
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                serialize = _serialize[method]
                serialize(write, self._root, qnames, namespaces,
                          short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        """Write the tree to *file* using the "c14n" output method."""
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
781
782# --------------------------------------------------------------------
783# serialization support
784
785@contextlib.contextmanager
786def _get_writer(file_or_filename, encoding):
787    # returns text write method and release all resources after using
788    try:
789        write = file_or_filename.write
790    except AttributeError:
791        # file_or_filename is a file name
792        if encoding == "unicode":
793            file = open(file_or_filename, "w")
794        else:
795            file = open(file_or_filename, "w", encoding=encoding,
796                        errors="xmlcharrefreplace")
797        with file:
798            yield file.write
799    else:
800        # file_or_filename is a file-like object
801        # encoding determines if it is a text or binary writer
802        if encoding == "unicode":
803            # use a text writer as is
804            yield write
805        else:
806            # wrap a binary writer with TextIOWrapper
807            with contextlib.ExitStack() as stack:
808                if isinstance(file_or_filename, io.BufferedIOBase):
809                    file = file_or_filename
810                elif isinstance(file_or_filename, io.RawIOBase):
811                    file = io.BufferedWriter(file_or_filename)
812                    # Keep the original file open when the BufferedWriter is
813                    # destroyed
814                    stack.callback(file.detach)
815                else:
816                    # This is to handle passed objects that aren't in the
817                    # IOBase hierarchy, but just have a write method
818                    file = io.BufferedIOBase()
819                    file.writable = lambda: True
820                    file.write = write
821                    try:
822                        # TextIOWrapper uses this methods to determine
823                        # if BOM (for UTF-16, etc) should be added
824                        file.seekable = file_or_filename.seekable
825                        file.tell = file_or_filename.tell
826                    except AttributeError:
827                        pass
828                file = io.TextIOWrapper(file,
829                                        encoding=encoding,
830                                        errors="xmlcharrefreplace",
831                                        newline="\n")
832                # Keep the original file open when the TextIOWrapper is
833                # destroyed
834                stack.callback(file.detach)
835                yield file.write
836
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks *elem* and all its descendants and returns a ``(qnames,
    namespaces)`` pair: *qnames* maps each tag, attribute name and QName
    value to its serialized ``prefix:local`` form, and *namespaces* maps
    namespace URIs to prefixes.  If *default_namespace* is given it is
    registered with the empty prefix, and unqualified names are rejected
    with ValueError.

    """
    # serialized representation of every name seen so far
    qnames = {None: None}

    # namespace URI -> prefix
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # work out and record the serialized form of one name
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            # an empty prefix means the default namespace: no "prefix:" part
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    def register(name):
        # add a name only the first time it is encountered
        if name not in qnames:
            add_qname(name)

    # fill the qname and namespace tables from the whole tree
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            register(tag.text)
        elif isinstance(tag, str):
            register(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            register(key)
            if isinstance(value, QName):
                register(value.text)
        text = node.text
        if isinstance(text, QName):
            register(text.text)
    return qnames, namespaces
897
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Serialize *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* (uri -> prefix) is emitted as xmlns declarations on this
    element; recursive calls pass None for it.  When *short_empty_elements*
    is true, an element with no text and no children is written as
    ``<tag />``.

    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % text)
    elif tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit the content only
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attrs = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # by prefix
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (suffix, _escape_attrib(uri)))
            for key, value in sorted(attrs):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            if not (text or len(elem)) and short_empty_elements:
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
947
# Lower-case names of HTML elements that _serialize_html writes without an
# end tag.  Built directly as a set: the built-in set type is always
# available, so no NameError fallback is needed.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}
955
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Serialize *elem* and its subtree as HTML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* (uri -> prefix) is emitted as xmlns declarations on this
    element; recursive calls pass None for it.  Text inside "script" and
    "style" elements is written unescaped, and elements named in
    HTML_EMPTY get no end tag.

    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit the content only
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attrs = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # by prefix
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (suffix, _escape_attrib(uri)))
            for key, value in sorted(attrs):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    # raw text elements: content goes out verbatim
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
1005
1006def _serialize_text(write, elem):
1007    for part in elem.itertext():
1008        write(part)
1009    if elem.tail:
1010        write(elem.tail)
1011
# Dispatch table: output method name -> serializer function.
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
)
1019
1020
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global; any mapping that already uses either the
    given prefix or the given namespace URI is dropped first.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Gather the conflicting entries first so the dict is not mutated
    # while it is being iterated.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1039
# Global registry mapping namespace URIs to the prefix used for them when
# serializing.  _namespaces() reads it and register_namespace() updates it.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting: expose the registry as an attribute of
# register_namespace.
register_namespace._namespace_map = _namespace_map
1054
1055def _raise_serialization_error(text):
1056    raise TypeError(
1057        "cannot serialize %r (type %s)" % (text, type(text).__name__)
1058        )
1059
1060def _escape_cdata(text):
1061    # escape character data
1062    try:
1063        # it's worth avoiding do-nothing calls for strings that are
1064        # shorter than 500 character, or so.  assume that's, by far,
1065        # the most common case in most applications.
1066        if "&" in text:
1067            text = text.replace("&", "&amp;")
1068        if "<" in text:
1069            text = text.replace("<", "&lt;")
1070        if ">" in text:
1071            text = text.replace(">", "&gt;")
1072        return text
1073    except (TypeError, AttributeError):
1074        _raise_serialization_error(text)
1075
1076def _escape_attrib(text):
1077    # escape attribute value
1078    try:
1079        if "&" in text:
1080            text = text.replace("&", "&amp;")
1081        if "<" in text:
1082            text = text.replace("<", "&lt;")
1083        if ">" in text:
1084            text = text.replace(">", "&gt;")
1085        if "\"" in text:
1086            text = text.replace("\"", "&quot;")
1087        # The following business with carriage returns is to satisfy
1088        # Section 2.11 of the XML specification, stating that
1089        # CR or CR LN should be replaced with just LN
1090        # http://www.w3.org/TR/REC-xml/#sec-line-ends
1091        if "\r\n" in text:
1092            text = text.replace("\r\n", "\n")
1093        if "\r" in text:
1094            text = text.replace("\r", "\n")
1095        #The following four lines are issue 17582
1096        if "\n" in text:
1097            text = text.replace("\n", "&#10;")
1098        if "\t" in text:
1099            text = text.replace("\t", "&#09;")
1100        return text
1101    except (TypeError, AttributeError):
1102        _raise_serialization_error(text)
1103
1104def _escape_attrib_html(text):
1105    # escape attribute value
1106    try:
1107        if "&" in text:
1108            text = text.replace("&", "&amp;")
1109        if ">" in text:
1110            text = text.replace(">", "&gt;")
1111        if "\"" in text:
1112            text = text.replace("\"", "&quot;")
1113        return text
1114    except (TypeError, AttributeError):
1115        _raise_serialization_error(text)
1116
1117# --------------------------------------------------------------------
1118
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  A str is returned when *encoding* is
    "unicode"; otherwise the result is a bytestring.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n".

    Returns an (optionally) encoded string containing the XML data.

    """
    # Text output needs a text buffer, encoded output a bytes buffer.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1137
1138class _ListDataStream(io.BufferedIOBase):
1139    """An auxiliary stream accumulating into a list reference."""
1140    def __init__(self, lst):
1141        self.lst = lst
1142
1143    def writable(self):
1144        return True
1145
1146    def seekable(self):
1147        return True
1148
1149    def write(self, b):
1150        self.lst.append(b)
1151
1152    def tell(self):
1153        return len(self.lst)
1154
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Serialize *element* and return the output as a list of chunks.

    Takes the same arguments as tostring(); the chunks are collected in
    the order the serializer writes them.

    """
    chunks = []
    tree = ElementTree(element)
    tree.write(_ListDataStream(chunks), encoding, method=method,
               short_empty_elements=short_empty_elements)
    return chunks
1162
1163
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    Intended for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    # make sure the output ends with a newline
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1181
1182# --------------------------------------------------------------------
1183# parsing
1184
1185
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1198
1199
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is called with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  The iterator has a
    ``root`` attribute that is None until parsing has finished and is then
    set to the root element.  A file opened here from a filename is closed
    when the underlying generator finishes.

    """
    # The Iterator ABC lives in collections.abc; the collections.Iterator
    # alias was removed in Python 3.10.  Imported locally so this function
    # does not rely on the module-level imports.
    from collections.abc import Iterator

    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        # 'source', 'close_source' and 'it' are bound further down; that is
        # safe because this generator body only runs on the first next().
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    class IterParseIterator(Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = False
    if not hasattr(source, "read"):
        # a filename was given: open it here and close it when done
        source = open(source, "rb")
        close_source = True

    return it
1246
1247
class XMLPullParser:
    """Feed-driven parser whose events are collected in a queue and pulled
    by the caller through read_events()."""

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        queue = collections.deque()
        self._events_queue = queue
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # hook the parser up so that it reports into our queue
        if events is None:
            events = ("end",)
        self._parser._setevents(queue, events)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so that read_events() raises it in sequence
            # with the events collected before it.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser = self._parser
        root = parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Each event is removed from the internal queue as the iterator
        hands it out.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
1299
1300
def XML(text, parser=None):
    """Parse XML document from string constant.

    Handy for embedding "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()
1316
1317
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.  Elements whose "id" attribute is
    missing or empty are left out; when an id occurs more than once, the
    last element in document order wins.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        if ident:
            by_id[ident] = node
    return root, by_id
1338
# Parse XML document from string constant.  fromstring is an alias for XML():
# both names refer to the same function object.
fromstring = XML
1341
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        parser.feed(fragment)
    return parser.close()
1356
1357# --------------------------------------------------------------------
1358
1359
class TreeBuilder:
    """Generic element structure builder.

    Turns a sequence of start(), data() and end() calls into a
    well-formed element structure.

    Useful for building an element structure from a custom XML parser, or
    from a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    """
    def __init__(self, element_factory=None):
        self._data = [] # pending text chunks
        self._elem = [] # stack of open elements
        self._last = None # most recently opened or closed element
        self._tail = None # true once the last event was an end tag
        self._factory = Element if element_factory is None else element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the buffered text to the last element: as its tail when we
        # are past its end tag, as its text otherwise.
        if not self._data:
            return
        last = self._last
        if last is not None:
            text = "".join(self._data)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = text
            else:
                assert last.text is None, "internal error (text)"
                last.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            # nest the new element under the innermost open one
            stack[-1].append(elem)
        stack.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1432
1433
1434# also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *html* are predefined HTML entities (deprecated and not supported),
    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, html=0, target=None, encoding=None):
        # *html* is accepted but never read in this constructor.
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: names in a namespace arrive as
        # "uri}local" and _fixname() turns them into "{uri}local".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks
        # Handlers are installed only for the methods the target provides.
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        # None outside a doctype declaration; a list of collected tokens
        # while _default() is inside one.
        self._doctype = None
        # Entity name -> replacement text, consulted by _default() for
        # entities expat reports as undefined.
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor).
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each handler below replaces the corresponding expat handler and
        # appends an (event_name, payload) tuple to events_queue.  Values
        # are bound as default arguments so each closure keeps its own
        # event name.  An unknown event name raises ValueError.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                def handler(prefix, uri, event=event_name, append=append):
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                def handler(prefix, event=event_name, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, carrying over its code and
        # its (line, column) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # Expand a "uri}local" name to "{uri}local"; names without "}" are
        # returned unchanged.  Results are memoized in self._names.
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        # Handler for expat's EndElementHandler; forwards the fixed-up name.
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Installed as expat's DefaultHandlerExpand.  Handles two things:
        # undefined entity references, and the pieces of a doctype
        # declaration, which are collected one call at a time.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                # text is "&name;": look up "name" in self.entity
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                    self.parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                # end of the declaration before a full id was collected
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                # Expected token layouts:
                #   [name, "PUBLIC", pubid, system]
                #   [name, "SYSTEM", system]
                # pubid and system still carry their quotes; [1:-1] strips
                # them.
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                    if pubid:
                        pubid = pubid[1:-1]
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    # (a subclass overrode doctype(): call the base
                    # implementation for its DeprecationWarning, then the
                    # override itself)
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    def doctype(self, name, pubid, system):
        """(Deprecated)  Handle doctype declaration

        *name* is the Doctype name, *pubid* is the public identifier,
        and *system* is the system identifier.

        """
        # This base implementation only issues the deprecation warning.
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    # (name-mangled to _XMLParser__doctype, so it keeps pointing at the
    # base implementation)
    __doctype = doctype

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            # convert expat errors to ParseError
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        # Return whatever the target's close() returns; a target without
        # close() yields None.
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
1644
1645
1646# Import the C accelerators
1647try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" uses by external
    # code (see tests)
1651    _Element_Py = Element
1652
1653    # Element, SubElement, ParseError, TreeBuilder, XMLParser
1654    from _elementtree import *
1655except ImportError:
1656    pass
1657