• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# ElementTree
3# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4#
5# light-weight XML support for Python 2.3 and later.
6#
7# history (since 1.2.6):
8# 2005-11-12 fl   added tostringlist/fromstringlist helpers
9# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
10# 2006-07-05 fl   removed support for 2.1 and earlier
11# 2007-06-21 fl   added deprecation/future warnings
12# 2007-08-25 fl   added doctype hook, added parser version attribute etc
13# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
14# 2007-08-27 fl   warn for broken /tag searches on tree level
15# 2007-09-02 fl   added html/text methods to serializer (experimental)
16# 2007-09-05 fl   added method argument to tostring/tostringlist
17# 2007-09-06 fl   improved error handling
18# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
19# 2007-12-15 fl   added C14N hooks, copy method (experimental)
20#
21# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
22#
23# fredrik@pythonware.com
24# http://www.pythonware.com
25#
26# --------------------------------------------------------------------
27# The ElementTree toolkit is
28#
29# Copyright (c) 1999-2008 by Fredrik Lundh
30#
31# By obtaining, using, and/or copying this software and/or its
32# associated documentation, you agree that you have read, understood,
33# and will comply with the following terms and conditions:
34#
35# Permission to use, copy, modify, and distribute this software and
36# its associated documentation for any purpose and without fee is
37# hereby granted, provided that the above copyright notice appears in
38# all copies, and that both that copyright notice and this permission
39# notice appear in supporting documentation, and that the name of
40# Secret Labs AB or the author not be used in advertising or publicity
41# pertaining to distribution of the software without specific, written
42# prior permission.
43#
44# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51# OF THIS SOFTWARE.
52# --------------------------------------------------------------------
53
54# Licensed to PSF under a Contributor Agreement.
55# See http://www.python.org/psf/license for licensing details.
56
# Names exported by "from ... import *" -- the module's public API.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
]

# Version string of this ElementTree implementation.
VERSION = "1.3.0"
76
77##
78# The <b>Element</b> type is a flexible container object, designed to
79# store hierarchical data structures in memory. The type can be
80# described as a cross between a list and a dictionary.
81# <p>
82# Each element has a number of properties associated with it:
83# <ul>
84# <li>a <i>tag</i>. This is a string identifying what kind of data
85# this element represents (the element type, in other words).</li>
86# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87# <li>a <i>text</i> string.</li>
88# <li>an optional <i>tail</i> string.</li>
89# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90# </ul>
91#
92# To create an element instance, use the {@link #Element} constructor
93# or the {@link #SubElement} factory function.
94# <p>
95# The {@link #ElementTree} class can be used to wrap an element
96# structure, and convert it from and to XML.
97##
98
99import sys
100import re
101import warnings
102
103
104class _SimpleElementPath(object):
105    # emulate pre-1.2 find/findtext/findall behaviour
106    def find(self, element, tag, namespaces=None):
107        for elem in element:
108            if elem.tag == tag:
109                return elem
110        return None
111    def findtext(self, element, tag, default=None, namespaces=None):
112        elem = self.find(element, tag)
113        if elem is None:
114            return default
115        return elem.text or ""
116    def iterfind(self, element, tag, namespaces=None):
117        if tag[:3] == ".//":
118            for elem in element.iter(tag[3:]):
119                yield elem
120        for elem in element:
121            if elem.tag == tag:
122                yield elem
123    def findall(self, element, tag, namespaces=None):
124        return list(self.iterfind(element, tag, namespaces))
125
# Use the ElementPath module from this package when it can be imported;
# otherwise fall back to the simple tag-matching emulation defined above.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
130
131##
132# Parser error.  This is a subclass of <b>SyntaxError</b>.
133# <p>
134# In addition to the exception value, an exception instance contains a
135# specific exception code in the <b>code</b> attribute, and the line and
136# column of the error in the <b>position</b> attribute.
137
class ParseError(SyntaxError):
    # Parser error type.  Adds nothing of its own to SyntaxError; it
    # exists so callers can catch parser failures by name.
    pass
140
141# --------------------------------------------------------------------
142
143##
144# Checks if an object appears to be a valid element object.
145#
146# @param element An element instance.
147# @return A true value if this is an element object.
148# @defreturn flag
149
def iselement(element):
    # Duck-typed check: real Element instances qualify, and so does any
    # other object that exposes a "tag" attribute.
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154
155##
156# Element class.  This class defines the Element interface, and
157# provides a reference implementation of this interface.
158# <p>
159# The element name, attribute names, and attribute values can be
160# either ASCII strings (ordinary Python strings containing only 7-bit
161# ASCII characters) or Unicode strings.
162#
163# @param tag The element name.
164# @param attrib An optional dictionary, containing element attributes.
165# @param **extra Additional attributes, given as keyword arguments.
166# @see Element
167# @see SubElement
168# @see Comment
169# @see ProcessingInstruction
170
class Element(object):
    # Serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) The element tag.

    tag = None

    ##
    # (Attribute) Dictionary holding the element's attributes.  Prefer
    # {@link #Element.get}, {@link #Element.set},
    # {@link #Element.keys} and {@link #Element.items} over touching
    # the dictionary directly.

    attrib = None

    ##
    # (Attribute) Text located before the first subelement: a string
    # or None.  Depending on the parser, "no text" may show up as
    # either None or an empty string.

    text = None

    ##
    # (Attribute) Text located after this element's end tag and before
    # the next sibling's start tag: a string or None.  Depending on
    # the parser, "no text" may show up as either None or an empty
    # string.

    tail = None # text after end tag, if any

    ##
    # Constructor.
    #
    # @param tag The element name.
    # @param attrib Optional attribute dictionary.  It is copied, so
    #     the caller's dictionary (and the shared default) is never
    #     modified.
    # @param **extra Additional attributes, given as keyword arguments.

    def __init__(self, tag, attrib={}, **extra):
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        return "<Element %r at 0x%x>" % (self.tag, id(self))

    ##
    # Builds a new element using the same class as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        return self.__class__(tag, attrib)

    ##
    # (Experimental) Makes a shallow copy of this element.  The copy
    # gets its own attribute dictionary and child list, but the
    # subelements themselves are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        duplicate = self.makeelement(self.tag, self.attrib)
        duplicate.text = self.text
        duplicate.tail = self.tail
        duplicate[:] = self
        return duplicate

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; text content is not, so an element with no children can
    # still carry text.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        # emulate old behaviour, for now: true only if there are children
        return bool(self._children)

    ##
    # Returns a subelement (or a slice of subelements), by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces a subelement (or a slice of subelements), by index.
    # The new value is stored without any type checking.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        self._children[index] = element

    ##
    # Deletes a subelement (or a slice of subelements), by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement after the last existing subelement.  The
    # value is stored without any type checking.
    #
    # @param element The element to add.

    def append(self, element):
        self._children.append(element)

    ##
    # Appends every subelement from a sequence, in order.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        self._children.insert(index, element)

    ##
    # Removes a subelement.  The lookup is delegated to the child
    # list's own remove(); Element defines no equality of its own, so
    # plain elements are matched by identity rather than by tag or
    # contents.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  The
    # returned object is the element's internal child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    # Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds the text of the first matching subelement, by tag name or
    # path.  Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.  Delegates
    # to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Iterator flavour of findall().  Delegates to the module-level
    # ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets the element: empties the attribute dictionary in place,
    # drops all subelements, and sets <b>text</b> and <b>tail</b> to
    # None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to
    # <b>attrib[key] = value</b>.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets the attribute names.  Equivalent to <b>attrib.keys()</b>.
    #
    # @return The element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets the attributes as (name, value) pairs.  Equivalent to
    # <b>attrib.items()</b>.
    #
    # @return (name, value) pairs for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The generator yields this element
    # first (if it matches) and then recurses into every subelement
    # in document order.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for.  None and "*" both match every
    #     element.
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for descendant in child.iter(tag):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  The generator yields this element's
    # text, then for every subelement its inner text followed by its
    # tail, in document order.  Empty or missing strings are skipped.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        # An element whose tag is neither a string nor None (for
        # instance one made by the Comment or PI factories, which use a
        # function as the tag) contributes no text at all.
        if not (isinstance(tag, basestring) or tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for chunk in child.itertext():
                yield chunk
            if child.tail:
                yield child.tail
509
# compatibility: additional names bound to the same Element class
_Element = Element
_ElementInterface = Element
512
513##
514# Subelement factory.  This function creates an element instance, and
515# appends it to an existing element.
516# <p>
517# The element name, attribute names, and attribute values can be
518# either 8-bit ASCII strings or Unicode strings.
519#
520# @param parent The parent element.
521# @param tag The subelement name.
522# @param attrib An optional dictionary, containing element attributes.
523# @param **extra Additional attributes, given as keyword arguments.
524# @return An element instance.
525# @defreturn Element
526
def SubElement(parent, tag, attrib={}, **extra):
    # Merge into a copy so neither the caller's dictionary nor the
    # shared default is ever modified.
    merged = attrib.copy()
    merged.update(extra)
    # Let the parent pick the concrete element class, then attach the
    # new element as the parent's last child.
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533
534##
535# Comment element factory.  This factory function creates a special
536# element that will be serialized as an XML comment by the standard
537# serializer.
538# <p>
539# The comment string can be either an 8-bit ASCII string or a Unicode
540# string.
541#
542# @param text A string containing the comment string.
543# @return An element instance, representing a comment.
544# @defreturn Element
545
def Comment(text=None):
    # The factory function itself is used as the tag; that is how code
    # elsewhere in this module recognises comment elements.
    comment = Element(Comment)
    comment.text = text
    return comment
550
551##
552# PI element factory.  This factory function creates a special element
553# that will be serialized as an XML processing instruction by the standard
554# serializer.
555#
556# @param target A string containing the PI target.
557# @param text A string containing the PI contents, if any.
558# @return An element instance, representing a PI.
559# @defreturn Element
560
def ProcessingInstruction(target, text=None):
    # The factory function itself is used as the tag; that is how code
    # elsewhere in this module recognises processing instructions.
    pi = Element(ProcessingInstruction)
    if text:
        # target and contents share the text attribute, space-separated
        pi.text = target + " " + text
    else:
        pi.text = target
    return pi

# short alias for the same factory
PI = ProcessingInstruction
569
570##
571# QName wrapper.  This can be used to wrap a QName attribute value, in
572# order to get proper namespace handling on output.
573#
574# @param text A string containing the QName value, in the form {uri}local,
575#     or, if the tag argument is given, the URI part of a QName.
576# @param tag Optional tag.  If given, the first argument is interpreted as
577#     a URI, and this argument is interpreted as a local name.
578# @return An opaque object, representing the QName.
579
class QName(object):
    # Wrapper around a qualified name held in the <b>text</b>
    # attribute.  A QName compares and hashes like its text, so it can
    # be compared against another QName or against a plain string.

    ##
    # @param text_or_uri The full QName text or, when tag is given,
    #     only the namespace URI.
    # @param tag Optional local name.  When given (and non-empty), the
    #     stored text becomes "{text_or_uri}tag".

    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # hash like the text, to stay consistent with __eq__
        return hash(self.text)

    # Rich comparisons.  These give the same answers as the __cmp__
    # hook below, but also work on interpreters that never call
    # __cmp__; without them, equality silently falls back to object
    # identity there.

    def __eq__(self, other):
        if isinstance(other, QName):
            return self.text == other.text
        return self.text == other

    def __ne__(self, other):
        if isinstance(other, QName):
            return self.text != other.text
        return self.text != other

    def __lt__(self, other):
        if isinstance(other, QName):
            return self.text < other.text
        return self.text < other

    def __le__(self, other):
        if isinstance(other, QName):
            return self.text <= other.text
        return self.text <= other

    def __gt__(self, other):
        if isinstance(other, QName):
            return self.text > other.text
        return self.text > other

    def __ge__(self, other):
        if isinstance(other, QName):
            return self.text >= other.text
        return self.text >= other

    def __cmp__(self, other):
        # Kept for callers that invoke the three-way comparison hook
        # directly; relies on the cmp() builtin.
        if isinstance(other, QName):
            return cmp(self.text, other.text)
        return cmp(self.text, other)
593
594# --------------------------------------------------------------------
595
596##
597# ElementTree wrapper class.  This class represents an entire element
598# hierarchy, and adds some extra support for serialization to and from
599# standard XML.
600#
601# @param element Optional root element.
602# @keyparam file Optional file handle or file name.  If given, the
603#     tree is initialized with the contents of this XML file.
604
605class ElementTree(object):
606
607    def __init__(self, element=None, file=None):
608        # assert element is None or iselement(element)
609        self._root = element # first node
610        if file:
611            self.parse(file)
612
613    ##
614    # Gets the root element for this tree.
615    #
616    # @return An element instance.
617    # @defreturn Element
618
619    def getroot(self):
620        return self._root
621
622    ##
623    # Replaces the root element for this tree.  This discards the
624    # current contents of the tree, and replaces it with the given
625    # element.  Use with care.
626    #
627    # @param element An element instance.
628
629    def _setroot(self, element):
630        # assert iselement(element)
631        self._root = element
632
633    ##
634    # Loads an external XML document into this element tree.
635    #
636    # @param source A file name or file object.  If a file object is
637    #     given, it only has to implement a <b>read(n)</b> method.
638    # @keyparam parser An optional parser instance.  If not given, the
639    #     standard {@link XMLParser} parser is used.
640    # @return The document root element.
641    # @defreturn Element
642    # @exception ParseError If the parser fails to parse the document.
643
644    def parse(self, source, parser=None):
645        close_source = False
646        if not hasattr(source, "read"):
647            source = open(source, "rb")
648            close_source = True
649        try:
650            if not parser:
651                parser = XMLParser(target=TreeBuilder())
652            while 1:
653                data = source.read(65536)
654                if not data:
655                    break
656                parser.feed(data)
657            self._root = parser.close()
658            return self._root
659        finally:
660            if close_source:
661                source.close()
662
663    ##
664    # Creates a tree iterator for the root element.  The iterator loops
665    # over all elements in this tree, in document order.
666    #
667    # @param tag What tags to look for (default is to return all elements)
668    # @return An iterator.
669    # @defreturn iterator
670
671    def iter(self, tag=None):
672        # assert self._root is not None
673        return self._root.iter(tag)
674
675    # compatibility
676    def getiterator(self, tag=None):
677        # Change for a DeprecationWarning in 1.4
678        warnings.warn(
679            "This method will be removed in future versions.  "
680            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
681            PendingDeprecationWarning, stacklevel=2
682        )
683        return list(self.iter(tag))
684
685    ##
686    # Same as getroot().find(path), starting at the root of the
687    # tree.
688    #
689    # @param path What element to look for.
690    # @keyparam namespaces Optional namespace prefix map.
691    # @return The first matching element, or None if no element was found.
692    # @defreturn Element or None
693
694    def find(self, path, namespaces=None):
695        # assert self._root is not None
696        if path[:1] == "/":
697            path = "." + path
698            warnings.warn(
699                "This search is broken in 1.3 and earlier, and will be "
700                "fixed in a future version.  If you rely on the current "
701                "behaviour, change it to %r" % path,
702                FutureWarning, stacklevel=2
703                )
704        return self._root.find(path, namespaces)
705
706    ##
707    # Same as getroot().findtext(path), starting at the root of the tree.
708    #
709    # @param path What element to look for.
710    # @param default What to return if the element was not found.
711    # @keyparam namespaces Optional namespace prefix map.
712    # @return The text content of the first matching element, or the
713    #     default value no element was found.  Note that if the element
714    #     is found, but has no text content, this method returns an
715    #     empty string.
716    # @defreturn string
717
718    def findtext(self, path, default=None, namespaces=None):
719        # assert self._root is not None
720        if path[:1] == "/":
721            path = "." + path
722            warnings.warn(
723                "This search is broken in 1.3 and earlier, and will be "
724                "fixed in a future version.  If you rely on the current "
725                "behaviour, change it to %r" % path,
726                FutureWarning, stacklevel=2
727                )
728        return self._root.findtext(path, default, namespaces)
729
730    ##
731    # Same as getroot().findall(path), starting at the root of the tree.
732    #
733    # @param path What element to look for.
734    # @keyparam namespaces Optional namespace prefix map.
735    # @return A list or iterator containing all matching elements,
736    #    in document order.
737    # @defreturn list of Element instances
738
739    def findall(self, path, namespaces=None):
740        # assert self._root is not None
741        if path[:1] == "/":
742            path = "." + path
743            warnings.warn(
744                "This search is broken in 1.3 and earlier, and will be "
745                "fixed in a future version.  If you rely on the current "
746                "behaviour, change it to %r" % path,
747                FutureWarning, stacklevel=2
748                )
749        return self._root.findall(path, namespaces)
750
751    ##
752    # Finds all matching subelements, by tag name or path.
753    # Same as getroot().iterfind(path).
754    #
755    # @param path What element to look for.
756    # @keyparam namespaces Optional namespace prefix map.
757    # @return An iterator or sequence containing all matching elements,
758    #    in document order.
759    # @defreturn a generated sequence of Element instances
760
761    def iterfind(self, path, namespaces=None):
762        # assert self._root is not None
763        if path[:1] == "/":
764            path = "." + path
765            warnings.warn(
766                "This search is broken in 1.3 and earlier, and will be "
767                "fixed in a future version.  If you rely on the current "
768                "behaviour, change it to %r" % path,
769                FutureWarning, stacklevel=2
770                )
771        return self._root.iterfind(path, namespaces)
772
773    ##
774    # Writes the element tree to a file, as XML.
775    #
776    # @def write(file, **options)
777    # @param file A file name, or a file object opened for writing.
778    # @param **options Options, given as keyword arguments.
779    # @keyparam encoding Optional output encoding (default is US-ASCII).
780    # @keyparam xml_declaration Controls if an XML declaration should
781    #     be added to the file.  Use False for never, True for always,
782    #     None for only if not US-ASCII or UTF-8.  None is default.
783    # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
784    # @keyparam method Optional output method ("xml", "html", "text" or
785    #     "c14n"; default is "xml").
786
787    def write(self, file_or_filename,
788              # keyword arguments
789              encoding=None,
790              xml_declaration=None,
791              default_namespace=None,
792              method=None):
793        # assert self._root is not None
794        if not method:
795            method = "xml"
796        elif method not in _serialize:
797            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
798            raise ValueError("unknown method %r" % method)
799        if hasattr(file_or_filename, "write"):
800            file = file_or_filename
801        else:
802            file = open(file_or_filename, "wb")
803        write = file.write
804        if not encoding:
805            if method == "c14n":
806                encoding = "utf-8"
807            else:
808                encoding = "us-ascii"
809        elif xml_declaration or (xml_declaration is None and
810                                 encoding not in ("utf-8", "us-ascii")):
811            if method == "xml":
812                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
813        if method == "text":
814            _serialize_text(write, self._root, encoding)
815        else:
816            qnames, namespaces = _namespaces(
817                self._root, encoding, default_namespace
818                )
819            serialize = _serialize[method]
820            serialize(write, self._root, encoding, qnames, namespaces)
821        if file_or_filename is not file:
822            file.close()
823
824    def write_c14n(self, file):
825        # lxml.etree compatibility.  use output method instead
826        return self.write(file, method="c14n")
827
828# --------------------------------------------------------------------
829# serialization support
830
def _namespaces(elem, encoding, default_namespace=None):
    # Walks the tree rooted at elem and works out how every tag,
    # attribute name and QName value should be written.  Returns a
    # (qnames, namespaces) pair.

    # serialized-name table: qname -> *encoded* "prefix:local" name
    qnames = {None: None}

    # prefix table: namespace uri -> prefix ("" for the default one)
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # work out and record the serialized form of a single qname
        try:
            if qname[:1] != "{":
                # plain (unqualified) name
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
                return
            # "{uri}local" form
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # not seen in this tree yet: use the prefix from the
                # module-level map, or invent a numbered one
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                # the "xml" prefix is never added to the table
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                serialized = "%s:%s" % (prefix, local)
            else:
                serialized = local # default element
            qnames[qname] = encode(serialized)
        except TypeError:
            _raise_serialization_error(qname)

    # pick the tree walker, then fill both tables
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            # only None and the Comment/PI factories are accepted as
            # non-string tags
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
898
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subelements and its tail as XML by passing
    # string fragments to write().  qnames maps tag/attribute names to
    # their serialized form; namespaces (uri -> prefix), when non-empty,
    # is emitted as xmlns declarations on this element.  Recursive
    # calls for subelements always pass None for namespaces.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no serialized tag name: write only the text and the
            # children, without start/end tags
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_xml(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    # v is the namespace uri, k the prefix
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        # QName values are written in their prefixed form
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v, encoding)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for e in elem:
                    _serialize_xml(write, e, encoding, qnames, None)
                write("</" + tag + ">")
            else:
                # neither text nor children: self-closing form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
945
# Lower-case names of the HTML elements for which _serialize_html
# writes no end tag.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# Convert to a set when the set builtin exists; otherwise keep the tuple.
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
953
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subelements and its tail as HTML by passing
    # string fragments to write().  Same arguments as _serialize_xml.
    # Start tags are always closed with ">", "script"/"style" text is
    # written without cdata escaping, and no end tag is written for
    # elements listed in HTML_EMPTY.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no serialized tag name: write only the text and the
            # children, without start/end tags
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    # v is the namespace uri, k the prefix
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            # element-name checks below are case-insensitive
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    # script/style content is encoded but not escaped
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1003
def _serialize_text(write, elem, encoding):
    # Text-only output: pass every fragment yielded by elem.itertext()
    # to write(), encoded, followed by the element's tail if it has one.
    for fragment in elem.itertext():
        encoded = fragment.encode(encoding)
        write(encoded)
    if not elem.tail:
        return
    write(elem.tail.encode(encoding))
1009
# Dispatch table mapping an output method name to its serializer.
# Note that _serialize_text takes (write, elem, encoding) only, while
# the xml and html serializers also take qnames and namespaces.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
1017
1018##
1019# Registers a namespace prefix.  The registry is global, and any
1020# existing mapping for either the given prefix or the namespace URI
1021# will be removed.
1022#
1023# @param prefix Namespace prefix.
1024# @param uri Namespace uri.  Tags and attributes in this namespace
1025#     will be serialized with the given prefix, if at all possible.
1026# @exception ValueError If the prefix is reserved, or is otherwise
1027#     invalid.
1028
def register_namespace(prefix, uri):
    # Registers prefix for uri in the global _namespace_map, first
    # removing any entry that already uses either the prefix or the uri.
    # Raises ValueError for prefixes of the form "ns<digits>", which is
    # the shape of the prefixes generated for unregistered namespaces.
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted inside the loop, and
    # deleting while iterating a live items() view raises RuntimeError.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

# Global registry mapping namespace uri -> preferred prefix.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
1049
def _raise_serialization_error(text):
    # Always raises TypeError, naming the offending value and its type.
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
1054
def _encode(text, encoding):
    # Encode text, writing characters the target encoding cannot
    # represent as numeric character references.  Values that cannot be
    # encoded at all are reported as serialization errors.
    try:
        encoded = text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
    else:
        return encoded
1060
def _escape_cdata(text, encoding):
    # escape character data
    try:
        # Each replace() is guarded by a membership test so that text
        # without the character skips the call; "&" has to be handled
        # first so the entities inserted afterwards are left alone.
        for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1076
def _escape_attrib(text, encoding):
    # escape attribute value
    replacements = (
        ("&", "&amp;"),   # must come first
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        ("\n", "&#10;"),
        )
    try:
        for raw, entity in replacements:
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1093
def _escape_attrib_html(text, encoding):
    # escape attribute value
    # (unlike _escape_attrib, "<" and newlines are left untouched)
    try:
        for raw, entity in (("&", "&amp;"), (">", "&gt;"), ("\"", "&quot;")):
            if raw in text:
                text = text.replace(raw, entity)
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1106
1107# --------------------------------------------------------------------
1108
1109##
1110# Generates a string representation of an XML element, including all
1111# subelements.
1112#
1113# @param element An Element instance.
1114# @keyparam encoding Optional output encoding (default is US-ASCII).
1115# @keyparam method Optional output method ("xml", "html", "text" or
1116#     "c14n"; default is "xml").
1117# @return An encoded string containing the XML data.
1118# @defreturn string
1119
def tostring(element, encoding=None, method=None):
    # The tree is written to a bare object whose write attribute is the
    # append method of a list, so every fragment passed to write() ends
    # up in that list; the fragments are then joined into one string.
    class _sink:
        pass
    chunks = []
    target = _sink()
    target.write = chunks.append
    tree = ElementTree(element)
    tree.write(target, encoding, method=method)
    return "".join(chunks)
1128
1129##
1130# Generates a string representation of an XML element, including all
1131# subelements.  The string is returned as a sequence of string fragments.
1132#
1133# @param element An Element instance.
1134# @keyparam encoding Optional output encoding (default is US-ASCII).
1135# @keyparam method Optional output method ("xml", "html", "text" or
1136#     "c14n"; default is "xml").
1137# @return A sequence object containing the XML data.
1138# @defreturn sequence
1139# @since 1.3
1140
def tostringlist(element, encoding=None, method=None):
    # Same collection scheme as tostring(), but the list of fragments
    # is returned as-is instead of being joined.
    class _sink:
        pass
    chunks = []
    target = _sink()
    target.write = chunks.append
    tree = ElementTree(element)
    tree.write(target, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return chunks
1150
1151##
1152# Writes an element tree or element structure to sys.stdout.  This
1153# function should be used for debugging only.
1154# <p>
1155# The exact output format is implementation dependent.  In this
1156# version, it's written as an ordinary XML file.
1157#
1158# @param elem An element tree or an individual element.
1159
def dump(elem):
    # debugging
    # accept either a tree or a bare element; wrap the latter
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    # finish with a newline unless the root's tail already ends in one
    trailing = tree.getroot().tail
    if not (trailing and trailing[-1] == "\n"):
        sys.stdout.write("\n")
1168
1169# --------------------------------------------------------------------
1170# parsing
1171
1172##
1173# Parses an XML document into an element tree.
1174#
1175# @param source A filename or file object containing XML data.
1176# @param parser An optional parser instance.  If not given, the
1177#     standard {@link XMLParser} parser is used.
1178# @return An ElementTree instance
1179
def parse(source, parser=None):
    # Convenience wrapper: creates an empty ElementTree, lets it load
    # source (using parser, which may be None), and returns the tree.
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
1184
1185##
1186# Parses an XML document into an element tree incrementally, and reports
1187# what's going on to the user.
1188#
1189# @param source A filename or file object containing XML data.
1190# @param events A list of events to report back.  If omitted, only "end"
1191#     events are reported.
1192# @param parser An optional parser instance.  If not given, the
1193#     standard {@link XMLParser} parser is used.
1194# @return A (event, elem) iterator.
1195
def iterparse(source, events=None, parser=None):
    # Anything without a read attribute is treated as a filename and
    # opened here; only a file opened here is closed by this code.
    opened_here = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        opened_here = True
    try:
        parser = parser or XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, opened_here)
    except:
        # setup failed: release the file we opened, then re-raise
        if opened_here:
            source.close()
        raise
1209
class _IterParseIterator(object):
    # Iterator returned by iterparse().  It reads the source in chunks,
    # feeds them to the parser, and hands out the (event, value) tuples
    # that the parser callbacks queued while processing each chunk.
    # After the iteration has finished, the root attribute holds the
    # value returned by the parser's close() method.

    def __init__(self, source, events, parser, close_source=False):
        # source: object with a read() method
        # events: iterable of event names ("start", "end", "start-ns",
        #     "end-ns"); None means ["end"].  Any other name raises
        #     ValueError.
        # parser: parser object; its _parser attribute is used to
        #     install the handlers below
        # close_source: if true, next() closes source when the
        #     iteration ends or fails
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self._error = None
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            # each handler binds event/append (and the wrapped parser
            # method) as default arguments, fixing them at definition time
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    # fall back to the dictionary-based start handler
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    # convert the uri to ascii when possible
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        # Returns the next queued (event, value) tuple, reading and
        # parsing more data when the queue is empty.  Raises
        # StopIteration once the parser has been closed and the queue
        # is drained.
        try:
            while 1:
                try:
                    item = self._events[self._index]
                    self._index += 1
                    return item
                except IndexError:
                    pass
                # queue exhausted: a parse error recorded during the
                # last feed() is raised only now, after the events
                # queued before it have been returned
                if self._error:
                    e = self._error
                    self._error = None
                    raise e
                if self._parser is None:
                    # parser already closed: publish the root and stop
                    self.root = self._root
                    break
                # load event buffer
                del self._events[:]
                self._index = 0
                data = self._file.read(16384)
                if data:
                    try:
                        self._parser.feed(data)
                    except SyntaxError as exc:
                        self._error = exc
                else:
                    # end of input
                    self._root = self._parser.close()
                    self._parser = None
        except:
            # any failure: close the source if requested, then re-raise
            if self._close_file:
                self._file.close()
            raise
        if self._close_file:
            self._file.close()
        raise StopIteration

    def __iter__(self):
        return self
1297
1298##
1299# Parses an XML document from a string constant.  This function can
1300# be used to embed "XML literals" in Python code.
1301#
# @param text A string containing XML data.
1303# @param parser An optional parser instance.  If not given, the
1304#     standard {@link XMLParser} parser is used.
1305# @return An Element instance.
1306# @defreturn Element
1307
def XML(text, parser=None):
    # use the supplied parser, or a standard one if none (or a false
    # value) was given
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    result = parser.close()
    return result
1313
1314##
1315# Parses an XML document from a string constant, and also returns
1316# a dictionary which maps from element id:s to elements.
1317#
# @param text A string containing XML data.
1319# @param parser An optional parser instance.  If not given, the
1320#     standard {@link XMLParser} parser is used.
1321# @return A tuple containing an Element instance and a dictionary.
1322# @defreturn (Element, dictionary)
1323
def XMLID(text, parser=None):
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    # index every element that has a non-empty "id" attribute; if an
    # id value occurs more than once, the last element seen wins
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            continue
        by_id[key] = node
    return root, by_id
1335
1336##
1337# Parses an XML document from a string constant.  Same as {@link #XML}.
1338#
1339# @def fromstring(text)
# @param text A string containing XML data.
1341# @return An Element instance.
1342# @defreturn Element
1343
# alias: fromstring is the very same function object as XML
fromstring = XML
1345
1346##
1347# Parses an XML document from a sequence of string fragments.
1348#
1349# @param sequence A list or other sequence containing XML data fragments.
1350# @param parser An optional parser instance.  If not given, the
1351#     standard {@link XMLParser} parser is used.
1352# @return An Element instance.
1353# @defreturn Element
1354# @since 1.3
1355
def fromstringlist(sequence, parser=None):
    parser = parser or XMLParser(target=TreeBuilder())
    # feed the fragments one by one, in sequence order
    feed = parser.feed
    for fragment in sequence:
        feed(fragment)
    return parser.close()
1362
1363# --------------------------------------------------------------------
1364
1365##
1366# Generic element structure builder.  This builder converts a sequence
1367# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1368# #TreeBuilder.end} method calls to a well-formed element structure.
1369# <p>
1370# You can use this class to build an element structure using a custom XML
1371# parser, or a parser for some other XML-like format.
1372#
1373# @param element_factory Optional element factory.  This factory
1374#    is called to create new Element instances, as necessary.
1375
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        # fall back to the standard Element class when no factory is given
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._elem = [] # stack of currently open elements
        self._data = [] # text fragments collected since the last flush
        self._last = None # most recently opened or closed element
        self._tail = None # true once an end tag has been seen

    ##
    # Returns the toplevel element built so far.  All elements must
    # have been closed, and at least one element must have been seen.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        root = self._last
        assert len(self._elem) == 0, "missing end tags"
        assert root is not None, "missing toplevel element"
        return root

    def _flush(self):
        # Attach the collected text fragments to the last element: as
        # its tail if we are after an end tag, as its text otherwise.
        # Fragments collected before any element was seen are dropped.
        if not self._data:
            return
        last = self._last
        if last is not None:
            joined = "".join(self._data)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = joined
            else:
                assert last.text is None, "internal error (text)"
                last.text = joined
        self._data = []

    ##
    # Adds text to the current element.  The text is only collected
    # here; it is attached to an element at the next start or end call.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element, making it a child of the innermost open
    # element (if there is one).
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        node = self._factory(tag, attrs)
        self._last = node
        open_elements = self._elem
        if open_elements:
            open_elements[-1].append(node)
        open_elements.append(node)
        self._tail = 0
        return node

    ##
    # Closes the current element.
    #
    # @param tag The element name; must match the innermost open element.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1452
# unique default for the html argument, so that XMLParser can tell
# "not passed" apart from any value a caller might pass
_sentinel = ['sentinel']

##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object.  If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities.  This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding.  If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):

    def __init__(self, html=_sentinel, target=None, encoding=None):
        if html is not _sentinel:
            warnings.warnpy3k(
                "The html argument of XMLParser() is deprecated",
                DeprecationWarning, stacklevel=2)
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace uri and local name in expanded names
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        # exception class of the expat module actually imported above
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        # entity name -> replacement text, consulted by _default() for
        # entity references passed to the default handler
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _raiseerror(self, value):
        # re-raise an expat error as ParseError, carrying over the
        # error code and the (line, column) position
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                # "uri}local" as delivered by expat -> "{uri}local"
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start handler for attributes delivered as a dictionary
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start handler for attributes delivered as a flat
        # [name, value, name, value, ...] list
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        # comments are forwarded only if the target has a comment method
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        # processing instructions are forwarded only if the target has
        # a pi method
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        # default handler: resolves entity references through
        # self.entity and picks up the pieces of a doctype declaration
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # Build the error from self._error, i.e. from whichever
                # expat module __init__ managed to import, so that
                # feed()/close() recognize it and convert it.
                lineno = self._parser.ErrorLineNumber
                column = self._parser.ErrorColumnNumber
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, lineno, column)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = lineno
                err.offset = column
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                # strip the surrounding quotes from the identifiers
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
    # @exception ParseError If the parser reports an error.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element
    # @exception ParseError If the parser reports an error.

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1675
# compatibility: XMLTreeBuilder is kept as an alias of XMLParser
XMLTreeBuilder = XMLParser
1678
# workaround circular import.
# The optional C14N serializer is registered under the "c14n" method
# name here, at the end of the module; if ElementC14N cannot be
# imported, the method is simply left out of the _serialize table.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass
1685