• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import xml.dom
19
20from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
21from xml.dom.minicompat import *
22from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
23
# Node types consulted by the ID-cache invalidation checks in the
# child-manipulation methods.  The list is deliberately incomplete: the
# node being checked is the one being added or removed (not the node
# being modified), so it will never be a DOCUMENT_NODE or a
# DOCUMENT_FRAGMENT_NODE.
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
31
32
class Node(xml.dom.Node):
    """Base class for minidom nodes.

    Implements the child-list operations (insertBefore, appendChild,
    replaceChild, removeChild, normalize), the serialization helpers
    (toxml, toprettyxml), the DOM Level 3 user-data API and the
    minidom-specific unlink().  The methods rely on subclasses to provide
    ``nodeType``, ``childNodes``, ``_child_node_types`` and ``writexml``.
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        # Nodes are always true, so "if node:" tests never depend on
        # anything but the node's existence.
        return True

    def toxml(self, encoding = None):
        """Return the node serialized with no added indentation/newlines."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialized through writexml().

        indent -- the indentation string to prepend, per level
        newl -- the newline string to append
        encoding -- if not None, the output is passed through the stream
            writer of that codec; it is forwarded to writexml() only for
            document nodes.
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        A document fragment has each of its children inserted in turn and
        is itself returned.  Raises HierarchyRequestErr if newChild's type
        is not allowed here and NotFoundErr if refChild is not a child of
        this node.  Returns newChild.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a copy: each insertion removes the child from
            # the fragment's own list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            # Splice newChild into the sibling chain just before refChild.
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended in turn and
        is itself returned.  Raises HierarchyRequestErr if the node's type
        is not allowed here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        Returns None when newChild is oldChild.  For a document fragment
        the result of the underlying insertBefore() call is returned.
        Raises HierarchyRequestErr for a disallowed node type and
        NotFoundErr if oldChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # Hand oldChild's sibling links over to newChild.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr if oldChild is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Close the gap in the sibling chain before clearing the links.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children, merge adjacent ones, recurse into
        child elements."""
        # L collects the children that survive, in order.
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Replace the contents in place so the list object is preserved.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a clone made by _clone_node() for this node's document."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owning implementation whether it has the feature."""
        # ownerDocument may be None; fall back to the node itself, the
        # same way cloneNode() does, instead of failing on None.
        owner = self.ownerDocument or self
        return owner.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if the feature is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing data=None removes an existing entry; the handler is
        ignored in that case.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        # Notify every registered handler; entries without one are skipped.
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's parent, owner and sibling links and unlink
        all of its children recursively."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
273
274
275def _append_child(self, node):
276    # fast path with less checks; usable by DOM builders if careful
277    childNodes = self.childNodes
278    if childNodes:
279        last = childNodes[-1]
280        node.__dict__["previousSibling"] = last
281        last.__dict__["nextSibling"] = node
282    childNodes.append(node)
283    node.__dict__["parentNode"] = self
284
285def _in_document(node):
286    # return True iff node is part of a document tree
287    while node is not None:
288        if node.nodeType == Node.DOCUMENT_NODE:
289            return True
290        node = node.parentNode
291    return False
292
293def _write_data(writer, data):
294    "Writes datachars to writer."
295    if data:
296        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
297                    replace("\"", "&quot;").replace(">", "&gt;")
298        writer.write(data)
299
300def _get_elements_by_tagName_helper(parent, name, rc):
301    for node in parent.childNodes:
302        if node.nodeType == Node.ELEMENT_NODE and \
303            (name == "*" or node.tagName == name):
304            rc.append(node)
305        _get_elements_by_tagName_helper(node, name, rc)
306    return rc
307
308def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
309    for node in parent.childNodes:
310        if node.nodeType == Node.ELEMENT_NODE:
311            if ((localName == "*" or node.localName == localName) and
312                (nsURI == "*" or node.namespaceURI == nsURI)):
313                rc.append(node)
314            _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
315    return rc
316
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE holding a list of children."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
333
334
class Attr(Node):
    """Attribute node.

    The attribute's value is kept both on the node itself (``value`` /
    ``nodeValue``) and in the single Text child created by __init__;
    __setattr__ keeps the two in step.
    """

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an attribute named qName.

        localName is accepted but not stored; the localName property is
        derived from nodeName.
        """
        # skip setattr for performance
        d = self.__dict__
        d["nodeName"] = d["name"] = qName
        d["namespaceURI"] = namespaceURI
        d["prefix"] = prefix
        d['childNodes'] = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Everything after the first ":" of the qualified name, or the
        # whole name when there is no prefix.
        return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        # "value"/"nodeValue" and "name"/"nodeName" are aliases that must
        # always change together; everything else is stored as given.
        d = self.__dict__
        if name in ("value", "nodeValue"):
            d["value"] = d["nodeValue"] = value
            # Mirror the value into the Text child.
            d2 = self.childNodes[0].__dict__
            d2["data"] = d2["nodeValue"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        elif name in ("name", "nodeName"):
            d["name"] = d["nodeName"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        else:
            d[name] = value

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild name/nodeName from it.

        Raises NamespaceErr if "xmlns" is used with a namespace other
        than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        d = self.__dict__
        d['prefix'] = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        d['nodeName'] = d['name'] = newName

    def _set_value(self, value):
        d = self.__dict__
        d['value'] = d['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        """Detach the attribute from its owner element and drop its
        children.  Calling it again on a detached attribute is harmless.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The attribute is no longer in use by elem.  Leaving the
            # back-reference in place would make a second unlink() fail
            # on the deletions above and would keep the attribute from
            # being attached to another element.
            self.__dict__['ownerElement'] = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        # Explicitly flagged IDs win; otherwise consult the document's
        # element info, if any.
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        # Falls back to _no_type whenever no element info is available.
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
452
453
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.

    Keys may be either a qualified-name string or a
    (namespaceURI, localName) tuple.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position index, or None if the index
        is out of range."""
        try:
            # list() keeps this working when keys() is not indexable.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def has_key(self, key):
        """Return True if key (a name or an (ns, localName) pair) is
        present."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    # Without this, "key in map" falls back to __getitem__(0) and raises
    # KeyError instead of answering the question.
    __contains__ = has_key

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        """Return the attribute node stored under name, or value."""
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        # Two maps are equal when they are views of the same dictionary.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or an Attr node.

        Raises TypeError for any other kind of value.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute node called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute node for (namespaceURI, localName), or
        None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for (namespaceURI, localName).

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node under both indexes and return the attribute it
        replaced (None if there was none).

        Raises HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
607
608
class TypeInfo(object):
    """Pair of (namespace, name) identifying a schema type."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.name = name
        self.namespace = namespace

    def __repr__(self):
        # The namespace is shown only when it is non-empty.
        if not self.namespace:
            return "<TypeInfo %r>" % self.name
        return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)

    def _get_namespace(self):
        return self.namespace

    def _get_name(self):
        return self.name

# Shared placeholder used where no type information is available.
_no_type = TypeInfo(None, None)
629
class Element(Node):
    """Element node.

    Attributes are kept in two dictionaries that index the same Attr
    objects: ``_attrs`` by qualified name and ``_attrsNS`` by
    (namespaceURI, localName).
    """

    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element named tagName.

        localName is accepted but not stored; the localName property is
        derived from tagName.
        """
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def _get_localName(self):
        return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink every attribute, drop the attribute indexes, then
        unlink the element itself."""
        # Iterate over a snapshot: each attr.unlink() deletes its entry
        # from self._attrs.
        for attr in list(self._attrs.values()):
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute's value, or "" if it is not set."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the namespaced attribute's value, or "" if not set."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set attribute attname to value, creating it if necessary."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # Assigning through the attribute keeps nodeValue and the
            # attribute's Text child in step with the value.
            attr.value = value
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            attr.value = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set the attribute identified by namespaceURI and the local
        part of qualifiedName, creating it if necessary.  An existing
        attribute also takes over the prefix of qualifiedName."""
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            attr.value = value
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            if value != attr.value:
                attr.value = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                # Rename the attribute and move its entry in the
                # qualified-name index so that name, nodeName and the
                # index key all agree.
                self._attrs.pop(attr.name, None)
                d = attr.__dict__
                d["prefix"] = prefix
                d["nodeName"] = d["name"] = qualifiedName
                self._attrs[qualifiedName] = attr

    def getAttributeNode(self, attrname):
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the attribute node that was displaced, or None if none
        was.  Raises InuseAttributeErr if attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        # An attribute that is already attached here must not be removed
        # first: unlinking it would discard its child list.
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None and old1 is not attr:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1 and old2 is not attr:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        # It might have already been part of this node, in which case
        # it doesn't represent a change, and should not be returned.
        for old in (old1, old2):
            if old is not None and old is not attr:
                return old
        return None

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the attribute called name; NotFoundErr if absent."""
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; NotFoundErr if absent."""
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach node from this element and return it.

        Raises NotFoundErr if node is None or no attribute with its name
        is set on this element.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return a NodeList of descendant elements with the given tag
        name ("*" matches all)."""
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a NodeList of descendant elements matching the given
        namespace URI and local name ("*" matches anything)."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes and its children to writer."""
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        # Attributes are written in sorted name order.
        a_names = sorted(attrs.keys())

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType == Node.TEXT_NODE):
                # A lone text child is written inline, with no added
                # indentation or newlines around it.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        return bool(self._attrs)

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag idAttr as an ID attribute of this element.

        Raises NotFoundErr if idAttr is None or not owned by this element,
        and NoModificationAllowedErr if _get_containing_entref() finds a
        containing entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr.__dict__['_is_id'] = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)

defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
854
855
def _set_attribute_node(element, attr):
    # Register ``attr`` in both of the element's attribute indexes and
    # point it back at the element.
    _clear_id_cache(element)
    element._attrs[attr.name] = attr
    ns_key = (attr.namespaceURI, attr.localName)
    element._attrsNS[ns_key] = attr

    # The back-reference makes a cycle; Element.unlink() breaks it by
    # discarding the attribute dictionaries.  Written through __dict__ so
    # that Attr.__setattr__ is bypassed.
    attr.__dict__['ownerElement'] = element
865
866
class Childless:
    """Mixin for node types that never have children.

    Supplies constant "no children" answers for the read-side API and
    makes every child-mutating method raise, so the general Node
    implementations of those methods are not needed.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    # -- read-side API: always empty ------------------------------------

    def hasChildNodes(self):
        return False

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def normalize(self):
        # Nothing to merge or discard without children.
        pass

    # -- mutating API: always refused -----------------------------------

    def appendChild(self, node):
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)
905
906
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node.

    ``target`` mirrors ``nodeName`` and ``data`` mirrors ``nodeValue``:
    assigning either name of a pair updates both.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def _get_data(self):
        return self.data

    def _set_data(self, value):
        state = self.__dict__
        state['data'] = value
        state['nodeValue'] = value

    def _get_target(self):
        return self.target

    def _set_target(self, value):
        state = self.__dict__
        state['target'] = value
        state['nodeName'] = value

    def __setattr__(self, name, value):
        # Keep the aliased attribute pairs in step; everything else is
        # stored as an ordinary instance attribute.
        state = self.__dict__
        if name in ("data", "nodeValue"):
            state['data'] = state['nodeValue'] = value
        elif name in ("target", "nodeName"):
            state['target'] = state['nodeName'] = value
        else:
            state[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction as ``<?target data?>`` to *writer*."""
        fields = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % fields)
936
937
def _check_data_range(data, offset, count=None):
    """Validate *offset* (and optionally *count*) against *data*.

    Raises IndexSizeErr when *offset* is negative or greater than
    len(data), or when *count* is given and negative.  An offset equal
    to len(data) is accepted: DOM only rejects offsets *greater* than
    the length, so operations at the very end of the data are legal.
    """
    if offset < 0:
        raise xml.dom.IndexSizeErr("offset cannot be negative")
    if offset > len(data):
        raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    if count is not None and count < 0:
        raise xml.dom.IndexSizeErr("count cannot be negative")


class CharacterData(Childless, Node):
    """Base class for nodes that carry string data.

    ``data`` and ``nodeValue`` are aliases: assigning either one updates
    both.  The *Data() methods implement the DOM CharacterData editing
    operations and raise IndexSizeErr for out-of-range arguments.
    """

    def _get_length(self):
        """Return the number of characters in ``data``."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']

    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        # Keep data and nodeValue in step; other names are stored as-is.
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        data = self.data
        # Show at most the first ten characters of the data.
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def substringData(self, offset, count):
        """Return up to *count* characters of the data starting at *offset*."""
        _check_data_range(self.data, offset, count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append the string *arg* to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert the string *arg* into the data at *offset*."""
        _check_data_range(self.data, offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to *count* characters of the data starting at *offset*."""
        _check_data_range(self.data, offset, count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters starting at *offset* with *arg*.

        A *count* of zero replaces nothing and therefore inserts *arg*
        at *offset*.
        """
        _check_data_range(self.data, offset, count)
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])

defproperty(CharacterData, "length", doc="Length of the string data.")
1010
1011
class Text(CharacterData):
    """Text node, with the DOM Level 3 whole-text helpers."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at *offset* and return the new trailing node.

        The new node receives the data from *offset* onwards and is placed
        directly after this node when this node is a child of its parent.
        Raises IndexSizeErr for an offset outside 0..len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        tail = self.__class__()
        tail.data = self.data[offset:]
        tail.ownerDocument = self.ownerDocument
        following = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if following is None:
                parent.appendChild(tail)
            else:
                parent.insertBefore(tail, following)
        self.data = self.data[:offset]
        return tail

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the text through _write_data, wrapped in indent and newl."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the data of this node joined with all adjacent text nodes."""
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        preceding = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            preceding.append(sibling.data)
            sibling = sibling.previousSibling
        # Siblings were collected walking backwards; restore document order.
        preceding.reverse()
        following = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in text_types:
            following.append(sibling.data)
            sibling = sibling.nextSibling
        return ''.join(preceding + [self.data] + following)

    def replaceWholeText(self, content):
        """Replace this node and all adjacent text nodes with *content*.

        Adjacent text and CDATA siblings are removed from the parent.  With
        non-empty *content* this node keeps the new text and is returned;
        otherwise this node is removed too and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            earlier = sibling.previousSibling
            parent.removeChild(sibling)
            sibling = earlier
        # Read the following sibling first: removing this node from the
        # parent may reset its sibling links.
        sibling = self.nextSibling
        if not content:
            parent.removeChild(self)
        while sibling is not None and sibling.nodeType in text_types:
            later = sibling.nextSibling
            parent.removeChild(sibling)
            sibling = later
        if not content:
            return None
        state = self.__dict__
        state['data'] = content
        state['nodeValue'] = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Return True only for whitespace-only text in element content.

        Element-content status comes from the owner document's element
        info for the containing element; without it the answer is False.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        return info.isElementContent()

defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1107
1108
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1116
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1124
1125
class Comment(Childless, CharacterData):
    """Comment node."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        self.data = data
        self.nodeValue = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment as ``<!--data-->``.

        Raises ValueError when the data contains ``--``.
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, text, newl))
1137
1138
class CDATASection(Text):
    """CDATA section node."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section as ``<![CDATA[data]]>``.

        indent, addindent and newl are accepted but not used.  Raises
        ValueError when the data contains the terminator ``]]>``.
        """
        text = self.data
        if "]]>" in text:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % text)
1152
1153
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        """Return the number of nodes in the map."""
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, or None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look up by node name, or by a (namespaceURI, localName) tuple.

        Raises KeyError, carrying the key as its single argument, when no
        node matches.
        """
        if isinstance(name_or_tuple, tuple):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Call form: valid on every Python version, and a tuple key
            # stays one argument instead of being unpacked.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at *index*, or None when it is out of range."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        """Always raise NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        """Always raise NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        """Always raise NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        """Always raise NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    # Explicit pickle support: with __slots__ there is no instance
    # __dict__ to carry the state.
    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1215
# Property registration for ReadOnlySequentialNamedNodeMap.
defproperty(
    ReadOnlySequentialNamedNodeMap,
    "length",
    doc="Number of entries in the NamedNodeMap.",
)
1218
1219
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers on the instance."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1235
class DocumentType(Identified, Childless, Node):
    """Document type (DOCTYPE) node.

    ``entities`` and ``notations`` are read-only maps of the declared
    Entity and Notation nodes.
    """

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the qualified name is kept as name.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if it belongs to a document.

        Only a doctype with no ownerDocument is cloned.  With *deep* true
        the notations and entities are copied as well.  User-data handlers
        are notified with NODE_CLONED for every node that is copied.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source node of this notification is the entity
                    # being copied, not a notation from the loop above.
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the ``<!DOCTYPE ...>`` declaration to *writer*.

        A PUBLIC or SYSTEM identifier and the internal subset are included
        when present.  indent and addindent are accepted but not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1295
class Entity(Identified, Node):
    """Entity node.

    The child list is created empty here, and every child-mutating method
    raises HierarchyRequestErr.
    """

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        """Return the entity's actualEncoding attribute."""
        return self.actualEncoding

    def _get_encoding(self):
        """Return the entity's encoding attribute."""
        return self.encoding

    def _get_version(self):
        """Return the entity's version attribute."""
        return self.version

    def appendChild(self, newChild):
        """Always raise HierarchyRequestErr."""
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Always raise HierarchyRequestErr."""
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raise HierarchyRequestErr."""
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        """Always raise HierarchyRequestErr."""
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1335
class Notation(Identified, Childless, Node):
    """Notation node, identified by a name plus public and system IDs."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self.nodeName = name
        self._identified_mixin_init(publicId, systemId)
1343
1344
class DOMImplementation(DOMImplementationLS):
    """Factory for Document and DocumentType objects, with feature queries."""

    # (feature, version) pairs reported by hasFeature(); a version of
    # None stands for "any version".
    _features = [
        ("core", "1.0"),
        ("core", "2.0"),
        ("core", None),
        ("xml", "1.0"),
        ("xml", "2.0"),
        ("xml", None),
        ("ls-load", "3.0"),
        ("ls-load", None),
    ]

    def hasFeature(self, feature, version):
        """Return True when (feature, version) is listed in _features.

        The feature name is compared in lower case and an empty version
        string is treated as None.
        """
        wanted = version
        if wanted == "":
            wanted = None
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a Document, optionally with a doctype and a root element.

        When all three arguments are None the document is returned without
        a document element.  Raises WrongDocumentErr when *doctype* already
        has a parent, InvalidCharacterErr when a root element is requested
        without a name, and NamespaceErr for an illegal prefix/namespace
        combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without a
                # document element.  Otherwise if doctype or namespaceURI
                # are not None then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if (prefix == "xml"
                    and namespaceURI != "http://www.w3.org/XML/1998/namespace"):
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self when *feature* is supported in any version, else None."""
        if not self.hasFeature(feature, None):
            return None
        return self

    # internal
    def _create_document(self):
        """Return a new, empty Document."""
        return Document()
1423
class ElementInfo(object):
    """Object that represents content-model information for an element.

    This implementation is not expected to be used in practice; DOM
    builders should provide implementations which do the right thing
    using information available to it.

    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the type of the named attribute (always _no_type here)."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the type of the identified attribute (always _no_type here)."""
        return _no_type

    def isElementContent(self):
        """Always returns False in this implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # Pickle support: the whole state is the tag name.
    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1465
def _clear_id_cache(node):
    """Reset the ID cache and search stack of the document holding *node*.

    *node* may be the document itself.  Nothing happens for a node that is
    neither a document nor reported as in a document by _in_document().
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1473
1474class Document(Node, DocumentLS):
1475    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
1476                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
1477
1478    nodeType = Node.DOCUMENT_NODE
1479    nodeName = "#document"
1480    nodeValue = None
1481    attributes = None
1482    doctype = None
1483    parentNode = None
1484    previousSibling = nextSibling = None
1485
1486    implementation = DOMImplementation()
1487
1488    # Document attributes from Level 3 (WD 9 April 2002)
1489
1490    actualEncoding = None
1491    encoding = None
1492    standalone = None
1493    version = None
1494    strictErrorChecking = False
1495    errorHandler = None
1496    documentURI = None
1497
1498    _magic_id_count = 0
1499
1500    def __init__(self):
1501        self.childNodes = NodeList()
1502        # mapping of (namespaceURI, localName) -> ElementInfo
1503        #        and tagName -> ElementInfo
1504        self._elem_info = {}
1505        self._id_cache = {}
1506        self._id_search_stack = None
1507
1508    def _get_elem_info(self, element):
1509        if element.namespaceURI:
1510            key = element.namespaceURI, element.localName
1511        else:
1512            key = element.tagName
1513        return self._elem_info.get(key)
1514
1515    def _get_actualEncoding(self):
1516        return self.actualEncoding
1517
1518    def _get_doctype(self):
1519        return self.doctype
1520
1521    def _get_documentURI(self):
1522        return self.documentURI
1523
1524    def _get_encoding(self):
1525        return self.encoding
1526
1527    def _get_errorHandler(self):
1528        return self.errorHandler
1529
1530    def _get_standalone(self):
1531        return self.standalone
1532
1533    def _get_strictErrorChecking(self):
1534        return self.strictErrorChecking
1535
1536    def _get_version(self):
1537        return self.version
1538
1539    def appendChild(self, node):
1540        if node.nodeType not in self._child_node_types:
1541            raise xml.dom.HierarchyRequestErr(
1542                "%s cannot be child of %s" % (repr(node), repr(self)))
1543        if node.parentNode is not None:
1544            # This needs to be done before the next test since this
1545            # may *be* the document element, in which case it should
1546            # end up re-ordered to the end.
1547            node.parentNode.removeChild(node)
1548
1549        if node.nodeType == Node.ELEMENT_NODE \
1550           and self._get_documentElement():
1551            raise xml.dom.HierarchyRequestErr(
1552                "two document elements disallowed")
1553        return Node.appendChild(self, node)
1554
1555    def removeChild(self, oldChild):
1556        try:
1557            self.childNodes.remove(oldChild)
1558        except ValueError:
1559            raise xml.dom.NotFoundErr()
1560        oldChild.nextSibling = oldChild.previousSibling = None
1561        oldChild.parentNode = None
1562        if self.documentElement is oldChild:
1563            self.documentElement = None
1564
1565        return oldChild
1566
1567    def _get_documentElement(self):
1568        for node in self.childNodes:
1569            if node.nodeType == Node.ELEMENT_NODE:
1570                return node
1571
1572    def unlink(self):
1573        if self.doctype is not None:
1574            self.doctype.unlink()
1575            self.doctype = None
1576        Node.unlink(self)
1577
1578    def cloneNode(self, deep):
1579        if not deep:
1580            return None
1581        clone = self.implementation.createDocument(None, None, None)
1582        clone.encoding = self.encoding
1583        clone.standalone = self.standalone
1584        clone.version = self.version
1585        for n in self.childNodes:
1586            childclone = _clone_node(n, deep, clone)
1587            assert childclone.ownerDocument.isSameNode(clone)
1588            clone.childNodes.append(childclone)
1589            if childclone.nodeType == Node.DOCUMENT_NODE:
1590                assert clone.documentElement is None
1591            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
1592                assert clone.doctype is None
1593                clone.doctype = childclone
1594            childclone.parentNode = clone
1595        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
1596                                     self, clone)
1597        return clone
1598
1599    def createDocumentFragment(self):
1600        d = DocumentFragment()
1601        d.ownerDocument = self
1602        return d
1603
1604    def createElement(self, tagName):
1605        e = Element(tagName)
1606        e.ownerDocument = self
1607        return e
1608
1609    def createTextNode(self, data):
1610        if not isinstance(data, StringTypes):
1611            raise TypeError, "node contents must be a string"
1612        t = Text()
1613        t.data = data
1614        t.ownerDocument = self
1615        return t
1616
1617    def createCDATASection(self, data):
1618        if not isinstance(data, StringTypes):
1619            raise TypeError, "node contents must be a string"
1620        c = CDATASection()
1621        c.data = data
1622        c.ownerDocument = self
1623        return c
1624
1625    def createComment(self, data):
1626        c = Comment(data)
1627        c.ownerDocument = self
1628        return c
1629
1630    def createProcessingInstruction(self, target, data):
1631        p = ProcessingInstruction(target, data)
1632        p.ownerDocument = self
1633        return p
1634
1635    def createAttribute(self, qName):
1636        a = Attr(qName)
1637        a.ownerDocument = self
1638        a.value = ""
1639        return a
1640
1641    def createElementNS(self, namespaceURI, qualifiedName):
1642        prefix, localName = _nssplit(qualifiedName)
1643        e = Element(qualifiedName, namespaceURI, prefix)
1644        e.ownerDocument = self
1645        return e
1646
1647    def createAttributeNS(self, namespaceURI, qualifiedName):
1648        prefix, localName = _nssplit(qualifiedName)
1649        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1650        a.ownerDocument = self
1651        a.value = ""
1652        return a
1653
1654    # A couple of implementation-specific helpers to create node types
1655    # not supported by the W3C DOM specs:
1656
1657    def _create_entity(self, name, publicId, systemId, notationName):
1658        e = Entity(name, publicId, systemId, notationName)
1659        e.ownerDocument = self
1660        return e
1661
1662    def _create_notation(self, name, publicId, systemId):
1663        n = Notation(name, publicId, systemId)
1664        n.ownerDocument = self
1665        return n
1666
1667    def getElementById(self, id):
1668        if id in self._id_cache:
1669            return self._id_cache[id]
1670        if not (self._elem_info or self._magic_id_count):
1671            return None
1672
1673        stack = self._id_search_stack
1674        if stack is None:
1675            # we never searched before, or the cache has been cleared
1676            stack = [self.documentElement]
1677            self._id_search_stack = stack
1678        elif not stack:
1679            # Previous search was completed and cache is still valid;
1680            # no matching node.
1681            return None
1682
1683        result = None
1684        while stack:
1685            node = stack.pop()
1686            # add child elements to stack for continued searching
1687            stack.extend([child for child in node.childNodes
1688                          if child.nodeType in _nodeTypes_with_children])
1689            # check this node
1690            info = self._get_elem_info(node)
1691            if info:
1692                # We have to process all ID attributes before
1693                # returning in order to get all the attributes set to
1694                # be IDs using Element.setIdAttribute*().
1695                for attr in node.attributes.values():
1696                    if attr.namespaceURI:
1697                        if info.isIdNS(attr.namespaceURI, attr.localName):
1698                            self._id_cache[attr.value] = node
1699                            if attr.value == id:
1700                                result = node
1701                            elif not node._magic_id_nodes:
1702                                break
1703                    elif info.isId(attr.name):
1704                        self._id_cache[attr.value] = node
1705                        if attr.value == id:
1706                            result = node
1707                        elif not node._magic_id_nodes:
1708                            break
1709                    elif attr._is_id:
1710                        self._id_cache[attr.value] = node
1711                        if attr.value == id:
1712                            result = node
1713                        elif node._magic_id_nodes == 1:
1714                            break
1715            elif node._magic_id_nodes:
1716                for attr in node.attributes.values():
1717                    if attr._is_id:
1718                        self._id_cache[attr.value] = node
1719                        if attr.value == id:
1720                            result = node
1721            if result is not None:
1722                break
1723        return result
1724
1725    def getElementsByTagName(self, name):
1726        return _get_elements_by_tagName_helper(self, name, NodeList())
1727
1728    def getElementsByTagNameNS(self, namespaceURI, localName):
1729        return _get_elements_by_tagName_ns_helper(
1730            self, namespaceURI, localName, NodeList())
1731
1732    def isSupported(self, feature, version):
1733        return self.implementation.hasFeature(feature, version)
1734
1735    def importNode(self, node, deep):
1736        if node.nodeType == Node.DOCUMENT_NODE:
1737            raise xml.dom.NotSupportedErr("cannot import document nodes")
1738        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1739            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1740        return _clone_node(node, deep, self)
1741
1742    def writexml(self, writer, indent="", addindent="", newl="",
1743                 encoding = None):
1744        if encoding is None:
1745            writer.write('<?xml version="1.0" ?>'+newl)
1746        else:
1747            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
1748        for node in self.childNodes:
1749            node.writexml(writer, indent, addindent, newl)
1750
1751    # DOM Level 3 (WD 9 April 2002)
1752
    def renameNode(self, n, namespaceURI, name):
        """Rename the element or attribute node `n` in place and return it.

        n            -- the node to rename; must be owned by this document
        namespaceURI -- the node's new namespace URI, or EMPTY_NAMESPACE
        name         -- the node's new (qualified) name, optionally of the
                        form "prefix:local"

        Raises xml.dom.WrongDocumentErr when `n` belongs to another
        document, xml.dom.NotSupportedErr when `n` is neither an element
        nor an attribute, and xml.dom.NamespaceErr for an illegal use of
        "xmlns" as prefix or attribute name.
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        # prefix and localName are derived from `name` only when a
        # namespace is supplied.
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                # The "xmlns" prefix is only accepted together with the
                # XMLNS namespace.
                if (  prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                # An unprefixed attribute called "xmlns" is only accepted
                # in the XMLNS namespace; elements are not restricted here.
                if (  name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            # No namespace: the node gets neither a prefix nor a local name.
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            element = n.ownerElement
            if element is not None:
                # Detach the attribute before its names change; it is
                # re-attached (and its ID-ness restored) further down.
                # is_id is only bound here and only read under the same
                # `element is not None` condition below.
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        # avoid __setattr__
        d = n.__dict__
        d['prefix'] = prefix
        d['localName'] = localName
        d['namespaceURI'] = namespaceURI
        d['nodeName'] = name
        if n.nodeType == Node.ELEMENT_NODE:
            d['tagName'] = name
        else:
            # attribute node
            d['name'] = name
            if element is not None:
                # Put the renamed attribute back on its owner element.
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n
1807
# Declare "documentElement" on Document via defproperty, supplying the
# documentation string for it.
# NOTE(review): defproperty is not defined in this part of the file; it
# presumably comes from the xml.dom.minicompat star import -- confirm.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1810
1811
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    node             -- the node to copy
    deep             -- when true, children of elements and document
                        fragments are copied as well; for document type
                        nodes the notations and entities are copied
    newOwnerDocument -- the document used to create the copy

    Returns the new node.  Raises xml.dom.NotSupportedErr for node
    types that cannot be cloned here.  User data handlers registered on
    the source node(s) are notified with NODE_CLONED when the owner
    document stays the same and NODE_IMPORTED otherwise.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            # Carry over the "specified" flag of the source attribute.
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # A document type is only ever copied into a different document,
        # so this is always an import.
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    # The source of this copy is the entity `e` itself,
                    # not the loop variable left over from the notation
                    # loop above (which may not even be bound).
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # be used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1892
1893
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list [prefix, localName] when a colon is
    present, otherwise the tuple (None, qualifiedName-part).
    """
    parts = qualifiedName.split(':', 1)
    if len(parts) != 2:
        # No colon at all: there is no prefix.
        return (None, parts[0])
    return parts
1900
1901
def _get_StringIO():
    """Return a new, empty in-memory text buffer.

    The StringIO module is preferred; io.StringIO is used as a fallback
    on interpreters where the StringIO module does not exist.
    """
    # we can't use cStringIO since it doesn't support Unicode strings
    try:
        from StringIO import StringIO
    except ImportError:
        # No StringIO module on this interpreter; io.StringIO is the
        # text buffer available there.
        from io import StringIO
    return StringIO()
1906
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete tree from an event stream and return its root.

    func(*args, **kwargs) must return an event stream object.  The node
    of the stream's first event is expanded with expandNode(), the
    stream is cleared, and that node is returned.
    """
    stream = func(*args, **kwargs)
    # Only the node of the first event is needed; its token is ignored.
    _, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1913
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    if parser is not None or bufsize:
        # A caller-supplied parser or buffer size goes through pulldom.
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1923
def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    if parser is not None:
        # A caller-supplied parser goes through pulldom.
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1933
def getDOMImplementation(features=None):
    """Return Document.implementation if it has all requested features.

    features -- None/empty, a feature string (parsed with
                domreg._parse_feature_string), or an iterable of
                (feature, version) pairs

    Returns None as soon as one requested feature is not supported.
    """
    impl = Document.implementation
    if not features:
        return impl
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    for feature, version in features:
        if not impl.hasFeature(feature, version):
            return None
    return impl
1942