• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
"""Simple implementation of the Level 1 DOM.

Namespaces and other minor Level 2 features are also supported.

parse("foo.xml")

parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""
17
18import io
19import xml.dom
20
21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22from xml.dom.minicompat import *
23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
# Node types whose insertion or removal triggers an ID-cache reset.
#
# The tuple is deliberately incomplete: the node tested against it is the
# one being added or removed (never the node being modified), so it can
# never be a DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.
_nodeTypes_with_children = (
    xml.dom.Node.ELEMENT_NODE,
    xml.dom.Node.ENTITY_REFERENCE_NODE,
)
32
33
class Node(xml.dom.Node):
    """Common base class for the node types defined in this module.

    Provides child-list manipulation (insertBefore, appendChild,
    replaceChild, removeChild, normalize), serialization helpers (toxml,
    toprettyxml), DOM Level 3 user data, and context-manager support that
    calls unlink() on exit.

    The methods below read ``self.childNodes`` and
    ``self._child_node_types``; neither is defined by this class, so they
    must be supplied by the concrete node type.
    """
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __bool__(self):
        # A node is always truthy, even when it has no children.
        return True

    def toxml(self, encoding=None, standalone=None):
        """Serialize this node without added indentation or newlines.

        Equivalent to ``toprettyxml("", "", encoding, standalone)``.
        """
        return self.toprettyxml("", "", encoding, standalone)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None,
                    standalone=None):
        """Serialize this node via writexml() and return the result.

        Returns a str when *encoding* is None; otherwise returns the bytes
        produced by encoding the output with *encoding* (characters that
        cannot be encoded are written as XML character references).
        *encoding* and *standalone* are forwarded to writexml() only when
        this node is a document node.
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding, standalone)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() flushes the text layer and hands back the BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if this node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Falls through (returning None) when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Falls through (returning None) when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* before *refChild*; append if refChild is None.

        A document fragment has each of its children inserted in turn and
        the fragment itself is returned.  Otherwise *newChild* is returned.

        Raises xml.dom.HierarchyRequestErr if this node may not contain
        *newChild*, and xml.dom.NotFoundErr if *refChild* is not a child of
        this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # Validate refChild before touching newChild, so that a failing
        # call does not detach newChild from its current parent.
        if refChild is not None and refChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                # Looked up after the detach above: removing newChild may
                # have shifted positions (or removed refChild itself when
                # both arguments are the same node).
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        A document fragment has each of its children appended in turn and
        the fragment itself is returned.  Raises
        xml.dom.HierarchyRequestErr if this node may not contain *node*.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace *oldChild* with *newChild*.

        Returns *oldChild*, except that replacing a node with itself
        returns None and replacing with a document fragment returns the
        result of insertBefore().

        Raises xml.dom.HierarchyRequestErr if this node may not contain
        *newChild*, and xml.dom.NotFoundErr if *oldChild* is not a child of
        this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Validate oldChild before touching newChild, so that a failing
        # call does not detach newChild from its current parent.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            # Looked up after the detach: if newChild was a sibling of
            # oldChild its removal shifted the positions.
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises xml.dom.NotFoundErr if *oldChild* is not a child of this
        node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Splice the neighbours together around the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Empty text nodes are unlinked and removed; a text node following
        another text node is folded into it and unlinked.  Element children
        are normalized in turn.
        """
        L = []  # the children that survive, in order
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Replace the contents in place so the list object is preserved.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a clone of this node produced by _clone_node().

        The clone is created for this node's ownerDocument, or for the
        node itself when it has none.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about *feature*."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if *feature* is supported, otherwise None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under *key*, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store *data* and *handler* under *key*; return the old data.

        Passing None as *data* removes an existing entry (the handler is
        ignored in that case).  Returns None when nothing was stored
        before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke every registered user-data handler for *operation*."""
        if hasattr(self, "_user_data"):
            # Iterate over a snapshot so handlers may modify the mapping.
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Break this node's references to its parent, document, siblings
        and children, unlinking the children recursively."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
283
284
def _append_child(self, node):
    """Append *node* to ``self.childNodes`` and link it to its new parent.

    Fast path with fewer checks than Node.appendChild(); usable by DOM
    builders if careful.  When there is already a last child the two are
    linked as siblings; ``node.nextSibling`` is left untouched.
    """
    siblings = self.childNodes
    if siblings:
        tail = siblings[-1]
        tail.nextSibling = node
        node.previousSibling = tail
    node.parentNode = self
    siblings.append(node)
294
def _in_document(node):
    """Return True iff *node* is part of a document tree.

    Walks the parentNode chain starting at *node* itself, looking for a
    node whose type is DOCUMENT_NODE.
    """
    current = node
    while current is not None and current.nodeType != Node.DOCUMENT_NODE:
        current = current.parentNode
    # The loop stops either on a document node or at the top of the chain.
    return current is not None
302
def _write_data(writer, text, attr):
    """Write *text* to *writer* with XML special characters escaped.

    ``&``, ``<`` and ``>`` are always escaped.  When *attr* is true the
    text is treated as an attribute value: double quotes are escaped and
    carriage return, newline and tab are written as character references.
    Nothing is written when *text* is empty.
    """
    if not text:
        return
    # See the comments in ElementTree.py for behavior and
    # implementation details.
    # "&" must be handled first so the entities added below stay intact.
    substitutions = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if attr:
        substitutions += [
            ('"', "&quot;"),
            ("\r", "&#13;"),
            ("\n", "&#10;"),
            ("\t", "&#9;"),
        ]
    for raw, escaped in substitutions:
        text = text.replace(raw, escaped)
    writer.write(text)
325
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect descendant elements of *parent* named *name* into *rc*.

    The tree is walked depth-first in document order; ``"*"`` matches any
    element.  Every child is descended into, whatever its node type.
    Returns *rc*.
    """
    for child in parent.childNodes:
        matches = (child.nodeType == Node.ELEMENT_NODE
                   and (name == "*" or child.tagName == name))
        if matches:
            rc.append(child)
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
333
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect descendant elements matching a namespace and local name.

    ``"*"`` acts as a wildcard for either *nsURI* or *localName*.  Only
    element children are descended into.  Matches are appended to *rc* in
    document order and *rc* is returned.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_ok = localName == "*" or child.localName == localName
        if name_ok and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
342
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE that only holds children.

    It has no attributes, no node value and no parent.
    """
    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types that may be inserted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
359
360
class Attr(Node):
    """Attribute node.

    The value is kept both in ``_value`` and in the ``data`` of a single
    text child node.  ``name``/``nodeName``, ``value``/``nodeValue`` and
    ``prefix`` are properties; changing any of them while the attribute
    has an owner element clears that element's ID cache.
    """
    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an unowned attribute named *qName*.

        *localName* is stored only when given; otherwise the local name is
        derived from the node name on demand.
        """
        self.ownerElement = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name: use the part after the first colon.
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        # Keep the cached value and the text child in sync.
        self._value = value
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        """Set the prefix and rebuild the qualified name from it.

        Raises xml.dom.NamespaceErr when the ``xmlns`` prefix is used with
        a namespace other than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Detach this attribute from its owner element and drop its
        children.

        After the call ``ownerElement`` is None, so calling unlink() again
        is harmless and the attribute can be attached to another element.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer holds this attribute; forget it so a
            # repeated unlink() does not try to delete the entries again.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        """Return True if this attribute is an ID attribute.

        True when it was flagged via ``_is_id``; otherwise the answer comes
        from the owner document's element info, and is False when the
        attribute has no owner document, no owner element, or no element
        info is available.
        """
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        """Return the attribute's type from the owner document's element
        info, or ``_no_type`` when it cannot be determined."""
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
487
488
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.

    Lookups accept either an attribute name (str) or a
    ``(namespaceURI, localName)`` tuple.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs is keyed by node name, attrsNS by (namespaceURI, localName);
        # both are shared with the owner element, not copied.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position *index*, or None if the index
        is out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of ``(nodeName, value)`` pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def __contains__(self, key):
        if isinstance(key, str):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Two maps are equal when they wrap the very same attribute dict;
        # otherwise they are ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return (id(self) > id(other)) - (id(self) < id(other))

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute *attname* from a string or store an Attr node.

        A string updates the existing attribute of that name, creating it
        if necessary.  An Attr instance is stored via setNamedItem().  Any
        other value raises TypeError.
        """
        if isinstance(value, str):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute named *name*, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute with the given namespace and local name,
        or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute named *name*.

        Raises xml.dom.NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace and
        local name.

        Raises xml.dom.NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store *node* under its name and return the node previously
        stored under that name (or None).

        Raises xml.dom.HierarchyRequestErr if *node* is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        # Only unlink a *different* node: unlinking the node being stored
        # would discard the text child that carries its value.
        if old is not None and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
656
657
class TypeInfo(object):
    """A type described by a namespace and a name."""

    __slots__ = ('namespace', 'name')

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        cls_name = self.__class__.__name__
        if not self.namespace:
            # No (or empty) namespace: show the name only.
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

# Shared instance meaning "no type information available".
_no_type = TypeInfo(None, None)
679
680class Element(Node):
681    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
682               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
683               'nextSibling', 'previousSibling')
684    nodeType = Node.ELEMENT_NODE
685    nodeValue = None
686    schemaType = _no_type
687
688    _magic_id_nodes = 0
689
690    _child_node_types = (Node.ELEMENT_NODE,
691                         Node.PROCESSING_INSTRUCTION_NODE,
692                         Node.COMMENT_NODE,
693                         Node.TEXT_NODE,
694                         Node.CDATA_SECTION_NODE,
695                         Node.ENTITY_REFERENCE_NODE)
696
697    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
698                 localName=None):
699        self.parentNode = None
700        self.tagName = self.nodeName = tagName
701        self.prefix = prefix
702        self.namespaceURI = namespaceURI
703        self.childNodes = NodeList()
704        self.nextSibling = self.previousSibling = None
705
706        # Attribute dictionaries are lazily created
707        # attributes are double-indexed:
708        #    tagName -> Attribute
709        #    URI,localName -> Attribute
710        # in the future: consider lazy generation
711        # of attribute objects this is too tricky
712        # for now because of headaches with
713        # namespaces.
714        self._attrs = None
715        self._attrsNS = None
716
717    def _ensure_attributes(self):
718        if self._attrs is None:
719            self._attrs = {}
720            self._attrsNS = {}
721
722    def _get_localName(self):
723        try:
724            return self._localName
725        except AttributeError:
726            return self.tagName.split(":", 1)[-1]
727
728    def _get_tagName(self):
729        return self.tagName
730
731    def unlink(self):
732        if self._attrs is not None:
733            for attr in list(self._attrs.values()):
734                attr.unlink()
735        self._attrs = None
736        self._attrsNS = None
737        Node.unlink(self)
738
739    def getAttribute(self, attname):
740        """Returns the value of the specified attribute.
741
742        Returns the value of the element's attribute named attname as
743        a string. An empty string is returned if the element does not
744        have such an attribute. Note that an empty string may also be
745        returned as an explicitly given attribute value, use the
746        hasAttribute method to distinguish these two cases.
747        """
748        if self._attrs is None:
749            return ""
750        try:
751            return self._attrs[attname].value
752        except KeyError:
753            return ""
754
755    def getAttributeNS(self, namespaceURI, localName):
756        if self._attrsNS is None:
757            return ""
758        try:
759            return self._attrsNS[(namespaceURI, localName)].value
760        except KeyError:
761            return ""
762
763    def setAttribute(self, attname, value):
764        attr = self.getAttributeNode(attname)
765        if attr is None:
766            attr = Attr(attname)
767            attr.value = value # also sets nodeValue
768            attr.ownerDocument = self.ownerDocument
769            self.setAttributeNode(attr)
770        elif value != attr.value:
771            attr.value = value
772            if attr.isId:
773                _clear_id_cache(self)
774
775    def setAttributeNS(self, namespaceURI, qualifiedName, value):
776        prefix, localname = _nssplit(qualifiedName)
777        attr = self.getAttributeNodeNS(namespaceURI, localname)
778        if attr is None:
779            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
780            attr.value = value
781            attr.ownerDocument = self.ownerDocument
782            self.setAttributeNode(attr)
783        else:
784            if value != attr.value:
785                attr.value = value
786                if attr.isId:
787                    _clear_id_cache(self)
788            if attr.prefix != prefix:
789                attr.prefix = prefix
790                attr.nodeName = qualifiedName
791
792    def getAttributeNode(self, attrname):
793        if self._attrs is None:
794            return None
795        return self._attrs.get(attrname)
796
797    def getAttributeNodeNS(self, namespaceURI, localName):
798        if self._attrsNS is None:
799            return None
800        return self._attrsNS.get((namespaceURI, localName))
801
802    def setAttributeNode(self, attr):
803        if attr.ownerElement not in (None, self):
804            raise xml.dom.InuseAttributeErr("attribute node already owned")
805        self._ensure_attributes()
806        old1 = self._attrs.get(attr.name, None)
807        if old1 is not None:
808            self.removeAttributeNode(old1)
809        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
810        if old2 is not None and old2 is not old1:
811            self.removeAttributeNode(old2)
812        _set_attribute_node(self, attr)
813
814        if old1 is not attr:
815            # It might have already been part of this node, in which case
816            # it doesn't represent a change, and should not be returned.
817            return old1
818        if old2 is not attr:
819            return old2
820
821    setAttributeNodeNS = setAttributeNode
822
823    def removeAttribute(self, name):
824        if self._attrsNS is None:
825            raise xml.dom.NotFoundErr()
826        try:
827            attr = self._attrs[name]
828        except KeyError:
829            raise xml.dom.NotFoundErr()
830        self.removeAttributeNode(attr)
831
832    def removeAttributeNS(self, namespaceURI, localName):
833        if self._attrsNS is None:
834            raise xml.dom.NotFoundErr()
835        try:
836            attr = self._attrsNS[(namespaceURI, localName)]
837        except KeyError:
838            raise xml.dom.NotFoundErr()
839        self.removeAttributeNode(attr)
840
841    def removeAttributeNode(self, node):
842        if node is None:
843            raise xml.dom.NotFoundErr()
844        try:
845            self._attrs[node.name]
846        except KeyError:
847            raise xml.dom.NotFoundErr()
848        _clear_id_cache(self)
849        node.unlink()
850        # Restore this since the node is still useful and otherwise
851        # unlinked
852        node.ownerDocument = self.ownerDocument
853        return node
854
855    removeAttributeNodeNS = removeAttributeNode
856
857    def hasAttribute(self, name):
858        """Checks whether the element has an attribute with the specified name.
859
860        Returns True if the element has an attribute with the specified name.
861        Otherwise, returns False.
862        """
863        if self._attrs is None:
864            return False
865        return name in self._attrs
866
867    def hasAttributeNS(self, namespaceURI, localName):
868        if self._attrsNS is None:
869            return False
870        return (namespaceURI, localName) in self._attrsNS
871
872    def getElementsByTagName(self, name):
873        """Returns all descendant elements with the given tag name.
874
875        Returns the list of all descendant elements (not direct children
876        only) with the specified tag name.
877        """
878        return _get_elements_by_tagName_helper(self, name, NodeList())
879
880    def getElementsByTagNameNS(self, namespaceURI, localName):
881        return _get_elements_by_tagName_ns_helper(
882            self, namespaceURI, localName, NodeList())
883
884    def __repr__(self):
885        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
886
887    def writexml(self, writer, indent="", addindent="", newl=""):
888        """Write an XML element to a file-like object
889
890        Write the element to the writer object that must provide
891        a write method (e.g. a file or StringIO object).
892        """
893        # indent = current indentation
894        # addindent = indentation to add to higher levels
895        # newl = newline string
896        writer.write(indent+"<" + self.tagName)
897
898        attrs = self._get_attributes()
899
900        for a_name in attrs.keys():
901            writer.write(" %s=\"" % a_name)
902            _write_data(writer, attrs[a_name].value, True)
903            writer.write("\"")
904        if self.childNodes:
905            writer.write(">")
906            if (len(self.childNodes) == 1 and
907                self.childNodes[0].nodeType in (
908                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
909                self.childNodes[0].writexml(writer, '', '', '')
910            else:
911                writer.write(newl)
912                for node in self.childNodes:
913                    node.writexml(writer, indent+addindent, addindent, newl)
914                writer.write(indent)
915            writer.write("</%s>%s" % (self.tagName, newl))
916        else:
917            writer.write("/>%s"%(newl))
918
919    def _get_attributes(self):
920        self._ensure_attributes()
921        return NamedNodeMap(self._attrs, self._attrsNS, self)
922
923    def hasAttributes(self):
924        if self._attrs:
925            return True
926        else:
927            return False
928
929    # DOM Level 3 attributes, based on the 22 Oct 2002 draft
930
931    def setIdAttribute(self, name):
932        idAttr = self.getAttributeNode(name)
933        self.setIdAttributeNode(idAttr)
934
935    def setIdAttributeNS(self, namespaceURI, localName):
936        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
937        self.setIdAttributeNode(idAttr)
938
939    def setIdAttributeNode(self, idAttr):
940        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
941            raise xml.dom.NotFoundErr()
942        if _get_containing_entref(self) is not None:
943            raise xml.dom.NoModificationAllowedErr()
944        if not idAttr._is_id:
945            idAttr._is_id = True
946            self._magic_id_nodes += 1
947            self.ownerDocument._magic_id_count += 1
948            _clear_id_cache(self)
949
950defproperty(Element, "attributes",
951            doc="NamedNodeMap of attributes on the element.")
952defproperty(Element, "localName",
953            doc="Namespace-local name of this element.")
954
955
def _set_attribute_node(element, attr):
    """Store *attr* on *element* under its name and its (namespaceURI,
    localName) key, and record *element* as the attribute's owner.

    The ID cache is cleared first and the element's attribute storage is
    ensured before it is written to.
    """
    _clear_id_cache(element)
    element._ensure_attributes()
    element._attrs[attr.name] = attr
    ns_key = (attr.namespaceURI, attr.localName)
    element._attrsNS[ns_key] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.ownerElement = element
966
class Childless:
    """Mixin for node types that never have children.

    It supplies empty child accessors and makes every child-mutating
    method fail, so such nodes avoid the child handling of Node.
    """
    __slots__ = ()

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        """Always None: there is no first child."""
        return None

    def _get_lastChild(self):
        """Always None: there is no last child."""
        return None

    def appendChild(self, node):
        """Refuse the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def hasChildNodes(self):
        """Always False."""
        return False

    def insertBefore(self, newChild, refChild):
        """Refuse the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Fail with NotFoundErr: no node can be a child of this one."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def normalize(self):
        """Do nothing; a childless node has nothing to normalize."""
        pass

    def replaceChild(self, newChild, oldChild):
        """Refuse the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
1006
1007
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node made of a target and a data string.

    ``nodeName`` mirrors ``target`` and ``nodeValue`` mirrors ``data``;
    both aliases can be read and assigned.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    __slots__ = ('target', 'data')

    def __init__(self, target, data):
        self.target, self.data = target, data

    # nodeValue is an alias for data
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(_get_nodeValue, _set_nodeValue)

    # nodeName is an alias for target
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(_get_nodeName, _set_nodeName)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<?target data?>`` between *indent* and *newl*.

        *addindent* is accepted but not used.
        """
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
1032
1033
class CharacterData(Childless, Node):
    """Base class for nodes whose content is a single string.

    ``data`` and ``nodeValue`` are the same property.  The offset/count
    editing methods follow the DOM CharacterData interface: an offset may
    lie anywhere from 0 to len(data) inclusive, a count must not be
    negative, and a count running past the end is clipped by slicing.
    Violations raise xml.dom.IndexSizeErr.
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        """Return the number of characters in ``data``."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        """Show the class name and at most the first ten characters of data."""
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset(self, offset):
        """Raise IndexSizeErr unless 0 <= offset <= len(data)."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        # offset == len(data) addresses the end of the data and is legal;
        # only positions past the end are rejected.
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    @staticmethod
    def _check_count(count):
        """Raise IndexSizeErr for a negative count."""
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")

    def substringData(self, offset, count):
        """Return up to *count* characters of data starting at *offset*."""
        self._check_offset(offset)
        self._check_count(count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append *arg* to the end of data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert *arg* at *offset*; offset == len(data) appends."""
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to *count* characters starting at *offset*."""
        self._check_offset(offset)
        self._check_count(count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters at *offset* with *arg*.

        A count of 0 removes nothing and therefore inserts *arg* at
        *offset*.
        """
        self._check_offset(offset)
        self._check_count(count)
        # Skip the assignment only when nothing would change; a zero
        # count must still insert a non-empty arg.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
1104
1105defproperty(CharacterData, "length", doc="Length of the string data.")
1106
1107
class Text(CharacterData):
    """Text node.

    Besides the CharacterData operations it can be split in two and can
    read or replace the combined text of itself and the text/CDATA
    siblings directly adjacent to it.
    """
    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Cut this node at *offset* and return the new node with the tail.

        The new node is of the same class and has the same ownerDocument.
        When this node is attached to a parent, the new node is inserted
        right after it.  Raises xml.dom.IndexSizeErr for an offset outside
        0..len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        following = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if following is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, following)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl through _write_data."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl), False)

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the data of this node joined with the data of all text
        and CDATA siblings directly adjacent to it, in document order.
        """
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in text_types:
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in text_types:
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by *content*.

        The adjacent siblings are removed from the parent.  With non-empty
        *content* this node keeps its place, receives the new data and is
        returned.  With empty *content* this node is removed as well (when
        it has a parent) and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in text_types:
                next = n.previousSibling
                parent.removeChild(n)
                n = next
            else:
                break
        # Remember the following sibling before this node may be removed
        # from the parent.
        n = self.nextSibling
        # A detached node has no parent to be removed from; without this
        # guard an empty replacement raised AttributeError on None.
        if not content and parent is not None:
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in text_types:
                next = n.nextSibling
                parent.removeChild(n)
                n = next
            else:
                break
        if content:
            self.data = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        """Return True only if data is all whitespace and the element info
        of the containing element reports element content.

        Returns False when the data contains non-whitespace, when there is
        no containing element, or when the owner document has no info for
        that element.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()
1192
1193defproperty(Text, "isWhitespaceInElementContent",
1194            doc="True iff this text node contains only whitespace"
1195                " and is in element content.")
1196defproperty(Text, "wholeText",
1197            doc="The text of all logically-adjacent text nodes.")
1198
1199
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None.

    The search starts at node.parentNode, so *node* itself is never
    returned.
    """
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1207
def _get_containing_entref(node):
    """Return the nearest ancestor of *node* that is an entity reference,
    or None.

    The search starts at node.parentNode, so *node* itself is never
    returned.
    """
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1215
1216
class Comment(CharacterData):
    """Comment node; its text is given at construction time."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<!--data-->`` between *indent* and *newl*.

        Raises ValueError if the data contains "--".  *addindent* is
        accepted but not used.
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, text, newl))
1229
1230
class CDATASection(Text):
    """CDATA section node: text written verbatim inside ``<![CDATA[...]]>``."""
    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section to *writer*.

        Raises ValueError if the data contains "]]>".  The *indent*,
        *addindent* and *newl* arguments are accepted but not used.
        """
        content = self.data
        if content.find("]]>") >= 0:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % content)
1241
1242
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only named-node map backed by a plain sequence of nodes.

    Lookups scan the sequence linearly and return the first match; every
    mutating method raises xml.dom.NoModificationAllowedErr.
    """
    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, else None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace URI and local name,
        else None.
        """
        return next(
            (n for n in self._seq
             if n.namespaceURI == namespaceURI and n.localName == localName),
            None)

    def __getitem__(self, name_or_tuple):
        """Look a node up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the node at *index*, or None if negative or out of range."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def removeNamedItemNS(self, namespaceURI, localName):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def setNamedItem(self, node):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def setNamedItemNS(self, node):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1304
1305defproperty(ReadOnlySequentialNamedNodeMap, "length",
1306            doc="Number of entries in the NamedNodeMap.")
1307
1308
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    __slots__ = ('publicId', 'systemId')

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers on the instance."""
        self.publicId = publicId
        self.systemId = systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1323
class DocumentType(Identified, Childless, Node):
    """Document type declaration node.

    Holds the declared name, the public/system identifiers, the internal
    subset text and read-only maps of entities and notations.
    """
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        """Create a doctype; only the part of *qualifiedName* after the
        prefix is kept as the name.  A false *qualifiedName* leaves the
        name as None.
        """
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if a document owns it.

        The copy carries the name, the public and system identifiers and
        the internal subset.  With *deep* true the notations and entities
        are copied too.  _call_user_data_handler is invoked with
        NODE_CLONED for every copied node.
        """
        if self.ownerDocument is not None:
            return None
        # it's ok
        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        # A duplicate has to keep the identifiers and the internal
        # subset, otherwise serializing the clone loses them.
        clone.publicId = self.publicId
        clone.systemId = self.systemId
        clone.internalSubset = self.internalSubset
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in self.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                clone.notations._seq.append(notation)
                n._call_user_data_handler(operation, n, notation)
            for e in self.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                clone.entities._seq.append(entity)
                e._call_user_data_handler(operation, e, entity)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the ``<!DOCTYPE ...>`` declaration to *writer*.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set; the internal subset follows
        in brackets when it is not None.  *indent* and *addindent* are
        accepted but not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1383
class Entity(Identified, Node):
    """Entity node.

    It owns a child list, but every DOM method that would change that
    list raises xml.dom.HierarchyRequestErr.
    """
    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self._identified_mixin_init(publicId, systemId)
        self.childNodes = NodeList()
        self.notationName = notation
        self.nodeName = name

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1423
class Notation(Identified, Childless, Node):
    """Notation node: a name plus public and system identifiers."""
    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1431
1432
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and document types, with feature queries."""

    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True if (*feature*, *version*) is a supported pair.

        The feature name is compared case-insensitively and an empty
        version string is treated like None.
        """
        wanted = None if version == "" else version
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and a root element.

        When all three arguments are None an empty document is returned.
        Otherwise a root element named *qualifiedName* is created.

        Raises xml.dom.WrongDocumentErr if *doctype* already has a
        parent, xml.dom.InvalidCharacterErr if a root element is needed
        but *qualifiedName* is empty, and xml.dom.NamespaceErr for an
        'xml' prefix with the wrong namespace or a prefix without a
        namespace.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        add_root_element = (namespaceURI is not None
                            or qualifiedName is not None
                            or doctype is not None)

        if add_root_element and not qualifiedName:
            # The spec is unclear what to raise here; SyntaxErr
            # would be the other obvious candidate. Since Xerces raises
            # InvalidCharacterErr, and since SyntaxErr is not listed
            # for createDocument, that seems to be the better choice.
            # XXX: need to check for illegal characters here and in
            # createElement.

            # DOM Level III clears this up when talking about the return
            # value of this function.  If namespaceURI, qName and DocType
            # are Null the document is returned without a document
            # element.  Otherwise, if doctype or namespaceURI are not
            # None, we are back at the problem above.
            raise xml.dom.InvalidCharacterErr("Element with no name")

        if add_root_element:
            prefix, localname = _nssplit(qualifiedName)
            xml_ns = "http://www.w3.org/XML/1998/namespace"
            if prefix == "xml" and namespaceURI != xml_ns:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if *feature* is supported, otherwise None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1511
class ElementInfo(object):
    """Object that represents content-model information for an element.

    This default implementation reports no attribute types, no element
    content, no EMPTY content model and no ID attributes.  It is not
    expected to be used in practice; DOM builders should provide
    implementations that answer from the information they have.
    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the module's ``_no_type`` object for any attribute."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the module's ``_no_type`` object for any attribute."""
        return _no_type

    def isElementContent(self):
        """Always False in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    def __getstate__(self):
        # The pickled state is just the tag name.
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1553
def _clear_id_cache(node):
    """Drop the cached ID lookups of the document *node* belongs to.

    *node* may be the document itself or a node for which _in_document()
    is true; for any other node nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1561
1562class Document(Node, DocumentLS):
1563    __slots__ = ('_elem_info', 'doctype',
1564                 '_id_search_stack', 'childNodes', '_id_cache')
1565    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
1566                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
1567
1568    implementation = DOMImplementation()
1569    nodeType = Node.DOCUMENT_NODE
1570    nodeName = "#document"
1571    nodeValue = None
1572    attributes = None
1573    parentNode = None
1574    previousSibling = nextSibling = None
1575
1576
1577    # Document attributes from Level 3 (WD 9 April 2002)
1578
1579    actualEncoding = None
1580    encoding = None
1581    standalone = None
1582    version = None
1583    strictErrorChecking = False
1584    errorHandler = None
1585    documentURI = None
1586
1587    _magic_id_count = 0
1588
1589    def __init__(self):
1590        self.doctype = None
1591        self.childNodes = NodeList()
1592        # mapping of (namespaceURI, localName) -> ElementInfo
1593        #        and tagName -> ElementInfo
1594        self._elem_info = {}
1595        self._id_cache = {}
1596        self._id_search_stack = None
1597
1598    def _get_elem_info(self, element):
1599        if element.namespaceURI:
1600            key = element.namespaceURI, element.localName
1601        else:
1602            key = element.tagName
1603        return self._elem_info.get(key)
1604
    def _get_actualEncoding(self):
        """Return the document's ``actualEncoding`` attribute."""
        return self.actualEncoding
1607
    def _get_doctype(self):
        """Return the document's ``doctype`` attribute (None if unset)."""
        return self.doctype
1610
    def _get_documentURI(self):
        """Return the document's ``documentURI`` attribute."""
        return self.documentURI
1613
    def _get_encoding(self):
        """Return the document's ``encoding`` attribute."""
        return self.encoding
1616
    def _get_errorHandler(self):
        """Return the document's ``errorHandler`` attribute."""
        return self.errorHandler
1619
    def _get_standalone(self):
        """Return the document's ``standalone`` attribute."""
        return self.standalone
1622
    def _get_strictErrorChecking(self):
        """Return the document's ``strictErrorChecking`` attribute."""
        return self.strictErrorChecking
1625
    def _get_version(self):
        """Return the document's ``version`` attribute."""
        return self.version
1628
1629    def appendChild(self, node):
1630        if node.nodeType not in self._child_node_types:
1631            raise xml.dom.HierarchyRequestErr(
1632                "%s cannot be child of %s" % (repr(node), repr(self)))
1633        if node.parentNode is not None:
1634            # This needs to be done before the next test since this
1635            # may *be* the document element, in which case it should
1636            # end up re-ordered to the end.
1637            node.parentNode.removeChild(node)
1638
1639        if node.nodeType == Node.ELEMENT_NODE \
1640           and self._get_documentElement():
1641            raise xml.dom.HierarchyRequestErr(
1642                "two document elements disallowed")
1643        return Node.appendChild(self, node)
1644
1645    def removeChild(self, oldChild):
1646        try:
1647            self.childNodes.remove(oldChild)
1648        except ValueError:
1649            raise xml.dom.NotFoundErr()
1650        oldChild.nextSibling = oldChild.previousSibling = None
1651        oldChild.parentNode = None
1652        if self.documentElement is oldChild:
1653            self.documentElement = None
1654
1655        return oldChild
1656
1657    def _get_documentElement(self):
1658        for node in self.childNodes:
1659            if node.nodeType == Node.ELEMENT_NODE:
1660                return node
1661
1662    def unlink(self):
1663        if self.doctype is not None:
1664            self.doctype.unlink()
1665            self.doctype = None
1666        Node.unlink(self)
1667
    def cloneNode(self, deep):
        """Return a deep copy of this document, or None when *deep* is false.

        A new document is created through ``self.implementation``; the
        encoding, standalone and version values are copied, and every
        child is cloned with ``_clone_node`` into the new document.
        Finally ``_call_user_data_handler`` is called with NODE_CLONED.
        """
        if not deep:
            # Shallow cloning of a document is not supported here.
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # The clone is added to the child list directly rather than
            # through appendChild().
            # NOTE(review): previousSibling/nextSibling of the cloned
            # children are not assigned in this method -- confirm
            # whether _clone_node or callers take care of them.
            clone.childNodes.append(childclone)
            if childclone.nodeType == Node.DOCUMENT_NODE:
                # NOTE(review): this compares against DOCUMENT_NODE, not
                # ELEMENT_NODE; since the child has already been appended
                # above, the intent of this assertion should be confirmed
                # before it is changed.
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                # Only one doctype is expected per document.
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1688
1689    def createDocumentFragment(self):
1690        d = DocumentFragment()
1691        d.ownerDocument = self
1692        return d
1693
1694    def createElement(self, tagName):
1695        e = Element(tagName)
1696        e.ownerDocument = self
1697        return e
1698
1699    def createTextNode(self, data):
1700        if not isinstance(data, str):
1701            raise TypeError("node contents must be a string")
1702        t = Text()
1703        t.data = data
1704        t.ownerDocument = self
1705        return t
1706
1707    def createCDATASection(self, data):
1708        if not isinstance(data, str):
1709            raise TypeError("node contents must be a string")
1710        c = CDATASection()
1711        c.data = data
1712        c.ownerDocument = self
1713        return c
1714
1715    def createComment(self, data):
1716        c = Comment(data)
1717        c.ownerDocument = self
1718        return c
1719
1720    def createProcessingInstruction(self, target, data):
1721        p = ProcessingInstruction(target, data)
1722        p.ownerDocument = self
1723        return p
1724
1725    def createAttribute(self, qName):
1726        a = Attr(qName)
1727        a.ownerDocument = self
1728        a.value = ""
1729        return a
1730
1731    def createElementNS(self, namespaceURI, qualifiedName):
1732        prefix, localName = _nssplit(qualifiedName)
1733        e = Element(qualifiedName, namespaceURI, prefix)
1734        e.ownerDocument = self
1735        return e
1736
1737    def createAttributeNS(self, namespaceURI, qualifiedName):
1738        prefix, localName = _nssplit(qualifiedName)
1739        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1740        a.ownerDocument = self
1741        a.value = ""
1742        return a
1743
1744    # A couple of implementation-specific helpers to create node types
1745    # not supported by the W3C DOM specs:
1746
1747    def _create_entity(self, name, publicId, systemId, notationName):
1748        e = Entity(name, publicId, systemId, notationName)
1749        e.ownerDocument = self
1750        return e
1751
1752    def _create_notation(self, name, publicId, systemId):
1753        n = Notation(name, publicId, systemId)
1754        n.ownerDocument = self
1755        return n
1756
    def getElementById(self, id):
        """Return the element that has an ID attribute with value *id*.

        Returns None when no such element is found.  Results are kept in
        ``self._id_cache``.  The tree walk is incremental: the stack of
        elements still to visit is stored in ``self._id_search_stack``,
        so a later call continues where an earlier one stopped.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        # With no element info and no attributes flagged through
        # setIdAttribute*(), nothing can be an ID.
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            # NOTE(review): documentElement may be None for a document
            # without a root element -- confirm that case cannot reach
            # the loop below.
            stack = [self.documentElement]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                # No attribute on this node was flagged by
                                # setIdAttribute*(); stop scanning it.
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            # This was the node's only flagged attribute.
                            break
            elif node._magic_id_nodes:
                # No element info: only explicitly flagged attributes
                # can be IDs.
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                # Stop here; the remaining stack is kept for later calls.
                break
        return result
1814
1815    def getElementsByTagName(self, name):
1816        return _get_elements_by_tagName_helper(self, name, NodeList())
1817
1818    def getElementsByTagNameNS(self, namespaceURI, localName):
1819        return _get_elements_by_tagName_ns_helper(
1820            self, namespaceURI, localName, NodeList())
1821
1822    def isSupported(self, feature, version):
1823        return self.implementation.hasFeature(feature, version)
1824
1825    def importNode(self, node, deep):
1826        if node.nodeType == Node.DOCUMENT_NODE:
1827            raise xml.dom.NotSupportedErr("cannot import document nodes")
1828        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1829            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1830        return _clone_node(node, deep, self)
1831
1832    def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
1833                 standalone=None):
1834        declarations = []
1835
1836        if encoding:
1837            declarations.append(f'encoding="{encoding}"')
1838        if standalone is not None:
1839            declarations.append(f'standalone="{"yes" if standalone else "no"}"')
1840
1841        writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')
1842
1843        for node in self.childNodes:
1844            node.writexml(writer, indent, addindent, newl)
1845
1846    # DOM Level 3 (WD 9 April 2002)
1847
1848    def renameNode(self, n, namespaceURI, name):
1849        if n.ownerDocument is not self:
1850            raise xml.dom.WrongDocumentErr(
1851                "cannot rename nodes from other documents;\n"
1852                "expected %s,\nfound %s" % (self, n.ownerDocument))
1853        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1854            raise xml.dom.NotSupportedErr(
1855                "renameNode() only applies to element and attribute nodes")
1856        if namespaceURI != EMPTY_NAMESPACE:
1857            if ':' in name:
1858                prefix, localName = name.split(':', 1)
1859                if (  prefix == "xmlns"
1860                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1861                    raise xml.dom.NamespaceErr(
1862                        "illegal use of 'xmlns' prefix")
1863            else:
1864                if (  name == "xmlns"
1865                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1866                      and n.nodeType == Node.ATTRIBUTE_NODE):
1867                    raise xml.dom.NamespaceErr(
1868                        "illegal use of the 'xmlns' attribute")
1869                prefix = None
1870                localName = name
1871        else:
1872            prefix = None
1873            localName = None
1874        if n.nodeType == Node.ATTRIBUTE_NODE:
1875            element = n.ownerElement
1876            if element is not None:
1877                is_id = n._is_id
1878                element.removeAttributeNode(n)
1879        else:
1880            element = None
1881        n.prefix = prefix
1882        n._localName = localName
1883        n.namespaceURI = namespaceURI
1884        n.nodeName = name
1885        if n.nodeType == Node.ELEMENT_NODE:
1886            n.tagName = name
1887        else:
1888            # attribute node
1889            n.name = name
1890            if element is not None:
1891                element.setAttributeNode(n)
1892                if is_id:
1893                    element.setIdAttributeNode(n)
1894        # It's not clear from a semantic perspective whether we should
1895        # call the user data handlers for the NODE_RENAMED event since
1896        # we're re-using the existing node.  The draft spec has been
1897        # interpreted as meaning "no, don't call the handler unless a
1898        # new node is created."
1899        return n
1900
# Register "documentElement" on Document through the defproperty helper.
# NOTE(review): defproperty is not defined in this part of the file; it is
# presumably the helper from xml.dom.minicompat that wraps the class's
# _get_documentElement in a read-only property -- confirm there.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1903
1904
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    *node* is the node to copy, *deep* selects whether children (and,
    for document types, entities and notations) are copied too, and
    *newOwnerDocument* is the document whose factory methods create
    the copy.

    Returns the new node.  Raises xml.dom.NotSupportedErr for node
    types that are not handled below.  If *node* has a
    _call_user_data_handler() method it is called with NODE_CLONED
    when the owner document stays the same and NODE_IMPORTED otherwise.
    """
    # Decide which user-data event to report once the copy exists.
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        # Attributes are always copied, whatever the value of `deep`;
        # the `specified` flag is carried over onto the new attribute node.
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    # The following node types are copied through the matching factory
    # method of the new owner document.
    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        # A copied attribute is always marked as specified.
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # A document type is only ever copied into a different document,
        # so the event is forced to NODE_IMPORTED.
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            # Rebuild the entity and notation maps from scratch, copying
            # each entry by hand and reporting each copy to the source
            # entry's user-data handler when it has one.
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # be used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1985
1986
1987def _nssplit(qualifiedName):
1988    fields = qualifiedName.split(':', 1)
1989    if len(fields) == 2:
1990        return fields
1991    else:
1992        return (None, fields[0])
1993
1994
1995def _do_pulldom_parse(func, args, kwargs):
1996    events = func(*args, **kwargs)
1997    toktype, rootNode = events.getEvent()
1998    events.expandNode(rootNode)
1999    events.clear()
2000    return rootNode
2001
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    When a *parser* is given or *bufsize* is truthy the document is
    built through xml.dom.pulldom; otherwise xml.dom.expatbuilder
    parses *file* directly.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
2011
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    Without a *parser* the string goes straight to
    xml.dom.expatbuilder; with one, the document is built through
    xml.dom.pulldom using that parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
2021
def getDOMImplementation(features=None):
    """Return minidom's DOM implementation, or None if a feature is missing.

    *features* may be omitted (or empty), a feature string that
    domreg._parse_feature_string() understands, or an iterable of
    ``(feature, version)`` pairs.  Each requested pair is checked with
    the implementation's hasFeature(); the first unsupported one makes
    the function return None.
    """
    if not features:
        return Document.implementation
    if isinstance(features, str):
        features = domreg._parse_feature_string(features)
    supported = all(Document.implementation.hasFeature(feature, version)
                    for feature, version in features)
    return Document.implementation if supported else None
2030