1"""Simple implementation of the Level 1 DOM. 2 3Namespaces and other minor Level 2 features are also supported. 4 5parse("foo.xml") 6 7parseString("<foo><bar/></foo>") 8 9Todo: 10===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16""" 17 18import io 19import xml.dom 20 21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 22from xml.dom.minicompat import * 23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 24 25# This is used by the ID-cache invalidation checks; the list isn't 26# actually complete, since the nodes being checked will never be the 27# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 28# the node being added or removed, not the node being modified.) 29# 30_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 31 xml.dom.Node.ENTITY_REFERENCE_NODE) 32 33 34class Node(xml.dom.Node): 35 namespaceURI = None # this is non-null only for elements and attributes 36 parentNode = None 37 ownerDocument = None 38 nextSibling = None 39 previousSibling = None 40 41 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 42 43 def __bool__(self): 44 return True 45 46 def toxml(self, encoding=None, standalone=None): 47 return self.toprettyxml("", "", encoding, standalone) 48 49 def toprettyxml(self, indent="\t", newl="\n", encoding=None, 50 standalone=None): 51 if encoding is None: 52 writer = io.StringIO() 53 else: 54 writer = io.TextIOWrapper(io.BytesIO(), 55 encoding=encoding, 56 errors="xmlcharrefreplace", 57 newline='\n') 58 if self.nodeType == Node.DOCUMENT_NODE: 59 # Can pass encoding only to document, to put it into XML header 60 self.writexml(writer, "", indent, newl, encoding, standalone) 61 else: 62 self.writexml(writer, "", indent, newl) 63 if encoding is None: 64 return writer.getvalue() 65 else: 66 return writer.detach().getvalue() 67 68 def hasChildNodes(self): 69 return bool(self.childNodes) 70 71 def _get_childNodes(self): 72 return self.childNodes 73 74 def _get_firstChild(self): 75 if self.childNodes: 76 return self.childNodes[0] 77 78 def _get_lastChild(self): 79 if self.childNodes: 80 return self.childNodes[-1] 81 82 def insertBefore(self, newChild, refChild): 83 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 84 for c in tuple(newChild.childNodes): 85 self.insertBefore(c, refChild) 86 ### The DOM does not clearly specify what to return in this case 87 return newChild 88 if newChild.nodeType not in self._child_node_types: 89 raise xml.dom.HierarchyRequestErr( 90 "%s cannot be child of %s" % (repr(newChild), repr(self))) 91 if newChild.parentNode is not None: 92 newChild.parentNode.removeChild(newChild) 93 if refChild is None: 94 self.appendChild(newChild) 95 else: 96 try: 97 index = self.childNodes.index(refChild) 98 except ValueError: 99 raise xml.dom.NotFoundErr() 100 if newChild.nodeType in _nodeTypes_with_children: 101 _clear_id_cache(self) 102 self.childNodes.insert(index, newChild) 103 newChild.nextSibling = refChild 104 refChild.previousSibling = newChild 105 if index: 106 node = self.childNodes[index-1] 107 node.nextSibling = newChild 108 newChild.previousSibling = node 109 else: 110 newChild.previousSibling = None 111 newChild.parentNode = self 112 return newChild 113 114 def appendChild(self, node): 115 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 116 for c in tuple(node.childNodes): 117 self.appendChild(c) 118 ### The DOM does not clearly specify what to return in this case 119 return node 120 if node.nodeType not in self._child_node_types: 121 raise xml.dom.HierarchyRequestErr( 122 "%s cannot be child of %s" % (repr(node), repr(self))) 123 elif node.nodeType in _nodeTypes_with_children: 124 _clear_id_cache(self) 125 if node.parentNode is not None: 126 node.parentNode.removeChild(node) 127 _append_child(self, node) 128 node.nextSibling = None 129 return node 130 131 def replaceChild(self, newChild, oldChild): 132 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 133 refChild = oldChild.nextSibling 134 self.removeChild(oldChild) 135 return self.insertBefore(newChild, refChild) 136 if newChild.nodeType not in self._child_node_types: 137 raise xml.dom.HierarchyRequestErr( 138 "%s cannot be child of %s" % (repr(newChild), repr(self))) 139 if newChild is oldChild: 140 return 141 if newChild.parentNode is not None: 142 newChild.parentNode.removeChild(newChild) 143 try: 144 index = self.childNodes.index(oldChild) 145 except ValueError: 146 raise xml.dom.NotFoundErr() 147 self.childNodes[index] = newChild 148 newChild.parentNode = self 149 oldChild.parentNode = None 150 if (newChild.nodeType in _nodeTypes_with_children 151 or oldChild.nodeType in _nodeTypes_with_children): 152 _clear_id_cache(self) 153 newChild.nextSibling = oldChild.nextSibling 154 newChild.previousSibling = oldChild.previousSibling 155 oldChild.nextSibling = None 156 oldChild.previousSibling = None 157 if newChild.previousSibling: 158 newChild.previousSibling.nextSibling = newChild 159 if newChild.nextSibling: 160 newChild.nextSibling.previousSibling = newChild 161 return oldChild 162 163 def removeChild(self, oldChild): 164 try: 165 self.childNodes.remove(oldChild) 166 except ValueError: 167 raise xml.dom.NotFoundErr() 168 if oldChild.nextSibling is not None: 169 oldChild.nextSibling.previousSibling = oldChild.previousSibling 170 if oldChild.previousSibling is not None: 171 oldChild.previousSibling.nextSibling = oldChild.nextSibling 172 oldChild.nextSibling = oldChild.previousSibling = None 173 if oldChild.nodeType in _nodeTypes_with_children: 174 _clear_id_cache(self) 175 176 oldChild.parentNode = None 177 return oldChild 178 179 def normalize(self): 180 L = [] 181 for child in self.childNodes: 182 if child.nodeType == Node.TEXT_NODE: 183 if not child.data: 184 # empty text node; discard 185 if L: 186 L[-1].nextSibling = child.nextSibling 187 if child.nextSibling: 188 child.nextSibling.previousSibling = child.previousSibling 189 child.unlink() 190 elif L and L[-1].nodeType == child.nodeType: 191 # collapse text node 192 node = L[-1] 193 node.data = node.data + child.data 194 node.nextSibling = child.nextSibling 195 if child.nextSibling: 196 child.nextSibling.previousSibling = node 197 child.unlink() 198 else: 199 L.append(child) 200 else: 201 L.append(child) 202 if child.nodeType == Node.ELEMENT_NODE: 203 child.normalize() 204 self.childNodes[:] = L 205 206 def cloneNode(self, deep): 207 return _clone_node(self, deep, self.ownerDocument or self) 208 209 def isSupported(self, feature, version): 210 return self.ownerDocument.implementation.hasFeature(feature, version) 211 212 def _get_localName(self): 213 # Overridden in Element and Attr where localName can be Non-Null 214 return None 215 216 # Node interfaces from Level 3 (WD 9 April 2002) 217 218 def isSameNode(self, other): 219 return self is other 220 221 def getInterface(self, feature): 222 if self.isSupported(feature, None): 223 return self 224 else: 225 return None 226 227 # The "user data" functions use a dictionary that is only present 228 # if some user data has been set, so be careful not to assume it 229 # exists. 230 231 def getUserData(self, key): 232 try: 233 return self._user_data[key][0] 234 except (AttributeError, KeyError): 235 return None 236 237 def setUserData(self, key, data, handler): 238 old = None 239 try: 240 d = self._user_data 241 except AttributeError: 242 d = {} 243 self._user_data = d 244 if key in d: 245 old = d[key][0] 246 if data is None: 247 # ignore handlers passed for None 248 handler = None 249 if old is not None: 250 del d[key] 251 else: 252 d[key] = (data, handler) 253 return old 254 255 def _call_user_data_handler(self, operation, src, dst): 256 if hasattr(self, "_user_data"): 257 for key, (data, handler) in list(self._user_data.items()): 258 if handler is not None: 259 handler.handle(operation, key, data, src, dst) 260 261 # minidom-specific API: 262 263 def unlink(self): 264 self.parentNode = self.ownerDocument = None 265 if self.childNodes: 266 for child in self.childNodes: 267 child.unlink() 268 self.childNodes = NodeList() 269 self.previousSibling = None 270 self.nextSibling = None 271 272 # A Node is its own context manager, to ensure that an unlink() call occurs. 273 # This is similar to how a file object works. 274 def __enter__(self): 275 return self 276 277 def __exit__(self, et, ev, tb): 278 self.unlink() 279 280defproperty(Node, "firstChild", doc="First child node, or None.") 281defproperty(Node, "lastChild", doc="Last child node, or None.") 282defproperty(Node, "localName", doc="Namespace-local name of this node.") 283 284 285def _append_child(self, node): 286 # fast path with less checks; usable by DOM builders if careful 287 childNodes = self.childNodes 288 if childNodes: 289 last = childNodes[-1] 290 node.previousSibling = last 291 last.nextSibling = node 292 childNodes.append(node) 293 node.parentNode = self 294 295def _in_document(node): 296 # return True iff node is part of a document tree 297 while node is not None: 298 if node.nodeType == Node.DOCUMENT_NODE: 299 return True 300 node = node.parentNode 301 return False 302 303def _write_data(writer, text, attr): 304 "Writes datachars to writer." 305 if not text: 306 return 307 # See the comments in ElementTree.py for behavior and 308 # implementation details. 309 if "&" in text: 310 text = text.replace("&", "&") 311 if "<" in text: 312 text = text.replace("<", "<") 313 if ">" in text: 314 text = text.replace(">", ">") 315 if attr: 316 if '"' in text: 317 text = text.replace('"', """) 318 if "\r" in text: 319 text = text.replace("\r", " ") 320 if "\n" in text: 321 text = text.replace("\n", " ") 322 if "\t" in text: 323 text = text.replace("\t", "	") 324 writer.write(text) 325 326def _get_elements_by_tagName_helper(parent, name, rc): 327 for node in parent.childNodes: 328 if node.nodeType == Node.ELEMENT_NODE and \ 329 (name == "*" or node.tagName == name): 330 rc.append(node) 331 _get_elements_by_tagName_helper(node, name, rc) 332 return rc 333 334def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 335 for node in parent.childNodes: 336 if node.nodeType == Node.ELEMENT_NODE: 337 if ((localName == "*" or node.localName == localName) and 338 (nsURI == "*" or node.namespaceURI == nsURI)): 339 rc.append(node) 340 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 341 return rc 342 343class DocumentFragment(Node): 344 nodeType = Node.DOCUMENT_FRAGMENT_NODE 345 nodeName = "#document-fragment" 346 nodeValue = None 347 attributes = None 348 parentNode = None 349 _child_node_types = (Node.ELEMENT_NODE, 350 Node.TEXT_NODE, 351 Node.CDATA_SECTION_NODE, 352 Node.ENTITY_REFERENCE_NODE, 353 Node.PROCESSING_INSTRUCTION_NODE, 354 Node.COMMENT_NODE, 355 Node.NOTATION_NODE) 356 357 def __init__(self): 358 self.childNodes = NodeList() 359 360 361class Attr(Node): 362 __slots__=('_name', '_value', 'namespaceURI', 363 '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') 364 nodeType = Node.ATTRIBUTE_NODE 365 attributes = None 366 specified = False 367 _is_id = False 368 369 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 370 371 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 372 prefix=None): 373 self.ownerElement = None 374 self._name = qName 375 self.namespaceURI = namespaceURI 376 self._prefix = prefix 377 if localName is not None: 378 self._localName = localName 379 self.childNodes = NodeList() 380 381 # Add the single child node that represents the value of the attr 382 self.childNodes.append(Text()) 383 384 # nodeValue and value are set elsewhere 385 386 def _get_localName(self): 387 try: 388 return self._localName 389 except AttributeError: 390 return self.nodeName.split(":", 1)[-1] 391 392 def _get_specified(self): 393 return self.specified 394 395 def _get_name(self): 396 return self._name 397 398 def _set_name(self, value): 399 self._name = value 400 if self.ownerElement is not None: 401 _clear_id_cache(self.ownerElement) 402 403 nodeName = name = property(_get_name, _set_name) 404 405 def _get_value(self): 406 return self._value 407 408 def _set_value(self, value): 409 self._value = value 410 self.childNodes[0].data = value 411 if self.ownerElement is not None: 412 _clear_id_cache(self.ownerElement) 413 self.childNodes[0].data = value 414 415 nodeValue = value = property(_get_value, _set_value) 416 417 def _get_prefix(self): 418 return self._prefix 419 420 def _set_prefix(self, prefix): 421 nsuri = self.namespaceURI 422 if prefix == "xmlns": 423 if nsuri and nsuri != XMLNS_NAMESPACE: 424 raise xml.dom.NamespaceErr( 425 "illegal use of 'xmlns' prefix for the wrong namespace") 426 self._prefix = prefix 427 if prefix is None: 428 newName = self.localName 429 else: 430 newName = "%s:%s" % (prefix, self.localName) 431 if self.ownerElement: 432 _clear_id_cache(self.ownerElement) 433 self.name = newName 434 435 prefix = property(_get_prefix, _set_prefix) 436 437 def unlink(self): 438 # This implementation does not call the base implementation 439 # since most of that is not needed, and the expense of the 440 # method call is not warranted. We duplicate the removal of 441 # children, but that's all we needed from the base class. 442 elem = self.ownerElement 443 if elem is not None: 444 del elem._attrs[self.nodeName] 445 del elem._attrsNS[(self.namespaceURI, self.localName)] 446 if self._is_id: 447 self._is_id = False 448 elem._magic_id_nodes -= 1 449 self.ownerDocument._magic_id_count -= 1 450 for child in self.childNodes: 451 child.unlink() 452 del self.childNodes[:] 453 454 def _get_isId(self): 455 if self._is_id: 456 return True 457 doc = self.ownerDocument 458 elem = self.ownerElement 459 if doc is None or elem is None: 460 return False 461 462 info = doc._get_elem_info(elem) 463 if info is None: 464 return False 465 if self.namespaceURI: 466 return info.isIdNS(self.namespaceURI, self.localName) 467 else: 468 return info.isId(self.nodeName) 469 470 def _get_schemaType(self): 471 doc = self.ownerDocument 472 elem = self.ownerElement 473 if doc is None or elem is None: 474 return _no_type 475 476 info = doc._get_elem_info(elem) 477 if info is None: 478 return _no_type 479 if self.namespaceURI: 480 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 481 else: 482 return info.getAttributeType(self.nodeName) 483 484defproperty(Attr, "isId", doc="True if this attribute is an ID.") 485defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 486defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 487 488 489class NamedNodeMap(object): 490 """The attribute list is a transient interface to the underlying 491 dictionaries. Mutations here will change the underlying element's 492 dictionary. 493 494 Ordering is imposed artificially and does not reflect the order of 495 attributes as found in an input document. 496 """ 497 498 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 499 500 def __init__(self, attrs, attrsNS, ownerElement): 501 self._attrs = attrs 502 self._attrsNS = attrsNS 503 self._ownerElement = ownerElement 504 505 def _get_length(self): 506 return len(self._attrs) 507 508 def item(self, index): 509 try: 510 return self[list(self._attrs.keys())[index]] 511 except IndexError: 512 return None 513 514 def items(self): 515 L = [] 516 for node in self._attrs.values(): 517 L.append((node.nodeName, node.value)) 518 return L 519 520 def itemsNS(self): 521 L = [] 522 for node in self._attrs.values(): 523 L.append(((node.namespaceURI, node.localName), node.value)) 524 return L 525 526 def __contains__(self, key): 527 if isinstance(key, str): 528 return key in self._attrs 529 else: 530 return key in self._attrsNS 531 532 def keys(self): 533 return self._attrs.keys() 534 535 def keysNS(self): 536 return self._attrsNS.keys() 537 538 def values(self): 539 return self._attrs.values() 540 541 def get(self, name, value=None): 542 return self._attrs.get(name, value) 543 544 __len__ = _get_length 545 546 def _cmp(self, other): 547 if self._attrs is getattr(other, "_attrs", None): 548 return 0 549 else: 550 return (id(self) > id(other)) - (id(self) < id(other)) 551 552 def __eq__(self, other): 553 return self._cmp(other) == 0 554 555 def __ge__(self, other): 556 return self._cmp(other) >= 0 557 558 def __gt__(self, other): 559 return self._cmp(other) > 0 560 561 def __le__(self, other): 562 return self._cmp(other) <= 0 563 564 def __lt__(self, other): 565 return self._cmp(other) < 0 566 567 def __getitem__(self, attname_or_tuple): 568 if isinstance(attname_or_tuple, tuple): 569 return self._attrsNS[attname_or_tuple] 570 else: 571 return self._attrs[attname_or_tuple] 572 573 # same as set 574 def __setitem__(self, attname, value): 575 if isinstance(value, str): 576 try: 577 node = self._attrs[attname] 578 except KeyError: 579 node = Attr(attname) 580 node.ownerDocument = self._ownerElement.ownerDocument 581 self.setNamedItem(node) 582 node.value = value 583 else: 584 if not isinstance(value, Attr): 585 raise TypeError("value must be a string or Attr object") 586 node = value 587 self.setNamedItem(node) 588 589 def getNamedItem(self, name): 590 try: 591 return self._attrs[name] 592 except KeyError: 593 return None 594 595 def getNamedItemNS(self, namespaceURI, localName): 596 try: 597 return self._attrsNS[(namespaceURI, localName)] 598 except KeyError: 599 return None 600 601 def removeNamedItem(self, name): 602 n = self.getNamedItem(name) 603 if n is not None: 604 _clear_id_cache(self._ownerElement) 605 del self._attrs[n.nodeName] 606 del self._attrsNS[(n.namespaceURI, n.localName)] 607 if hasattr(n, 'ownerElement'): 608 n.ownerElement = None 609 return n 610 else: 611 raise xml.dom.NotFoundErr() 612 613 def removeNamedItemNS(self, namespaceURI, localName): 614 n = self.getNamedItemNS(namespaceURI, localName) 615 if n is not None: 616 _clear_id_cache(self._ownerElement) 617 del self._attrsNS[(n.namespaceURI, n.localName)] 618 del self._attrs[n.nodeName] 619 if hasattr(n, 'ownerElement'): 620 n.ownerElement = None 621 return n 622 else: 623 raise xml.dom.NotFoundErr() 624 625 def setNamedItem(self, node): 626 if not isinstance(node, Attr): 627 raise xml.dom.HierarchyRequestErr( 628 "%s cannot be child of %s" % (repr(node), repr(self))) 629 old = self._attrs.get(node.name) 630 if old: 631 old.unlink() 632 self._attrs[node.name] = node 633 self._attrsNS[(node.namespaceURI, node.localName)] = node 634 node.ownerElement = self._ownerElement 635 _clear_id_cache(node.ownerElement) 636 return old 637 638 def setNamedItemNS(self, node): 639 return self.setNamedItem(node) 640 641 def __delitem__(self, attname_or_tuple): 642 node = self[attname_or_tuple] 643 _clear_id_cache(node.ownerElement) 644 node.unlink() 645 646 def __getstate__(self): 647 return self._attrs, self._attrsNS, self._ownerElement 648 649 def __setstate__(self, state): 650 self._attrs, self._attrsNS, self._ownerElement = state 651 652defproperty(NamedNodeMap, "length", 653 doc="Number of nodes in the NamedNodeMap.") 654 655AttributeList = NamedNodeMap 656 657 658class TypeInfo(object): 659 __slots__ = 'namespace', 'name' 660 661 def __init__(self, namespace, name): 662 self.namespace = namespace 663 self.name = name 664 665 def __repr__(self): 666 if self.namespace: 667 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name, 668 self.namespace) 669 else: 670 return "<%s %r>" % (self.__class__.__name__, self.name) 671 672 def _get_name(self): 673 return self.name 674 675 def _get_namespace(self): 676 return self.namespace 677 678_no_type = TypeInfo(None, None) 679 680class Element(Node): 681 __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', 682 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', 683 'nextSibling', 'previousSibling') 684 nodeType = Node.ELEMENT_NODE 685 nodeValue = None 686 schemaType = _no_type 687 688 _magic_id_nodes = 0 689 690 _child_node_types = (Node.ELEMENT_NODE, 691 Node.PROCESSING_INSTRUCTION_NODE, 692 Node.COMMENT_NODE, 693 Node.TEXT_NODE, 694 Node.CDATA_SECTION_NODE, 695 Node.ENTITY_REFERENCE_NODE) 696 697 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 698 localName=None): 699 self.parentNode = None 700 self.tagName = self.nodeName = tagName 701 self.prefix = prefix 702 self.namespaceURI = namespaceURI 703 self.childNodes = NodeList() 704 self.nextSibling = self.previousSibling = None 705 706 # Attribute dictionaries are lazily created 707 # attributes are double-indexed: 708 # tagName -> Attribute 709 # URI,localName -> Attribute 710 # in the future: consider lazy generation 711 # of attribute objects this is too tricky 712 # for now because of headaches with 713 # namespaces. 714 self._attrs = None 715 self._attrsNS = None 716 717 def _ensure_attributes(self): 718 if self._attrs is None: 719 self._attrs = {} 720 self._attrsNS = {} 721 722 def _get_localName(self): 723 try: 724 return self._localName 725 except AttributeError: 726 return self.tagName.split(":", 1)[-1] 727 728 def _get_tagName(self): 729 return self.tagName 730 731 def unlink(self): 732 if self._attrs is not None: 733 for attr in list(self._attrs.values()): 734 attr.unlink() 735 self._attrs = None 736 self._attrsNS = None 737 Node.unlink(self) 738 739 def getAttribute(self, attname): 740 """Returns the value of the specified attribute. 741 742 Returns the value of the element's attribute named attname as 743 a string. An empty string is returned if the element does not 744 have such an attribute. Note that an empty string may also be 745 returned as an explicitly given attribute value, use the 746 hasAttribute method to distinguish these two cases. 747 """ 748 if self._attrs is None: 749 return "" 750 try: 751 return self._attrs[attname].value 752 except KeyError: 753 return "" 754 755 def getAttributeNS(self, namespaceURI, localName): 756 if self._attrsNS is None: 757 return "" 758 try: 759 return self._attrsNS[(namespaceURI, localName)].value 760 except KeyError: 761 return "" 762 763 def setAttribute(self, attname, value): 764 attr = self.getAttributeNode(attname) 765 if attr is None: 766 attr = Attr(attname) 767 attr.value = value # also sets nodeValue 768 attr.ownerDocument = self.ownerDocument 769 self.setAttributeNode(attr) 770 elif value != attr.value: 771 attr.value = value 772 if attr.isId: 773 _clear_id_cache(self) 774 775 def setAttributeNS(self, namespaceURI, qualifiedName, value): 776 prefix, localname = _nssplit(qualifiedName) 777 attr = self.getAttributeNodeNS(namespaceURI, localname) 778 if attr is None: 779 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 780 attr.value = value 781 attr.ownerDocument = self.ownerDocument 782 self.setAttributeNode(attr) 783 else: 784 if value != attr.value: 785 attr.value = value 786 if attr.isId: 787 _clear_id_cache(self) 788 if attr.prefix != prefix: 789 attr.prefix = prefix 790 attr.nodeName = qualifiedName 791 792 def getAttributeNode(self, attrname): 793 if self._attrs is None: 794 return None 795 return self._attrs.get(attrname) 796 797 def getAttributeNodeNS(self, namespaceURI, localName): 798 if self._attrsNS is None: 799 return None 800 return self._attrsNS.get((namespaceURI, localName)) 801 802 def setAttributeNode(self, attr): 803 if attr.ownerElement not in (None, self): 804 raise xml.dom.InuseAttributeErr("attribute node already owned") 805 self._ensure_attributes() 806 old1 = self._attrs.get(attr.name, None) 807 if old1 is not None: 808 self.removeAttributeNode(old1) 809 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 810 if old2 is not None and old2 is not old1: 811 self.removeAttributeNode(old2) 812 _set_attribute_node(self, attr) 813 814 if old1 is not attr: 815 # It might have already been part of this node, in which case 816 # it doesn't represent a change, and should not be returned. 817 return old1 818 if old2 is not attr: 819 return old2 820 821 setAttributeNodeNS = setAttributeNode 822 823 def removeAttribute(self, name): 824 if self._attrsNS is None: 825 raise xml.dom.NotFoundErr() 826 try: 827 attr = self._attrs[name] 828 except KeyError: 829 raise xml.dom.NotFoundErr() 830 self.removeAttributeNode(attr) 831 832 def removeAttributeNS(self, namespaceURI, localName): 833 if self._attrsNS is None: 834 raise xml.dom.NotFoundErr() 835 try: 836 attr = self._attrsNS[(namespaceURI, localName)] 837 except KeyError: 838 raise xml.dom.NotFoundErr() 839 self.removeAttributeNode(attr) 840 841 def removeAttributeNode(self, node): 842 if node is None: 843 raise xml.dom.NotFoundErr() 844 try: 845 self._attrs[node.name] 846 except KeyError: 847 raise xml.dom.NotFoundErr() 848 _clear_id_cache(self) 849 node.unlink() 850 # Restore this since the node is still useful and otherwise 851 # unlinked 852 node.ownerDocument = self.ownerDocument 853 return node 854 855 removeAttributeNodeNS = removeAttributeNode 856 857 def hasAttribute(self, name): 858 """Checks whether the element has an attribute with the specified name. 859 860 Returns True if the element has an attribute with the specified name. 861 Otherwise, returns False. 862 """ 863 if self._attrs is None: 864 return False 865 return name in self._attrs 866 867 def hasAttributeNS(self, namespaceURI, localName): 868 if self._attrsNS is None: 869 return False 870 return (namespaceURI, localName) in self._attrsNS 871 872 def getElementsByTagName(self, name): 873 """Returns all descendant elements with the given tag name. 874 875 Returns the list of all descendant elements (not direct children 876 only) with the specified tag name. 877 """ 878 return _get_elements_by_tagName_helper(self, name, NodeList()) 879 880 def getElementsByTagNameNS(self, namespaceURI, localName): 881 return _get_elements_by_tagName_ns_helper( 882 self, namespaceURI, localName, NodeList()) 883 884 def __repr__(self): 885 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 886 887 def writexml(self, writer, indent="", addindent="", newl=""): 888 """Write an XML element to a file-like object 889 890 Write the element to the writer object that must provide 891 a write method (e.g. a file or StringIO object). 892 """ 893 # indent = current indentation 894 # addindent = indentation to add to higher levels 895 # newl = newline string 896 writer.write(indent+"<" + self.tagName) 897 898 attrs = self._get_attributes() 899 900 for a_name in attrs.keys(): 901 writer.write(" %s=\"" % a_name) 902 _write_data(writer, attrs[a_name].value, True) 903 writer.write("\"") 904 if self.childNodes: 905 writer.write(">") 906 if (len(self.childNodes) == 1 and 907 self.childNodes[0].nodeType in ( 908 Node.TEXT_NODE, Node.CDATA_SECTION_NODE)): 909 self.childNodes[0].writexml(writer, '', '', '') 910 else: 911 writer.write(newl) 912 for node in self.childNodes: 913 node.writexml(writer, indent+addindent, addindent, newl) 914 writer.write(indent) 915 writer.write("</%s>%s" % (self.tagName, newl)) 916 else: 917 writer.write("/>%s"%(newl)) 918 919 def _get_attributes(self): 920 self._ensure_attributes() 921 return NamedNodeMap(self._attrs, self._attrsNS, self) 922 923 def hasAttributes(self): 924 if self._attrs: 925 return True 926 else: 927 return False 928 929 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 930 931 def setIdAttribute(self, name): 932 idAttr = self.getAttributeNode(name) 933 self.setIdAttributeNode(idAttr) 934 935 def setIdAttributeNS(self, namespaceURI, localName): 936 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 937 self.setIdAttributeNode(idAttr) 938 939 def setIdAttributeNode(self, idAttr): 940 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 941 raise xml.dom.NotFoundErr() 942 if _get_containing_entref(self) is not None: 943 raise xml.dom.NoModificationAllowedErr() 944 if not idAttr._is_id: 945 idAttr._is_id = True 946 self._magic_id_nodes += 1 947 self.ownerDocument._magic_id_count += 1 948 _clear_id_cache(self) 949 950defproperty(Element, "attributes", 951 doc="NamedNodeMap of attributes on the element.") 952defproperty(Element, "localName", 953 doc="Namespace-local name of this element.") 954 955 956def _set_attribute_node(element, attr): 957 _clear_id_cache(element) 958 element._ensure_attributes() 959 element._attrs[attr.name] = attr 960 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 961 962 # This creates a circular reference, but Element.unlink() 963 # breaks the cycle since the references to the attribute 964 # dictionaries are tossed. 965 attr.ownerElement = element 966 967class Childless: 968 """Mixin that makes childless-ness easy to implement and avoids 969 the complexity of the Node methods that deal with children. 970 """ 971 __slots__ = () 972 973 attributes = None 974 childNodes = EmptyNodeList() 975 firstChild = None 976 lastChild = None 977 978 def _get_firstChild(self): 979 return None 980 981 def _get_lastChild(self): 982 return None 983 984 def appendChild(self, node): 985 raise xml.dom.HierarchyRequestErr( 986 self.nodeName + " nodes cannot have children") 987 988 def hasChildNodes(self): 989 return False 990 991 def insertBefore(self, newChild, refChild): 992 raise xml.dom.HierarchyRequestErr( 993 self.nodeName + " nodes do not have children") 994 995 def removeChild(self, oldChild): 996 raise xml.dom.NotFoundErr( 997 self.nodeName + " nodes do not have children") 998 999 def normalize(self): 1000 # For childless nodes, normalize() has nothing to do. 1001 pass 1002 1003 def replaceChild(self, newChild, oldChild): 1004 raise xml.dom.HierarchyRequestErr( 1005 self.nodeName + " nodes do not have children") 1006 1007 1008class ProcessingInstruction(Childless, Node): 1009 nodeType = Node.PROCESSING_INSTRUCTION_NODE 1010 __slots__ = ('target', 'data') 1011 1012 def __init__(self, target, data): 1013 self.target = target 1014 self.data = data 1015 1016 # nodeValue is an alias for data 1017 def _get_nodeValue(self): 1018 return self.data 1019 def _set_nodeValue(self, value): 1020 self.data = value 1021 nodeValue = property(_get_nodeValue, _set_nodeValue) 1022 1023 # nodeName is an alias for target 1024 def _get_nodeName(self): 1025 return self.target 1026 def _set_nodeName(self, value): 1027 self.target = value 1028 nodeName = property(_get_nodeName, _set_nodeName) 1029 1030 def writexml(self, writer, indent="", addindent="", newl=""): 1031 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 1032 1033 1034class CharacterData(Childless, Node): 1035 __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') 1036 1037 def __init__(self): 1038 self.ownerDocument = self.parentNode = None 1039 self.previousSibling = self.nextSibling = None 1040 self._data = '' 1041 Node.__init__(self) 1042 1043 def _get_length(self): 1044 return len(self.data) 1045 __len__ = _get_length 1046 1047 def _get_data(self): 1048 return self._data 1049 def _set_data(self, data): 1050 self._data = data 1051 1052 data = nodeValue = property(_get_data, _set_data) 1053 1054 def __repr__(self): 1055 data = self.data 1056 if len(data) > 10: 1057 dotdotdot = "..." 1058 else: 1059 dotdotdot = "" 1060 return '<DOM %s node "%r%s">' % ( 1061 self.__class__.__name__, data[0:10], dotdotdot) 1062 1063 def substringData(self, offset, count): 1064 if offset < 0: 1065 raise xml.dom.IndexSizeErr("offset cannot be negative") 1066 if offset >= len(self.data): 1067 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1068 if count < 0: 1069 raise xml.dom.IndexSizeErr("count cannot be negative") 1070 return self.data[offset:offset+count] 1071 1072 def appendData(self, arg): 1073 self.data = self.data + arg 1074 1075 def insertData(self, offset, arg): 1076 if offset < 0: 1077 raise xml.dom.IndexSizeErr("offset cannot be negative") 1078 if offset >= len(self.data): 1079 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1080 if arg: 1081 self.data = "%s%s%s" % ( 1082 self.data[:offset], arg, self.data[offset:]) 1083 1084 def deleteData(self, offset, count): 1085 if offset < 0: 1086 raise xml.dom.IndexSizeErr("offset cannot be negative") 1087 if offset >= len(self.data): 1088 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1089 if count < 0: 1090 raise xml.dom.IndexSizeErr("count cannot be negative") 1091 if count: 1092 self.data = self.data[:offset] + self.data[offset+count:] 1093 1094 def replaceData(self, offset, count, arg): 1095 if offset < 0: 1096 raise xml.dom.IndexSizeErr("offset cannot be negative") 1097 if offset >= len(self.data): 1098 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1099 if count < 0: 1100 raise xml.dom.IndexSizeErr("count cannot be negative") 1101 if count: 1102 self.data = "%s%s%s" % ( 1103 self.data[:offset], arg, self.data[offset+count:]) 1104 1105defproperty(CharacterData, "length", doc="Length of the string data.") 1106 1107 1108class Text(CharacterData): 1109 __slots__ = () 1110 1111 nodeType = Node.TEXT_NODE 1112 nodeName = "#text" 1113 attributes = None 1114 1115 def splitText(self, offset): 1116 if offset < 0 or offset > len(self.data): 1117 raise xml.dom.IndexSizeErr("illegal offset value") 1118 newText = self.__class__() 1119 newText.data = self.data[offset:] 1120 newText.ownerDocument = self.ownerDocument 1121 next = self.nextSibling 1122 if self.parentNode and self in self.parentNode.childNodes: 1123 if next is None: 1124 self.parentNode.appendChild(newText) 1125 else: 1126 self.parentNode.insertBefore(newText, next) 1127 self.data = self.data[:offset] 1128 return newText 1129 1130 def writexml(self, writer, indent="", addindent="", newl=""): 1131 _write_data(writer, "%s%s%s" % (indent, self.data, newl), False) 1132 1133 # DOM Level 3 (WD 9 April 2002) 1134 1135 def _get_wholeText(self): 1136 L = [self.data] 1137 n = self.previousSibling 1138 while n is not None: 1139 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1140 L.insert(0, n.data) 1141 n = n.previousSibling 1142 else: 1143 break 1144 n = self.nextSibling 1145 while n is not None: 1146 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1147 L.append(n.data) 1148 n = n.nextSibling 1149 else: 1150 break 1151 return ''.join(L) 1152 1153 def replaceWholeText(self, content): 1154 # XXX This needs to be seriously changed if minidom ever 1155 # supports EntityReference nodes. 1156 parent = self.parentNode 1157 n = self.previousSibling 1158 while n is not None: 1159 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1160 next = n.previousSibling 1161 parent.removeChild(n) 1162 n = next 1163 else: 1164 break 1165 n = self.nextSibling 1166 if not content: 1167 parent.removeChild(self) 1168 while n is not None: 1169 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1170 next = n.nextSibling 1171 parent.removeChild(n) 1172 n = next 1173 else: 1174 break 1175 if content: 1176 self.data = content 1177 return self 1178 else: 1179 return None 1180 1181 def _get_isWhitespaceInElementContent(self): 1182 if self.data.strip(): 1183 return False 1184 elem = _get_containing_element(self) 1185 if elem is None: 1186 return False 1187 info = self.ownerDocument._get_elem_info(elem) 1188 if info is None: 1189 return False 1190 else: 1191 return info.isElementContent() 1192 1193defproperty(Text, "isWhitespaceInElementContent", 1194 doc="True iff this text node contains only whitespace" 1195 " and is in element content.") 1196defproperty(Text, "wholeText", 1197 doc="The text of all logically-adjacent text nodes.") 1198 1199 1200def _get_containing_element(node): 1201 c = node.parentNode 1202 while c is not None: 1203 if c.nodeType == Node.ELEMENT_NODE: 1204 return c 1205 c = c.parentNode 1206 return None 1207 1208def _get_containing_entref(node): 1209 c = node.parentNode 1210 while c is not None: 1211 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1212 return c 1213 c = c.parentNode 1214 return None 1215 1216 1217class Comment(CharacterData): 1218 nodeType = Node.COMMENT_NODE 1219 nodeName = "#comment" 1220 1221 def __init__(self, data): 1222 CharacterData.__init__(self) 1223 self._data = data 1224 1225 def writexml(self, writer, indent="", addindent="", newl=""): 1226 if "--" in self.data: 1227 raise ValueError("'--' is not allowed in a comment node") 1228 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1229 1230 1231class CDATASection(Text): 1232 __slots__ = () 1233 1234 nodeType = Node.CDATA_SECTION_NODE 1235 nodeName = "#cdata-section" 1236 1237 def writexml(self, writer, indent="", addindent="", newl=""): 1238 if self.data.find("]]>") >= 0: 1239 raise ValueError("']]>' not allowed in a CDATA section") 1240 writer.write("<![CDATA[%s]]>" % self.data) 1241 1242 1243class ReadOnlySequentialNamedNodeMap(object): 1244 __slots__ = '_seq', 1245 1246 def __init__(self, seq=()): 1247 # seq should be a list or tuple 1248 self._seq = seq 1249 1250 def __len__(self): 1251 return len(self._seq) 1252 1253 def _get_length(self): 1254 return len(self._seq) 1255 1256 def getNamedItem(self, name): 1257 for n in self._seq: 1258 if n.nodeName == name: 1259 return n 1260 1261 def getNamedItemNS(self, namespaceURI, localName): 1262 for n in self._seq: 1263 if n.namespaceURI == namespaceURI and n.localName == localName: 1264 return n 1265 1266 def __getitem__(self, name_or_tuple): 1267 if isinstance(name_or_tuple, tuple): 1268 node = self.getNamedItemNS(*name_or_tuple) 1269 else: 1270 node = self.getNamedItem(name_or_tuple) 1271 if node is None: 1272 raise KeyError(name_or_tuple) 1273 return node 1274 1275 def item(self, index): 1276 if index < 0: 1277 return None 1278 try: 1279 return self._seq[index] 1280 except IndexError: 1281 return None 1282 1283 def removeNamedItem(self, name): 1284 raise xml.dom.NoModificationAllowedErr( 1285 "NamedNodeMap instance is read-only") 1286 1287 def removeNamedItemNS(self, namespaceURI, localName): 1288 raise xml.dom.NoModificationAllowedErr( 1289 "NamedNodeMap instance is read-only") 1290 1291 def setNamedItem(self, node): 1292 raise xml.dom.NoModificationAllowedErr( 1293 "NamedNodeMap instance is read-only") 1294 1295 def setNamedItemNS(self, node): 1296 raise xml.dom.NoModificationAllowedErr( 1297 "NamedNodeMap instance is read-only") 1298 1299 def __getstate__(self): 1300 return [self._seq] 1301 1302 def __setstate__(self, state): 1303 self._seq = state[0] 1304 1305defproperty(ReadOnlySequentialNamedNodeMap, "length", 1306 doc="Number of entries in the NamedNodeMap.") 1307 1308 1309class Identified: 1310 """Mix-in class that supports the publicId and systemId attributes.""" 1311 1312 __slots__ = 'publicId', 'systemId' 1313 1314 def _identified_mixin_init(self, publicId, systemId): 1315 self.publicId = publicId 1316 self.systemId = systemId 1317 1318 def _get_publicId(self): 1319 return self.publicId 1320 1321 def _get_systemId(self): 1322 return self.systemId 1323 1324class DocumentType(Identified, Childless, Node): 1325 nodeType = Node.DOCUMENT_TYPE_NODE 1326 nodeValue = None 1327 name = None 1328 publicId = None 1329 systemId = None 1330 internalSubset = None 1331 1332 def __init__(self, qualifiedName): 1333 self.entities = ReadOnlySequentialNamedNodeMap() 1334 self.notations = ReadOnlySequentialNamedNodeMap() 1335 if qualifiedName: 1336 prefix, localname = _nssplit(qualifiedName) 1337 self.name = localname 1338 self.nodeName = self.name 1339 1340 def _get_internalSubset(self): 1341 return self.internalSubset 1342 1343 def cloneNode(self, deep): 1344 if self.ownerDocument is None: 1345 # it's ok 1346 clone = DocumentType(None) 1347 clone.name = self.name 1348 clone.nodeName = self.name 1349 operation = xml.dom.UserDataHandler.NODE_CLONED 1350 if deep: 1351 clone.entities._seq = [] 1352 clone.notations._seq = [] 1353 for n in self.notations._seq: 1354 notation = Notation(n.nodeName, n.publicId, n.systemId) 1355 clone.notations._seq.append(notation) 1356 n._call_user_data_handler(operation, n, notation) 1357 for e in self.entities._seq: 1358 entity = Entity(e.nodeName, e.publicId, e.systemId, 1359 e.notationName) 1360 entity.actualEncoding = e.actualEncoding 1361 entity.encoding = e.encoding 1362 entity.version = e.version 1363 clone.entities._seq.append(entity) 1364 e._call_user_data_handler(operation, e, entity) 1365 self._call_user_data_handler(operation, self, clone) 1366 return clone 1367 else: 1368 return None 1369 1370 def writexml(self, writer, indent="", addindent="", newl=""): 1371 writer.write("<!DOCTYPE ") 1372 writer.write(self.name) 1373 if self.publicId: 1374 writer.write("%s PUBLIC '%s'%s '%s'" 1375 % (newl, self.publicId, newl, self.systemId)) 1376 elif self.systemId: 1377 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1378 if self.internalSubset is not None: 1379 writer.write(" [") 1380 writer.write(self.internalSubset) 1381 writer.write("]") 1382 writer.write(">"+newl) 1383 1384class Entity(Identified, Node): 1385 attributes = None 1386 nodeType = Node.ENTITY_NODE 1387 nodeValue = None 1388 1389 actualEncoding = None 1390 encoding = None 1391 version = None 1392 1393 def __init__(self, name, publicId, systemId, notation): 1394 self.nodeName = name 1395 self.notationName = notation 1396 self.childNodes = NodeList() 1397 self._identified_mixin_init(publicId, systemId) 1398 1399 def _get_actualEncoding(self): 1400 return self.actualEncoding 1401 1402 def _get_encoding(self): 1403 return self.encoding 1404 1405 def _get_version(self): 1406 return self.version 1407 1408 def appendChild(self, newChild): 1409 raise xml.dom.HierarchyRequestErr( 1410 "cannot append children to an entity node") 1411 1412 def insertBefore(self, newChild, refChild): 1413 raise xml.dom.HierarchyRequestErr( 1414 "cannot insert children below an entity node") 1415 1416 def removeChild(self, oldChild): 1417 raise xml.dom.HierarchyRequestErr( 1418 "cannot remove children from an entity node") 1419 1420 def replaceChild(self, newChild, oldChild): 1421 raise xml.dom.HierarchyRequestErr( 1422 "cannot replace children of an entity node") 1423 1424class Notation(Identified, Childless, Node): 1425 nodeType = Node.NOTATION_NODE 1426 nodeValue = None 1427 1428 def __init__(self, name, publicId, systemId): 1429 self.nodeName = name 1430 self._identified_mixin_init(publicId, systemId) 1431 1432 1433class DOMImplementation(DOMImplementationLS): 1434 _features = [("core", "1.0"), 1435 ("core", "2.0"), 1436 ("core", None), 1437 ("xml", "1.0"), 1438 ("xml", "2.0"), 1439 ("xml", None), 1440 ("ls-load", "3.0"), 1441 ("ls-load", None), 1442 ] 1443 1444 def hasFeature(self, feature, version): 1445 if version == "": 1446 version = None 1447 return (feature.lower(), version) in self._features 1448 1449 def createDocument(self, namespaceURI, qualifiedName, doctype): 1450 if doctype and doctype.parentNode is not None: 1451 raise xml.dom.WrongDocumentErr( 1452 "doctype object owned by another DOM tree") 1453 doc = self._create_document() 1454 1455 add_root_element = not (namespaceURI is None 1456 and qualifiedName is None 1457 and doctype is None) 1458 1459 if not qualifiedName and add_root_element: 1460 # The spec is unclear what to raise here; SyntaxErr 1461 # would be the other obvious candidate. Since Xerces raises 1462 # InvalidCharacterErr, and since SyntaxErr is not listed 1463 # for createDocument, that seems to be the better choice. 1464 # XXX: need to check for illegal characters here and in 1465 # createElement. 1466 1467 # DOM Level III clears this up when talking about the return value 1468 # of this function. If namespaceURI, qName and DocType are 1469 # Null the document is returned without a document element 1470 # Otherwise if doctype or namespaceURI are not None 1471 # Then we go back to the above problem 1472 raise xml.dom.InvalidCharacterErr("Element with no name") 1473 1474 if add_root_element: 1475 prefix, localname = _nssplit(qualifiedName) 1476 if prefix == "xml" \ 1477 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1478 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1479 if prefix and not namespaceURI: 1480 raise xml.dom.NamespaceErr( 1481 "illegal use of prefix without namespaces") 1482 element = doc.createElementNS(namespaceURI, qualifiedName) 1483 if doctype: 1484 doc.appendChild(doctype) 1485 doc.appendChild(element) 1486 1487 if doctype: 1488 doctype.parentNode = doctype.ownerDocument = doc 1489 1490 doc.doctype = doctype 1491 doc.implementation = self 1492 return doc 1493 1494 def createDocumentType(self, qualifiedName, publicId, systemId): 1495 doctype = DocumentType(qualifiedName) 1496 doctype.publicId = publicId 1497 doctype.systemId = systemId 1498 return doctype 1499 1500 # DOM Level 3 (WD 9 April 2002) 1501 1502 def getInterface(self, feature): 1503 if self.hasFeature(feature, None): 1504 return self 1505 else: 1506 return None 1507 1508 # internal 1509 def _create_document(self): 1510 return Document() 1511 1512class ElementInfo(object): 1513 """Object that represents content-model information for an element. 1514 1515 This implementation is not expected to be used in practice; DOM 1516 builders should provide implementations which do the right thing 1517 using information available to it. 1518 1519 """ 1520 1521 __slots__ = 'tagName', 1522 1523 def __init__(self, name): 1524 self.tagName = name 1525 1526 def getAttributeType(self, aname): 1527 return _no_type 1528 1529 def getAttributeTypeNS(self, namespaceURI, localName): 1530 return _no_type 1531 1532 def isElementContent(self): 1533 return False 1534 1535 def isEmpty(self): 1536 """Returns true iff this element is declared to have an EMPTY 1537 content model.""" 1538 return False 1539 1540 def isId(self, aname): 1541 """Returns true iff the named attribute is a DTD-style ID.""" 1542 return False 1543 1544 def isIdNS(self, namespaceURI, localName): 1545 """Returns true iff the identified attribute is a DTD-style ID.""" 1546 return False 1547 1548 def __getstate__(self): 1549 return self.tagName 1550 1551 def __setstate__(self, state): 1552 self.tagName = state 1553 1554def _clear_id_cache(node): 1555 if node.nodeType == Node.DOCUMENT_NODE: 1556 node._id_cache.clear() 1557 node._id_search_stack = None 1558 elif _in_document(node): 1559 node.ownerDocument._id_cache.clear() 1560 node.ownerDocument._id_search_stack= None 1561 1562class Document(Node, DocumentLS): 1563 __slots__ = ('_elem_info', 'doctype', 1564 '_id_search_stack', 'childNodes', '_id_cache') 1565 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1566 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1567 1568 implementation = DOMImplementation() 1569 nodeType = Node.DOCUMENT_NODE 1570 nodeName = "#document" 1571 nodeValue = None 1572 attributes = None 1573 parentNode = None 1574 previousSibling = nextSibling = None 1575 1576 1577 # Document attributes from Level 3 (WD 9 April 2002) 1578 1579 actualEncoding = None 1580 encoding = None 1581 standalone = None 1582 version = None 1583 strictErrorChecking = False 1584 errorHandler = None 1585 documentURI = None 1586 1587 _magic_id_count = 0 1588 1589 def __init__(self): 1590 self.doctype = None 1591 self.childNodes = NodeList() 1592 # mapping of (namespaceURI, localName) -> ElementInfo 1593 # and tagName -> ElementInfo 1594 self._elem_info = {} 1595 self._id_cache = {} 1596 self._id_search_stack = None 1597 1598 def _get_elem_info(self, element): 1599 if element.namespaceURI: 1600 key = element.namespaceURI, element.localName 1601 else: 1602 key = element.tagName 1603 return self._elem_info.get(key) 1604 1605 def _get_actualEncoding(self): 1606 return self.actualEncoding 1607 1608 def _get_doctype(self): 1609 return self.doctype 1610 1611 def _get_documentURI(self): 1612 return self.documentURI 1613 1614 def _get_encoding(self): 1615 return self.encoding 1616 1617 def _get_errorHandler(self): 1618 return self.errorHandler 1619 1620 def _get_standalone(self): 1621 return self.standalone 1622 1623 def _get_strictErrorChecking(self): 1624 return self.strictErrorChecking 1625 1626 def _get_version(self): 1627 return self.version 1628 1629 def appendChild(self, node): 1630 if node.nodeType not in self._child_node_types: 1631 raise xml.dom.HierarchyRequestErr( 1632 "%s cannot be child of %s" % (repr(node), repr(self))) 1633 if node.parentNode is not None: 1634 # This needs to be done before the next test since this 1635 # may *be* the document element, in which case it should 1636 # end up re-ordered to the end. 1637 node.parentNode.removeChild(node) 1638 1639 if node.nodeType == Node.ELEMENT_NODE \ 1640 and self._get_documentElement(): 1641 raise xml.dom.HierarchyRequestErr( 1642 "two document elements disallowed") 1643 return Node.appendChild(self, node) 1644 1645 def removeChild(self, oldChild): 1646 try: 1647 self.childNodes.remove(oldChild) 1648 except ValueError: 1649 raise xml.dom.NotFoundErr() 1650 oldChild.nextSibling = oldChild.previousSibling = None 1651 oldChild.parentNode = None 1652 if self.documentElement is oldChild: 1653 self.documentElement = None 1654 1655 return oldChild 1656 1657 def _get_documentElement(self): 1658 for node in self.childNodes: 1659 if node.nodeType == Node.ELEMENT_NODE: 1660 return node 1661 1662 def unlink(self): 1663 if self.doctype is not None: 1664 self.doctype.unlink() 1665 self.doctype = None 1666 Node.unlink(self) 1667 1668 def cloneNode(self, deep): 1669 if not deep: 1670 return None 1671 clone = self.implementation.createDocument(None, None, None) 1672 clone.encoding = self.encoding 1673 clone.standalone = self.standalone 1674 clone.version = self.version 1675 for n in self.childNodes: 1676 childclone = _clone_node(n, deep, clone) 1677 assert childclone.ownerDocument.isSameNode(clone) 1678 clone.childNodes.append(childclone) 1679 if childclone.nodeType == Node.DOCUMENT_NODE: 1680 assert clone.documentElement is None 1681 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1682 assert clone.doctype is None 1683 clone.doctype = childclone 1684 childclone.parentNode = clone 1685 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1686 self, clone) 1687 return clone 1688 1689 def createDocumentFragment(self): 1690 d = DocumentFragment() 1691 d.ownerDocument = self 1692 return d 1693 1694 def createElement(self, tagName): 1695 e = Element(tagName) 1696 e.ownerDocument = self 1697 return e 1698 1699 def createTextNode(self, data): 1700 if not isinstance(data, str): 1701 raise TypeError("node contents must be a string") 1702 t = Text() 1703 t.data = data 1704 t.ownerDocument = self 1705 return t 1706 1707 def createCDATASection(self, data): 1708 if not isinstance(data, str): 1709 raise TypeError("node contents must be a string") 1710 c = CDATASection() 1711 c.data = data 1712 c.ownerDocument = self 1713 return c 1714 1715 def createComment(self, data): 1716 c = Comment(data) 1717 c.ownerDocument = self 1718 return c 1719 1720 def createProcessingInstruction(self, target, data): 1721 p = ProcessingInstruction(target, data) 1722 p.ownerDocument = self 1723 return p 1724 1725 def createAttribute(self, qName): 1726 a = Attr(qName) 1727 a.ownerDocument = self 1728 a.value = "" 1729 return a 1730 1731 def createElementNS(self, namespaceURI, qualifiedName): 1732 prefix, localName = _nssplit(qualifiedName) 1733 e = Element(qualifiedName, namespaceURI, prefix) 1734 e.ownerDocument = self 1735 return e 1736 1737 def createAttributeNS(self, namespaceURI, qualifiedName): 1738 prefix, localName = _nssplit(qualifiedName) 1739 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1740 a.ownerDocument = self 1741 a.value = "" 1742 return a 1743 1744 # A couple of implementation-specific helpers to create node types 1745 # not supported by the W3C DOM specs: 1746 1747 def _create_entity(self, name, publicId, systemId, notationName): 1748 e = Entity(name, publicId, systemId, notationName) 1749 e.ownerDocument = self 1750 return e 1751 1752 def _create_notation(self, name, publicId, systemId): 1753 n = Notation(name, publicId, systemId) 1754 n.ownerDocument = self 1755 return n 1756 1757 def getElementById(self, id): 1758 if id in self._id_cache: 1759 return self._id_cache[id] 1760 if not (self._elem_info or self._magic_id_count): 1761 return None 1762 1763 stack = self._id_search_stack 1764 if stack is None: 1765 # we never searched before, or the cache has been cleared 1766 stack = [self.documentElement] 1767 self._id_search_stack = stack 1768 elif not stack: 1769 # Previous search was completed and cache is still valid; 1770 # no matching node. 1771 return None 1772 1773 result = None 1774 while stack: 1775 node = stack.pop() 1776 # add child elements to stack for continued searching 1777 stack.extend([child for child in node.childNodes 1778 if child.nodeType in _nodeTypes_with_children]) 1779 # check this node 1780 info = self._get_elem_info(node) 1781 if info: 1782 # We have to process all ID attributes before 1783 # returning in order to get all the attributes set to 1784 # be IDs using Element.setIdAttribute*(). 1785 for attr in node.attributes.values(): 1786 if attr.namespaceURI: 1787 if info.isIdNS(attr.namespaceURI, attr.localName): 1788 self._id_cache[attr.value] = node 1789 if attr.value == id: 1790 result = node 1791 elif not node._magic_id_nodes: 1792 break 1793 elif info.isId(attr.name): 1794 self._id_cache[attr.value] = node 1795 if attr.value == id: 1796 result = node 1797 elif not node._magic_id_nodes: 1798 break 1799 elif attr._is_id: 1800 self._id_cache[attr.value] = node 1801 if attr.value == id: 1802 result = node 1803 elif node._magic_id_nodes == 1: 1804 break 1805 elif node._magic_id_nodes: 1806 for attr in node.attributes.values(): 1807 if attr._is_id: 1808 self._id_cache[attr.value] = node 1809 if attr.value == id: 1810 result = node 1811 if result is not None: 1812 break 1813 return result 1814 1815 def getElementsByTagName(self, name): 1816 return _get_elements_by_tagName_helper(self, name, NodeList()) 1817 1818 def getElementsByTagNameNS(self, namespaceURI, localName): 1819 return _get_elements_by_tagName_ns_helper( 1820 self, namespaceURI, localName, NodeList()) 1821 1822 def isSupported(self, feature, version): 1823 return self.implementation.hasFeature(feature, version) 1824 1825 def importNode(self, node, deep): 1826 if node.nodeType == Node.DOCUMENT_NODE: 1827 raise xml.dom.NotSupportedErr("cannot import document nodes") 1828 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1829 raise xml.dom.NotSupportedErr("cannot import document type nodes") 1830 return _clone_node(node, deep, self) 1831 1832 def writexml(self, writer, indent="", addindent="", newl="", encoding=None, 1833 standalone=None): 1834 declarations = [] 1835 1836 if encoding: 1837 declarations.append(f'encoding="{encoding}"') 1838 if standalone is not None: 1839 declarations.append(f'standalone="{"yes" if standalone else "no"}"') 1840 1841 writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}') 1842 1843 for node in self.childNodes: 1844 node.writexml(writer, indent, addindent, newl) 1845 1846 # DOM Level 3 (WD 9 April 2002) 1847 1848 def renameNode(self, n, namespaceURI, name): 1849 if n.ownerDocument is not self: 1850 raise xml.dom.WrongDocumentErr( 1851 "cannot rename nodes from other documents;\n" 1852 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1853 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1854 raise xml.dom.NotSupportedErr( 1855 "renameNode() only applies to element and attribute nodes") 1856 if namespaceURI != EMPTY_NAMESPACE: 1857 if ':' in name: 1858 prefix, localName = name.split(':', 1) 1859 if ( prefix == "xmlns" 1860 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1861 raise xml.dom.NamespaceErr( 1862 "illegal use of 'xmlns' prefix") 1863 else: 1864 if ( name == "xmlns" 1865 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1866 and n.nodeType == Node.ATTRIBUTE_NODE): 1867 raise xml.dom.NamespaceErr( 1868 "illegal use of the 'xmlns' attribute") 1869 prefix = None 1870 localName = name 1871 else: 1872 prefix = None 1873 localName = None 1874 if n.nodeType == Node.ATTRIBUTE_NODE: 1875 element = n.ownerElement 1876 if element is not None: 1877 is_id = n._is_id 1878 element.removeAttributeNode(n) 1879 else: 1880 element = None 1881 n.prefix = prefix 1882 n._localName = localName 1883 n.namespaceURI = namespaceURI 1884 n.nodeName = name 1885 if n.nodeType == Node.ELEMENT_NODE: 1886 n.tagName = name 1887 else: 1888 # attribute node 1889 n.name = name 1890 if element is not None: 1891 element.setAttributeNode(n) 1892 if is_id: 1893 element.setIdAttributeNode(n) 1894 # It's not clear from a semantic perspective whether we should 1895 # call the user data handlers for the NODE_RENAMED event since 1896 # we're re-using the existing node. The draft spec has been 1897 # interpreted as meaning "no, don't call the handler unless a 1898 # new node is created." 1899 return n 1900 1901defproperty(Document, "documentElement", 1902 doc="Top-level element of this document.") 1903 1904 1905def _clone_node(node, deep, newOwnerDocument): 1906 """ 1907 Clone a node and give it the new owner document. 1908 Called by Node.cloneNode and Document.importNode 1909 """ 1910 if node.ownerDocument.isSameNode(newOwnerDocument): 1911 operation = xml.dom.UserDataHandler.NODE_CLONED 1912 else: 1913 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1914 if node.nodeType == Node.ELEMENT_NODE: 1915 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1916 node.nodeName) 1917 for attr in node.attributes.values(): 1918 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1919 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1920 a.specified = attr.specified 1921 1922 if deep: 1923 for child in node.childNodes: 1924 c = _clone_node(child, deep, newOwnerDocument) 1925 clone.appendChild(c) 1926 1927 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1928 clone = newOwnerDocument.createDocumentFragment() 1929 if deep: 1930 for child in node.childNodes: 1931 c = _clone_node(child, deep, newOwnerDocument) 1932 clone.appendChild(c) 1933 1934 elif node.nodeType == Node.TEXT_NODE: 1935 clone = newOwnerDocument.createTextNode(node.data) 1936 elif node.nodeType == Node.CDATA_SECTION_NODE: 1937 clone = newOwnerDocument.createCDATASection(node.data) 1938 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1939 clone = newOwnerDocument.createProcessingInstruction(node.target, 1940 node.data) 1941 elif node.nodeType == Node.COMMENT_NODE: 1942 clone = newOwnerDocument.createComment(node.data) 1943 elif node.nodeType == Node.ATTRIBUTE_NODE: 1944 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1945 node.nodeName) 1946 clone.specified = True 1947 clone.value = node.value 1948 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1949 assert node.ownerDocument is not newOwnerDocument 1950 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1951 clone = newOwnerDocument.implementation.createDocumentType( 1952 node.name, node.publicId, node.systemId) 1953 clone.ownerDocument = newOwnerDocument 1954 if deep: 1955 clone.entities._seq = [] 1956 clone.notations._seq = [] 1957 for n in node.notations._seq: 1958 notation = Notation(n.nodeName, n.publicId, n.systemId) 1959 notation.ownerDocument = newOwnerDocument 1960 clone.notations._seq.append(notation) 1961 if hasattr(n, '_call_user_data_handler'): 1962 n._call_user_data_handler(operation, n, notation) 1963 for e in node.entities._seq: 1964 entity = Entity(e.nodeName, e.publicId, e.systemId, 1965 e.notationName) 1966 entity.actualEncoding = e.actualEncoding 1967 entity.encoding = e.encoding 1968 entity.version = e.version 1969 entity.ownerDocument = newOwnerDocument 1970 clone.entities._seq.append(entity) 1971 if hasattr(e, '_call_user_data_handler'): 1972 e._call_user_data_handler(operation, e, entity) 1973 else: 1974 # Note the cloning of Document and DocumentType nodes is 1975 # implementation specific. minidom handles those cases 1976 # directly in the cloneNode() methods. 1977 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1978 1979 # Check for _call_user_data_handler() since this could conceivably 1980 # used with other DOM implementations (one of the FourThought 1981 # DOMs, perhaps?). 1982 if hasattr(node, '_call_user_data_handler'): 1983 node._call_user_data_handler(operation, node, clone) 1984 return clone 1985 1986 1987def _nssplit(qualifiedName): 1988 fields = qualifiedName.split(':', 1) 1989 if len(fields) == 2: 1990 return fields 1991 else: 1992 return (None, fields[0]) 1993 1994 1995def _do_pulldom_parse(func, args, kwargs): 1996 events = func(*args, **kwargs) 1997 toktype, rootNode = events.getEvent() 1998 events.expandNode(rootNode) 1999 events.clear() 2000 return rootNode 2001 2002def parse(file, parser=None, bufsize=None): 2003 """Parse a file into a DOM by filename or file object.""" 2004 if parser is None and not bufsize: 2005 from xml.dom import expatbuilder 2006 return expatbuilder.parse(file) 2007 else: 2008 from xml.dom import pulldom 2009 return _do_pulldom_parse(pulldom.parse, (file,), 2010 {'parser': parser, 'bufsize': bufsize}) 2011 2012def parseString(string, parser=None): 2013 """Parse a file into a DOM from a string.""" 2014 if parser is None: 2015 from xml.dom import expatbuilder 2016 return expatbuilder.parseString(string) 2017 else: 2018 from xml.dom import pulldom 2019 return _do_pulldom_parse(pulldom.parseString, (string,), 2020 {'parser': parser}) 2021 2022def getDOMImplementation(features=None): 2023 if features: 2024 if isinstance(features, str): 2025 features = domreg._parse_feature_string(features) 2026 for f, v in features: 2027 if not Document.implementation.hasFeature(f, v): 2028 return None 2029 return Document.implementation 2030