1"""Simple implementation of the Level 1 DOM. 2 3Namespaces and other minor Level 2 features are also supported. 4 5parse("foo.xml") 6 7parseString("<foo><bar/></foo>") 8 9Todo: 10===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16""" 17 18import xml.dom 19 20from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 21from xml.dom.minicompat import * 22from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 23 24# This is used by the ID-cache invalidation checks; the list isn't 25# actually complete, since the nodes being checked will never be the 26# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 27# the node being added or removed, not the node being modified.) 28# 29_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 30 xml.dom.Node.ENTITY_REFERENCE_NODE) 31 32 33class Node(xml.dom.Node): 34 namespaceURI = None # this is non-null only for elements and attributes 35 parentNode = None 36 ownerDocument = None 37 nextSibling = None 38 previousSibling = None 39 40 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 41 42 def __nonzero__(self): 43 return True 44 45 def toxml(self, encoding = None): 46 return self.toprettyxml("", "", encoding) 47 48 def toprettyxml(self, indent="\t", newl="\n", encoding = None): 49 # indent = the indentation string to prepend, per level 50 # newl = the newline string to append 51 writer = _get_StringIO() 52 if encoding is not None: 53 import codecs 54 # Can't use codecs.getwriter to preserve 2.0 compatibility 55 writer = codecs.lookup(encoding)[3](writer) 56 if self.nodeType == Node.DOCUMENT_NODE: 57 # Can pass encoding only to document, to put it into XML header 58 self.writexml(writer, "", indent, newl, encoding) 59 else: 60 self.writexml(writer, "", indent, newl) 61 return writer.getvalue() 62 63 def hasChildNodes(self): 64 if self.childNodes: 65 return True 66 else: 67 return False 68 69 def _get_childNodes(self): 70 return self.childNodes 71 72 def _get_firstChild(self): 73 if self.childNodes: 74 return self.childNodes[0] 75 76 def _get_lastChild(self): 77 if self.childNodes: 78 return self.childNodes[-1] 79 80 def insertBefore(self, newChild, refChild): 81 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 82 for c in tuple(newChild.childNodes): 83 self.insertBefore(c, refChild) 84 ### The DOM does not clearly specify what to return in this case 85 return newChild 86 if newChild.nodeType not in self._child_node_types: 87 raise xml.dom.HierarchyRequestErr( 88 "%s cannot be child of %s" % (repr(newChild), repr(self))) 89 if newChild.parentNode is not None: 90 newChild.parentNode.removeChild(newChild) 91 if refChild is None: 92 self.appendChild(newChild) 93 else: 94 try: 95 index = self.childNodes.index(refChild) 96 except ValueError: 97 raise xml.dom.NotFoundErr() 98 if newChild.nodeType in _nodeTypes_with_children: 99 _clear_id_cache(self) 100 self.childNodes.insert(index, newChild) 101 newChild.nextSibling = refChild 102 refChild.previousSibling = newChild 103 if index: 104 node = self.childNodes[index-1] 105 node.nextSibling = newChild 106 newChild.previousSibling = node 107 else: 108 newChild.previousSibling = None 109 newChild.parentNode = self 110 return newChild 111 112 def appendChild(self, node): 113 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 114 for c in tuple(node.childNodes): 115 self.appendChild(c) 116 ### The DOM does not clearly specify what to return in this case 117 return node 118 if node.nodeType not in self._child_node_types: 119 raise xml.dom.HierarchyRequestErr( 120 "%s cannot be child of %s" % (repr(node), repr(self))) 121 elif node.nodeType in _nodeTypes_with_children: 122 _clear_id_cache(self) 123 if node.parentNode is not None: 124 node.parentNode.removeChild(node) 125 _append_child(self, node) 126 node.nextSibling = None 127 return node 128 129 def replaceChild(self, newChild, oldChild): 130 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 131 refChild = oldChild.nextSibling 132 self.removeChild(oldChild) 133 return self.insertBefore(newChild, refChild) 134 if newChild.nodeType not in self._child_node_types: 135 raise xml.dom.HierarchyRequestErr( 136 "%s cannot be child of %s" % (repr(newChild), repr(self))) 137 if newChild is oldChild: 138 return 139 if newChild.parentNode is not None: 140 newChild.parentNode.removeChild(newChild) 141 try: 142 index = self.childNodes.index(oldChild) 143 except ValueError: 144 raise xml.dom.NotFoundErr() 145 self.childNodes[index] = newChild 146 newChild.parentNode = self 147 oldChild.parentNode = None 148 if (newChild.nodeType in _nodeTypes_with_children 149 or oldChild.nodeType in _nodeTypes_with_children): 150 _clear_id_cache(self) 151 newChild.nextSibling = oldChild.nextSibling 152 newChild.previousSibling = oldChild.previousSibling 153 oldChild.nextSibling = None 154 oldChild.previousSibling = None 155 if newChild.previousSibling: 156 newChild.previousSibling.nextSibling = newChild 157 if newChild.nextSibling: 158 newChild.nextSibling.previousSibling = newChild 159 return oldChild 160 161 def removeChild(self, oldChild): 162 try: 163 self.childNodes.remove(oldChild) 164 except ValueError: 165 raise xml.dom.NotFoundErr() 166 if oldChild.nextSibling is not None: 167 oldChild.nextSibling.previousSibling = oldChild.previousSibling 168 if oldChild.previousSibling is not None: 169 oldChild.previousSibling.nextSibling = oldChild.nextSibling 170 oldChild.nextSibling = oldChild.previousSibling = None 171 if oldChild.nodeType in _nodeTypes_with_children: 172 _clear_id_cache(self) 173 174 oldChild.parentNode = None 175 return oldChild 176 177 def normalize(self): 178 L = [] 179 for child in self.childNodes: 180 if child.nodeType == Node.TEXT_NODE: 181 if not child.data: 182 # empty text node; discard 183 if L: 184 L[-1].nextSibling = child.nextSibling 185 if child.nextSibling: 186 child.nextSibling.previousSibling = child.previousSibling 187 child.unlink() 188 elif L and L[-1].nodeType == child.nodeType: 189 # collapse text node 190 node = L[-1] 191 node.data = node.data + child.data 192 node.nextSibling = child.nextSibling 193 if child.nextSibling: 194 child.nextSibling.previousSibling = node 195 child.unlink() 196 else: 197 L.append(child) 198 else: 199 L.append(child) 200 if child.nodeType == Node.ELEMENT_NODE: 201 child.normalize() 202 self.childNodes[:] = L 203 204 def cloneNode(self, deep): 205 return _clone_node(self, deep, self.ownerDocument or self) 206 207 def isSupported(self, feature, version): 208 return self.ownerDocument.implementation.hasFeature(feature, version) 209 210 def _get_localName(self): 211 # Overridden in Element and Attr where localName can be Non-Null 212 return None 213 214 # Node interfaces from Level 3 (WD 9 April 2002) 215 216 def isSameNode(self, other): 217 return self is other 218 219 def getInterface(self, feature): 220 if self.isSupported(feature, None): 221 return self 222 else: 223 return None 224 225 # The "user data" functions use a dictionary that is only present 226 # if some user data has been set, so be careful not to assume it 227 # exists. 228 229 def getUserData(self, key): 230 try: 231 return self._user_data[key][0] 232 except (AttributeError, KeyError): 233 return None 234 235 def setUserData(self, key, data, handler): 236 old = None 237 try: 238 d = self._user_data 239 except AttributeError: 240 d = {} 241 self._user_data = d 242 if key in d: 243 old = d[key][0] 244 if data is None: 245 # ignore handlers passed for None 246 handler = None 247 if old is not None: 248 del d[key] 249 else: 250 d[key] = (data, handler) 251 return old 252 253 def _call_user_data_handler(self, operation, src, dst): 254 if hasattr(self, "_user_data"): 255 for key, (data, handler) in self._user_data.items(): 256 if handler is not None: 257 handler.handle(operation, key, data, src, dst) 258 259 # minidom-specific API: 260 261 def unlink(self): 262 self.parentNode = self.ownerDocument = None 263 if self.childNodes: 264 for child in self.childNodes: 265 child.unlink() 266 self.childNodes = NodeList() 267 self.previousSibling = None 268 self.nextSibling = None 269 270defproperty(Node, "firstChild", doc="First child node, or None.") 271defproperty(Node, "lastChild", doc="Last child node, or None.") 272defproperty(Node, "localName", doc="Namespace-local name of this node.") 273 274 275def _append_child(self, node): 276 # fast path with less checks; usable by DOM builders if careful 277 childNodes = self.childNodes 278 if childNodes: 279 last = childNodes[-1] 280 node.__dict__["previousSibling"] = last 281 last.__dict__["nextSibling"] = node 282 childNodes.append(node) 283 node.__dict__["parentNode"] = self 284 285def _in_document(node): 286 # return True iff node is part of a document tree 287 while node is not None: 288 if node.nodeType == Node.DOCUMENT_NODE: 289 return True 290 node = node.parentNode 291 return False 292 293def _write_data(writer, data): 294 "Writes datachars to writer." 295 if data: 296 data = data.replace("&", "&").replace("<", "<"). \ 297 replace("\"", """).replace(">", ">") 298 writer.write(data) 299 300def _get_elements_by_tagName_helper(parent, name, rc): 301 for node in parent.childNodes: 302 if node.nodeType == Node.ELEMENT_NODE and \ 303 (name == "*" or node.tagName == name): 304 rc.append(node) 305 _get_elements_by_tagName_helper(node, name, rc) 306 return rc 307 308def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 309 for node in parent.childNodes: 310 if node.nodeType == Node.ELEMENT_NODE: 311 if ((localName == "*" or node.localName == localName) and 312 (nsURI == "*" or node.namespaceURI == nsURI)): 313 rc.append(node) 314 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 315 return rc 316 317class DocumentFragment(Node): 318 nodeType = Node.DOCUMENT_FRAGMENT_NODE 319 nodeName = "#document-fragment" 320 nodeValue = None 321 attributes = None 322 parentNode = None 323 _child_node_types = (Node.ELEMENT_NODE, 324 Node.TEXT_NODE, 325 Node.CDATA_SECTION_NODE, 326 Node.ENTITY_REFERENCE_NODE, 327 Node.PROCESSING_INSTRUCTION_NODE, 328 Node.COMMENT_NODE, 329 Node.NOTATION_NODE) 330 331 def __init__(self): 332 self.childNodes = NodeList() 333 334 335class Attr(Node): 336 nodeType = Node.ATTRIBUTE_NODE 337 attributes = None 338 ownerElement = None 339 specified = False 340 _is_id = False 341 342 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 343 344 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 345 prefix=None): 346 # skip setattr for performance 347 d = self.__dict__ 348 d["nodeName"] = d["name"] = qName 349 d["namespaceURI"] = namespaceURI 350 d["prefix"] = prefix 351 d['childNodes'] = NodeList() 352 353 # Add the single child node that represents the value of the attr 354 self.childNodes.append(Text()) 355 356 # nodeValue and value are set elsewhere 357 358 def _get_localName(self): 359 return self.nodeName.split(":", 1)[-1] 360 361 def _get_specified(self): 362 return self.specified 363 364 def __setattr__(self, name, value): 365 d = self.__dict__ 366 if name in ("value", "nodeValue"): 367 d["value"] = d["nodeValue"] = value 368 d2 = self.childNodes[0].__dict__ 369 d2["data"] = d2["nodeValue"] = value 370 if self.ownerElement is not None: 371 _clear_id_cache(self.ownerElement) 372 elif name in ("name", "nodeName"): 373 d["name"] = d["nodeName"] = value 374 if self.ownerElement is not None: 375 _clear_id_cache(self.ownerElement) 376 else: 377 d[name] = value 378 379 def _set_prefix(self, prefix): 380 nsuri = self.namespaceURI 381 if prefix == "xmlns": 382 if nsuri and nsuri != XMLNS_NAMESPACE: 383 raise xml.dom.NamespaceErr( 384 "illegal use of 'xmlns' prefix for the wrong namespace") 385 d = self.__dict__ 386 d['prefix'] = prefix 387 if prefix is None: 388 newName = self.localName 389 else: 390 newName = "%s:%s" % (prefix, self.localName) 391 if self.ownerElement: 392 _clear_id_cache(self.ownerElement) 393 d['nodeName'] = d['name'] = newName 394 395 def _set_value(self, value): 396 d = self.__dict__ 397 d['value'] = d['nodeValue'] = value 398 if self.ownerElement: 399 _clear_id_cache(self.ownerElement) 400 self.childNodes[0].data = value 401 402 def unlink(self): 403 # This implementation does not call the base implementation 404 # since most of that is not needed, and the expense of the 405 # method call is not warranted. We duplicate the removal of 406 # children, but that's all we needed from the base class. 407 elem = self.ownerElement 408 if elem is not None: 409 del elem._attrs[self.nodeName] 410 del elem._attrsNS[(self.namespaceURI, self.localName)] 411 if self._is_id: 412 self._is_id = False 413 elem._magic_id_nodes -= 1 414 self.ownerDocument._magic_id_count -= 1 415 for child in self.childNodes: 416 child.unlink() 417 del self.childNodes[:] 418 419 def _get_isId(self): 420 if self._is_id: 421 return True 422 doc = self.ownerDocument 423 elem = self.ownerElement 424 if doc is None or elem is None: 425 return False 426 427 info = doc._get_elem_info(elem) 428 if info is None: 429 return False 430 if self.namespaceURI: 431 return info.isIdNS(self.namespaceURI, self.localName) 432 else: 433 return info.isId(self.nodeName) 434 435 def _get_schemaType(self): 436 doc = self.ownerDocument 437 elem = self.ownerElement 438 if doc is None or elem is None: 439 return _no_type 440 441 info = doc._get_elem_info(elem) 442 if info is None: 443 return _no_type 444 if self.namespaceURI: 445 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 446 else: 447 return info.getAttributeType(self.nodeName) 448 449defproperty(Attr, "isId", doc="True if this attribute is an ID.") 450defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 451defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 452 453 454class NamedNodeMap(object): 455 """The attribute list is a transient interface to the underlying 456 dictionaries. Mutations here will change the underlying element's 457 dictionary. 458 459 Ordering is imposed artificially and does not reflect the order of 460 attributes as found in an input document. 461 """ 462 463 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 464 465 def __init__(self, attrs, attrsNS, ownerElement): 466 self._attrs = attrs 467 self._attrsNS = attrsNS 468 self._ownerElement = ownerElement 469 470 def _get_length(self): 471 return len(self._attrs) 472 473 def item(self, index): 474 try: 475 return self[self._attrs.keys()[index]] 476 except IndexError: 477 return None 478 479 def items(self): 480 L = [] 481 for node in self._attrs.values(): 482 L.append((node.nodeName, node.value)) 483 return L 484 485 def itemsNS(self): 486 L = [] 487 for node in self._attrs.values(): 488 L.append(((node.namespaceURI, node.localName), node.value)) 489 return L 490 491 def has_key(self, key): 492 if isinstance(key, StringTypes): 493 return key in self._attrs 494 else: 495 return key in self._attrsNS 496 497 def keys(self): 498 return self._attrs.keys() 499 500 def keysNS(self): 501 return self._attrsNS.keys() 502 503 def values(self): 504 return self._attrs.values() 505 506 def get(self, name, value=None): 507 return self._attrs.get(name, value) 508 509 __len__ = _get_length 510 511 __hash__ = None # Mutable type can't be correctly hashed 512 def __cmp__(self, other): 513 if self._attrs is getattr(other, "_attrs", None): 514 return 0 515 else: 516 return cmp(id(self), id(other)) 517 518 def __getitem__(self, attname_or_tuple): 519 if isinstance(attname_or_tuple, tuple): 520 return self._attrsNS[attname_or_tuple] 521 else: 522 return self._attrs[attname_or_tuple] 523 524 # same as set 525 def __setitem__(self, attname, value): 526 if isinstance(value, StringTypes): 527 try: 528 node = self._attrs[attname] 529 except KeyError: 530 node = Attr(attname) 531 node.ownerDocument = self._ownerElement.ownerDocument 532 self.setNamedItem(node) 533 node.value = value 534 else: 535 if not isinstance(value, Attr): 536 raise TypeError, "value must be a string or Attr object" 537 node = value 538 self.setNamedItem(node) 539 540 def getNamedItem(self, name): 541 try: 542 return self._attrs[name] 543 except KeyError: 544 return None 545 546 def getNamedItemNS(self, namespaceURI, localName): 547 try: 548 return self._attrsNS[(namespaceURI, localName)] 549 except KeyError: 550 return None 551 552 def removeNamedItem(self, name): 553 n = self.getNamedItem(name) 554 if n is not None: 555 _clear_id_cache(self._ownerElement) 556 del self._attrs[n.nodeName] 557 del self._attrsNS[(n.namespaceURI, n.localName)] 558 if 'ownerElement' in n.__dict__: 559 n.__dict__['ownerElement'] = None 560 return n 561 else: 562 raise xml.dom.NotFoundErr() 563 564 def removeNamedItemNS(self, namespaceURI, localName): 565 n = self.getNamedItemNS(namespaceURI, localName) 566 if n is not None: 567 _clear_id_cache(self._ownerElement) 568 del self._attrsNS[(n.namespaceURI, n.localName)] 569 del self._attrs[n.nodeName] 570 if 'ownerElement' in n.__dict__: 571 n.__dict__['ownerElement'] = None 572 return n 573 else: 574 raise xml.dom.NotFoundErr() 575 576 def setNamedItem(self, node): 577 if not isinstance(node, Attr): 578 raise xml.dom.HierarchyRequestErr( 579 "%s cannot be child of %s" % (repr(node), repr(self))) 580 old = self._attrs.get(node.name) 581 if old: 582 old.unlink() 583 self._attrs[node.name] = node 584 self._attrsNS[(node.namespaceURI, node.localName)] = node 585 node.ownerElement = self._ownerElement 586 _clear_id_cache(node.ownerElement) 587 return old 588 589 def setNamedItemNS(self, node): 590 return self.setNamedItem(node) 591 592 def __delitem__(self, attname_or_tuple): 593 node = self[attname_or_tuple] 594 _clear_id_cache(node.ownerElement) 595 node.unlink() 596 597 def __getstate__(self): 598 return self._attrs, self._attrsNS, self._ownerElement 599 600 def __setstate__(self, state): 601 self._attrs, self._attrsNS, self._ownerElement = state 602 603defproperty(NamedNodeMap, "length", 604 doc="Number of nodes in the NamedNodeMap.") 605 606AttributeList = NamedNodeMap 607 608 609class TypeInfo(object): 610 __slots__ = 'namespace', 'name' 611 612 def __init__(self, namespace, name): 613 self.namespace = namespace 614 self.name = name 615 616 def __repr__(self): 617 if self.namespace: 618 return "<TypeInfo %r (from %r)>" % (self.name, self.namespace) 619 else: 620 return "<TypeInfo %r>" % self.name 621 622 def _get_name(self): 623 return self.name 624 625 def _get_namespace(self): 626 return self.namespace 627 628_no_type = TypeInfo(None, None) 629 630class Element(Node): 631 nodeType = Node.ELEMENT_NODE 632 nodeValue = None 633 schemaType = _no_type 634 635 _magic_id_nodes = 0 636 637 _child_node_types = (Node.ELEMENT_NODE, 638 Node.PROCESSING_INSTRUCTION_NODE, 639 Node.COMMENT_NODE, 640 Node.TEXT_NODE, 641 Node.CDATA_SECTION_NODE, 642 Node.ENTITY_REFERENCE_NODE) 643 644 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 645 localName=None): 646 self.tagName = self.nodeName = tagName 647 self.prefix = prefix 648 self.namespaceURI = namespaceURI 649 self.childNodes = NodeList() 650 651 self._attrs = {} # attributes are double-indexed: 652 self._attrsNS = {} # tagName -> Attribute 653 # URI,localName -> Attribute 654 # in the future: consider lazy generation 655 # of attribute objects this is too tricky 656 # for now because of headaches with 657 # namespaces. 658 659 def _get_localName(self): 660 return self.tagName.split(":", 1)[-1] 661 662 def _get_tagName(self): 663 return self.tagName 664 665 def unlink(self): 666 for attr in self._attrs.values(): 667 attr.unlink() 668 self._attrs = None 669 self._attrsNS = None 670 Node.unlink(self) 671 672 def getAttribute(self, attname): 673 try: 674 return self._attrs[attname].value 675 except KeyError: 676 return "" 677 678 def getAttributeNS(self, namespaceURI, localName): 679 try: 680 return self._attrsNS[(namespaceURI, localName)].value 681 except KeyError: 682 return "" 683 684 def setAttribute(self, attname, value): 685 attr = self.getAttributeNode(attname) 686 if attr is None: 687 attr = Attr(attname) 688 # for performance 689 d = attr.__dict__ 690 d["value"] = d["nodeValue"] = value 691 d["ownerDocument"] = self.ownerDocument 692 self.setAttributeNode(attr) 693 elif value != attr.value: 694 d = attr.__dict__ 695 d["value"] = d["nodeValue"] = value 696 if attr.isId: 697 _clear_id_cache(self) 698 699 def setAttributeNS(self, namespaceURI, qualifiedName, value): 700 prefix, localname = _nssplit(qualifiedName) 701 attr = self.getAttributeNodeNS(namespaceURI, localname) 702 if attr is None: 703 # for performance 704 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 705 d = attr.__dict__ 706 d["prefix"] = prefix 707 d["nodeName"] = qualifiedName 708 d["value"] = d["nodeValue"] = value 709 d["ownerDocument"] = self.ownerDocument 710 self.setAttributeNode(attr) 711 else: 712 d = attr.__dict__ 713 if value != attr.value: 714 d["value"] = d["nodeValue"] = value 715 if attr.isId: 716 _clear_id_cache(self) 717 if attr.prefix != prefix: 718 d["prefix"] = prefix 719 d["nodeName"] = qualifiedName 720 721 def getAttributeNode(self, attrname): 722 return self._attrs.get(attrname) 723 724 def getAttributeNodeNS(self, namespaceURI, localName): 725 return self._attrsNS.get((namespaceURI, localName)) 726 727 def setAttributeNode(self, attr): 728 if attr.ownerElement not in (None, self): 729 raise xml.dom.InuseAttributeErr("attribute node already owned") 730 old1 = self._attrs.get(attr.name, None) 731 if old1 is not None: 732 self.removeAttributeNode(old1) 733 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 734 if old2 is not None and old2 is not old1: 735 self.removeAttributeNode(old2) 736 _set_attribute_node(self, attr) 737 738 if old1 is not attr: 739 # It might have already been part of this node, in which case 740 # it doesn't represent a change, and should not be returned. 741 return old1 742 if old2 is not attr: 743 return old2 744 745 setAttributeNodeNS = setAttributeNode 746 747 def removeAttribute(self, name): 748 try: 749 attr = self._attrs[name] 750 except KeyError: 751 raise xml.dom.NotFoundErr() 752 self.removeAttributeNode(attr) 753 754 def removeAttributeNS(self, namespaceURI, localName): 755 try: 756 attr = self._attrsNS[(namespaceURI, localName)] 757 except KeyError: 758 raise xml.dom.NotFoundErr() 759 self.removeAttributeNode(attr) 760 761 def removeAttributeNode(self, node): 762 if node is None: 763 raise xml.dom.NotFoundErr() 764 try: 765 self._attrs[node.name] 766 except KeyError: 767 raise xml.dom.NotFoundErr() 768 _clear_id_cache(self) 769 node.unlink() 770 # Restore this since the node is still useful and otherwise 771 # unlinked 772 node.ownerDocument = self.ownerDocument 773 774 removeAttributeNodeNS = removeAttributeNode 775 776 def hasAttribute(self, name): 777 return name in self._attrs 778 779 def hasAttributeNS(self, namespaceURI, localName): 780 return (namespaceURI, localName) in self._attrsNS 781 782 def getElementsByTagName(self, name): 783 return _get_elements_by_tagName_helper(self, name, NodeList()) 784 785 def getElementsByTagNameNS(self, namespaceURI, localName): 786 return _get_elements_by_tagName_ns_helper( 787 self, namespaceURI, localName, NodeList()) 788 789 def __repr__(self): 790 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 791 792 def writexml(self, writer, indent="", addindent="", newl=""): 793 # indent = current indentation 794 # addindent = indentation to add to higher levels 795 # newl = newline string 796 writer.write(indent+"<" + self.tagName) 797 798 attrs = self._get_attributes() 799 a_names = attrs.keys() 800 a_names.sort() 801 802 for a_name in a_names: 803 writer.write(" %s=\"" % a_name) 804 _write_data(writer, attrs[a_name].value) 805 writer.write("\"") 806 if self.childNodes: 807 writer.write(">") 808 if (len(self.childNodes) == 1 and 809 self.childNodes[0].nodeType == Node.TEXT_NODE): 810 self.childNodes[0].writexml(writer, '', '', '') 811 else: 812 writer.write(newl) 813 for node in self.childNodes: 814 node.writexml(writer, indent+addindent, addindent, newl) 815 writer.write(indent) 816 writer.write("</%s>%s" % (self.tagName, newl)) 817 else: 818 writer.write("/>%s"%(newl)) 819 820 def _get_attributes(self): 821 return NamedNodeMap(self._attrs, self._attrsNS, self) 822 823 def hasAttributes(self): 824 if self._attrs: 825 return True 826 else: 827 return False 828 829 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 830 831 def setIdAttribute(self, name): 832 idAttr = self.getAttributeNode(name) 833 self.setIdAttributeNode(idAttr) 834 835 def setIdAttributeNS(self, namespaceURI, localName): 836 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 837 self.setIdAttributeNode(idAttr) 838 839 def setIdAttributeNode(self, idAttr): 840 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 841 raise xml.dom.NotFoundErr() 842 if _get_containing_entref(self) is not None: 843 raise xml.dom.NoModificationAllowedErr() 844 if not idAttr._is_id: 845 idAttr.__dict__['_is_id'] = True 846 self._magic_id_nodes += 1 847 self.ownerDocument._magic_id_count += 1 848 _clear_id_cache(self) 849 850defproperty(Element, "attributes", 851 doc="NamedNodeMap of attributes on the element.") 852defproperty(Element, "localName", 853 doc="Namespace-local name of this element.") 854 855 856def _set_attribute_node(element, attr): 857 _clear_id_cache(element) 858 element._attrs[attr.name] = attr 859 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 860 861 # This creates a circular reference, but Element.unlink() 862 # breaks the cycle since the references to the attribute 863 # dictionaries are tossed. 864 attr.__dict__['ownerElement'] = element 865 866 867class Childless: 868 """Mixin that makes childless-ness easy to implement and avoids 869 the complexity of the Node methods that deal with children. 870 """ 871 872 attributes = None 873 childNodes = EmptyNodeList() 874 firstChild = None 875 lastChild = None 876 877 def _get_firstChild(self): 878 return None 879 880 def _get_lastChild(self): 881 return None 882 883 def appendChild(self, node): 884 raise xml.dom.HierarchyRequestErr( 885 self.nodeName + " nodes cannot have children") 886 887 def hasChildNodes(self): 888 return False 889 890 def insertBefore(self, newChild, refChild): 891 raise xml.dom.HierarchyRequestErr( 892 self.nodeName + " nodes do not have children") 893 894 def removeChild(self, oldChild): 895 raise xml.dom.NotFoundErr( 896 self.nodeName + " nodes do not have children") 897 898 def normalize(self): 899 # For childless nodes, normalize() has nothing to do. 900 pass 901 902 def replaceChild(self, newChild, oldChild): 903 raise xml.dom.HierarchyRequestErr( 904 self.nodeName + " nodes do not have children") 905 906 907class ProcessingInstruction(Childless, Node): 908 nodeType = Node.PROCESSING_INSTRUCTION_NODE 909 910 def __init__(self, target, data): 911 self.target = self.nodeName = target 912 self.data = self.nodeValue = data 913 914 def _get_data(self): 915 return self.data 916 def _set_data(self, value): 917 d = self.__dict__ 918 d['data'] = d['nodeValue'] = value 919 920 def _get_target(self): 921 return self.target 922 def _set_target(self, value): 923 d = self.__dict__ 924 d['target'] = d['nodeName'] = value 925 926 def __setattr__(self, name, value): 927 if name == "data" or name == "nodeValue": 928 self.__dict__['data'] = self.__dict__['nodeValue'] = value 929 elif name == "target" or name == "nodeName": 930 self.__dict__['target'] = self.__dict__['nodeName'] = value 931 else: 932 self.__dict__[name] = value 933 934 def writexml(self, writer, indent="", addindent="", newl=""): 935 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 936 937 938class CharacterData(Childless, Node): 939 def _get_length(self): 940 return len(self.data) 941 __len__ = _get_length 942 943 def _get_data(self): 944 return self.__dict__['data'] 945 def _set_data(self, data): 946 d = self.__dict__ 947 d['data'] = d['nodeValue'] = data 948 949 _get_nodeValue = _get_data 950 _set_nodeValue = _set_data 951 952 def __setattr__(self, name, value): 953 if name == "data" or name == "nodeValue": 954 self.__dict__['data'] = self.__dict__['nodeValue'] = value 955 else: 956 self.__dict__[name] = value 957 958 def __repr__(self): 959 data = self.data 960 if len(data) > 10: 961 dotdotdot = "..." 962 else: 963 dotdotdot = "" 964 return '<DOM %s node "%r%s">' % ( 965 self.__class__.__name__, data[0:10], dotdotdot) 966 967 def substringData(self, offset, count): 968 if offset < 0: 969 raise xml.dom.IndexSizeErr("offset cannot be negative") 970 if offset >= len(self.data): 971 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 972 if count < 0: 973 raise xml.dom.IndexSizeErr("count cannot be negative") 974 return self.data[offset:offset+count] 975 976 def appendData(self, arg): 977 self.data = self.data + arg 978 979 def insertData(self, offset, arg): 980 if offset < 0: 981 raise xml.dom.IndexSizeErr("offset cannot be negative") 982 if offset >= len(self.data): 983 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 984 if arg: 985 self.data = "%s%s%s" % ( 986 self.data[:offset], arg, self.data[offset:]) 987 988 def deleteData(self, offset, count): 989 if offset < 0: 990 raise xml.dom.IndexSizeErr("offset cannot be negative") 991 if offset >= len(self.data): 992 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 993 if count < 0: 994 raise xml.dom.IndexSizeErr("count cannot be negative") 995 if count: 996 self.data = self.data[:offset] + self.data[offset+count:] 997 998 def replaceData(self, offset, count, arg): 999 if offset < 0: 1000 raise xml.dom.IndexSizeErr("offset cannot be negative") 1001 if offset >= len(self.data): 1002 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1003 if count < 0: 1004 raise xml.dom.IndexSizeErr("count cannot be negative") 1005 if count: 1006 self.data = "%s%s%s" % ( 1007 self.data[:offset], arg, self.data[offset+count:]) 1008 1009defproperty(CharacterData, "length", doc="Length of the string data.") 1010 1011 1012class Text(CharacterData): 1013 # Make sure we don't add an instance __dict__ if we don't already 1014 # have one, at least when that's possible: 1015 # XXX this does not work, CharacterData is an old-style class 1016 # __slots__ = () 1017 1018 nodeType = Node.TEXT_NODE 1019 nodeName = "#text" 1020 attributes = None 1021 1022 def splitText(self, offset): 1023 if offset < 0 or offset > len(self.data): 1024 raise xml.dom.IndexSizeErr("illegal offset value") 1025 newText = self.__class__() 1026 newText.data = self.data[offset:] 1027 newText.ownerDocument = self.ownerDocument 1028 next = self.nextSibling 1029 if self.parentNode and self in self.parentNode.childNodes: 1030 if next is None: 1031 self.parentNode.appendChild(newText) 1032 else: 1033 self.parentNode.insertBefore(newText, next) 1034 self.data = self.data[:offset] 1035 return newText 1036 1037 def writexml(self, writer, indent="", addindent="", newl=""): 1038 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1039 1040 # DOM Level 3 (WD 9 April 2002) 1041 1042 def _get_wholeText(self): 1043 L = [self.data] 1044 n = self.previousSibling 1045 while n is not None: 1046 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1047 L.insert(0, n.data) 1048 n = n.previousSibling 1049 else: 1050 break 1051 n = self.nextSibling 1052 while n is not None: 1053 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1054 L.append(n.data) 1055 n = n.nextSibling 1056 else: 1057 break 1058 return ''.join(L) 1059 1060 def replaceWholeText(self, content): 1061 # XXX This needs to be seriously changed if minidom ever 1062 # supports EntityReference nodes. 1063 parent = self.parentNode 1064 n = self.previousSibling 1065 while n is not None: 1066 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1067 next = n.previousSibling 1068 parent.removeChild(n) 1069 n = next 1070 else: 1071 break 1072 n = self.nextSibling 1073 if not content: 1074 parent.removeChild(self) 1075 while n is not None: 1076 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1077 next = n.nextSibling 1078 parent.removeChild(n) 1079 n = next 1080 else: 1081 break 1082 if content: 1083 d = self.__dict__ 1084 d['data'] = content 1085 d['nodeValue'] = content 1086 return self 1087 else: 1088 return None 1089 1090 def _get_isWhitespaceInElementContent(self): 1091 if self.data.strip(): 1092 return False 1093 elem = _get_containing_element(self) 1094 if elem is None: 1095 return False 1096 info = self.ownerDocument._get_elem_info(elem) 1097 if info is None: 1098 return False 1099 else: 1100 return info.isElementContent() 1101 1102defproperty(Text, "isWhitespaceInElementContent", 1103 doc="True iff this text node contains only whitespace" 1104 " and is in element content.") 1105defproperty(Text, "wholeText", 1106 doc="The text of all logically-adjacent text nodes.") 1107 1108 1109def _get_containing_element(node): 1110 c = node.parentNode 1111 while c is not None: 1112 if c.nodeType == Node.ELEMENT_NODE: 1113 return c 1114 c = c.parentNode 1115 return None 1116 1117def _get_containing_entref(node): 1118 c = node.parentNode 1119 while c is not None: 1120 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1121 return c 1122 c = c.parentNode 1123 return None 1124 1125 1126class Comment(Childless, CharacterData): 1127 nodeType = Node.COMMENT_NODE 1128 nodeName = "#comment" 1129 1130 def __init__(self, data): 1131 self.data = self.nodeValue = data 1132 1133 def writexml(self, writer, indent="", addindent="", newl=""): 1134 if "--" in self.data: 1135 raise ValueError("'--' is not allowed in a comment node") 1136 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1137 1138 1139class CDATASection(Text): 1140 # Make sure we don't add an instance __dict__ if we don't already 1141 # have one, at least when that's possible: 1142 # XXX this does not work, Text is an old-style class 1143 # __slots__ = () 1144 1145 nodeType = Node.CDATA_SECTION_NODE 1146 nodeName = "#cdata-section" 1147 1148 def writexml(self, writer, indent="", addindent="", newl=""): 1149 if self.data.find("]]>") >= 0: 1150 raise ValueError("']]>' not allowed in a CDATA section") 1151 writer.write("<![CDATA[%s]]>" % self.data) 1152 1153 1154class ReadOnlySequentialNamedNodeMap(object): 1155 __slots__ = '_seq', 1156 1157 def __init__(self, seq=()): 1158 # seq should be a list or tuple 1159 self._seq = seq 1160 1161 def __len__(self): 1162 return len(self._seq) 1163 1164 def _get_length(self): 1165 return len(self._seq) 1166 1167 def getNamedItem(self, name): 1168 for n in self._seq: 1169 if n.nodeName == name: 1170 return n 1171 1172 def getNamedItemNS(self, namespaceURI, localName): 1173 for n in self._seq: 1174 if n.namespaceURI == namespaceURI and n.localName == localName: 1175 return n 1176 1177 def __getitem__(self, name_or_tuple): 1178 if isinstance(name_or_tuple, tuple): 1179 node = self.getNamedItemNS(*name_or_tuple) 1180 else: 1181 node = self.getNamedItem(name_or_tuple) 1182 if node is None: 1183 raise KeyError, name_or_tuple 1184 return node 1185 1186 def item(self, index): 1187 if index < 0: 1188 return None 1189 try: 1190 return self._seq[index] 1191 except IndexError: 1192 return None 1193 1194 def removeNamedItem(self, name): 1195 raise xml.dom.NoModificationAllowedErr( 1196 "NamedNodeMap instance is read-only") 1197 1198 def removeNamedItemNS(self, namespaceURI, localName): 1199 raise xml.dom.NoModificationAllowedErr( 1200 "NamedNodeMap instance is read-only") 1201 1202 def setNamedItem(self, node): 1203 raise xml.dom.NoModificationAllowedErr( 1204 "NamedNodeMap instance is read-only") 1205 1206 def setNamedItemNS(self, node): 1207 raise xml.dom.NoModificationAllowedErr( 1208 "NamedNodeMap instance is read-only") 1209 1210 def __getstate__(self): 1211 return [self._seq] 1212 1213 def __setstate__(self, state): 1214 self._seq = state[0] 1215 1216defproperty(ReadOnlySequentialNamedNodeMap, "length", 1217 doc="Number of entries in the NamedNodeMap.") 1218 1219 1220class Identified: 1221 """Mix-in class that supports the publicId and systemId attributes.""" 1222 1223 # XXX this does not work, this is an old-style class 1224 # __slots__ = 'publicId', 'systemId' 1225 1226 def _identified_mixin_init(self, publicId, systemId): 1227 self.publicId = publicId 1228 self.systemId = systemId 1229 1230 def _get_publicId(self): 1231 return self.publicId 1232 1233 def _get_systemId(self): 1234 return self.systemId 1235 1236class DocumentType(Identified, Childless, Node): 1237 nodeType = Node.DOCUMENT_TYPE_NODE 1238 nodeValue = None 1239 name = None 1240 publicId = None 1241 systemId = None 1242 internalSubset = None 1243 1244 def __init__(self, qualifiedName): 1245 self.entities = ReadOnlySequentialNamedNodeMap() 1246 self.notations = ReadOnlySequentialNamedNodeMap() 1247 if qualifiedName: 1248 prefix, localname = _nssplit(qualifiedName) 1249 self.name = localname 1250 self.nodeName = self.name 1251 1252 def _get_internalSubset(self): 1253 return self.internalSubset 1254 1255 def cloneNode(self, deep): 1256 if self.ownerDocument is None: 1257 # it's ok 1258 clone = DocumentType(None) 1259 clone.name = self.name 1260 clone.nodeName = self.name 1261 operation = xml.dom.UserDataHandler.NODE_CLONED 1262 if deep: 1263 clone.entities._seq = [] 1264 clone.notations._seq = [] 1265 for n in self.notations._seq: 1266 notation = Notation(n.nodeName, n.publicId, n.systemId) 1267 clone.notations._seq.append(notation) 1268 n._call_user_data_handler(operation, n, notation) 1269 for e in self.entities._seq: 1270 entity = Entity(e.nodeName, e.publicId, e.systemId, 1271 e.notationName) 1272 entity.actualEncoding = e.actualEncoding 1273 entity.encoding = e.encoding 1274 entity.version = e.version 1275 clone.entities._seq.append(entity) 1276 e._call_user_data_handler(operation, n, entity) 1277 self._call_user_data_handler(operation, self, clone) 1278 return clone 1279 else: 1280 return None 1281 1282 def writexml(self, writer, indent="", addindent="", newl=""): 1283 writer.write("<!DOCTYPE ") 1284 writer.write(self.name) 1285 if self.publicId: 1286 writer.write("%s PUBLIC '%s'%s '%s'" 1287 % (newl, self.publicId, newl, self.systemId)) 1288 elif self.systemId: 1289 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1290 if self.internalSubset is not None: 1291 writer.write(" [") 1292 writer.write(self.internalSubset) 1293 writer.write("]") 1294 writer.write(">"+newl) 1295 1296class Entity(Identified, Node): 1297 attributes = None 1298 nodeType = Node.ENTITY_NODE 1299 nodeValue = None 1300 1301 actualEncoding = None 1302 encoding = None 1303 version = None 1304 1305 def __init__(self, name, publicId, systemId, notation): 1306 self.nodeName = name 1307 self.notationName = notation 1308 self.childNodes = NodeList() 1309 self._identified_mixin_init(publicId, systemId) 1310 1311 def _get_actualEncoding(self): 1312 return self.actualEncoding 1313 1314 def _get_encoding(self): 1315 return self.encoding 1316 1317 def _get_version(self): 1318 return self.version 1319 1320 def appendChild(self, newChild): 1321 raise xml.dom.HierarchyRequestErr( 1322 "cannot append children to an entity node") 1323 1324 def insertBefore(self, newChild, refChild): 1325 raise xml.dom.HierarchyRequestErr( 1326 "cannot insert children below an entity node") 1327 1328 def removeChild(self, oldChild): 1329 raise xml.dom.HierarchyRequestErr( 1330 "cannot remove children from an entity node") 1331 1332 def replaceChild(self, newChild, oldChild): 1333 raise xml.dom.HierarchyRequestErr( 1334 "cannot replace children of an entity node") 1335 1336class Notation(Identified, Childless, Node): 1337 nodeType = Node.NOTATION_NODE 1338 nodeValue = None 1339 1340 def __init__(self, name, publicId, systemId): 1341 self.nodeName = name 1342 self._identified_mixin_init(publicId, systemId) 1343 1344 1345class DOMImplementation(DOMImplementationLS): 1346 _features = [("core", "1.0"), 1347 ("core", "2.0"), 1348 ("core", None), 1349 ("xml", "1.0"), 1350 ("xml", "2.0"), 1351 ("xml", None), 1352 ("ls-load", "3.0"), 1353 ("ls-load", None), 1354 ] 1355 1356 def hasFeature(self, feature, version): 1357 if version == "": 1358 version = None 1359 return (feature.lower(), version) in self._features 1360 1361 def createDocument(self, namespaceURI, qualifiedName, doctype): 1362 if doctype and doctype.parentNode is not None: 1363 raise xml.dom.WrongDocumentErr( 1364 "doctype object owned by another DOM tree") 1365 doc = self._create_document() 1366 1367 add_root_element = not (namespaceURI is None 1368 and qualifiedName is None 1369 and doctype is None) 1370 1371 if not qualifiedName and add_root_element: 1372 # The spec is unclear what to raise here; SyntaxErr 1373 # would be the other obvious candidate. Since Xerces raises 1374 # InvalidCharacterErr, and since SyntaxErr is not listed 1375 # for createDocument, that seems to be the better choice. 1376 # XXX: need to check for illegal characters here and in 1377 # createElement. 1378 1379 # DOM Level III clears this up when talking about the return value 1380 # of this function. If namespaceURI, qName and DocType are 1381 # Null the document is returned without a document element 1382 # Otherwise if doctype or namespaceURI are not None 1383 # Then we go back to the above problem 1384 raise xml.dom.InvalidCharacterErr("Element with no name") 1385 1386 if add_root_element: 1387 prefix, localname = _nssplit(qualifiedName) 1388 if prefix == "xml" \ 1389 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1390 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1391 if prefix and not namespaceURI: 1392 raise xml.dom.NamespaceErr( 1393 "illegal use of prefix without namespaces") 1394 element = doc.createElementNS(namespaceURI, qualifiedName) 1395 if doctype: 1396 doc.appendChild(doctype) 1397 doc.appendChild(element) 1398 1399 if doctype: 1400 doctype.parentNode = doctype.ownerDocument = doc 1401 1402 doc.doctype = doctype 1403 doc.implementation = self 1404 return doc 1405 1406 def createDocumentType(self, qualifiedName, publicId, systemId): 1407 doctype = DocumentType(qualifiedName) 1408 doctype.publicId = publicId 1409 doctype.systemId = systemId 1410 return doctype 1411 1412 # DOM Level 3 (WD 9 April 2002) 1413 1414 def getInterface(self, feature): 1415 if self.hasFeature(feature, None): 1416 return self 1417 else: 1418 return None 1419 1420 # internal 1421 def _create_document(self): 1422 return Document() 1423 1424class ElementInfo(object): 1425 """Object that represents content-model information for an element. 1426 1427 This implementation is not expected to be used in practice; DOM 1428 builders should provide implementations which do the right thing 1429 using information available to it. 1430 1431 """ 1432 1433 __slots__ = 'tagName', 1434 1435 def __init__(self, name): 1436 self.tagName = name 1437 1438 def getAttributeType(self, aname): 1439 return _no_type 1440 1441 def getAttributeTypeNS(self, namespaceURI, localName): 1442 return _no_type 1443 1444 def isElementContent(self): 1445 return False 1446 1447 def isEmpty(self): 1448 """Returns true iff this element is declared to have an EMPTY 1449 content model.""" 1450 return False 1451 1452 def isId(self, aname): 1453 """Returns true iff the named attribute is a DTD-style ID.""" 1454 return False 1455 1456 def isIdNS(self, namespaceURI, localName): 1457 """Returns true iff the identified attribute is a DTD-style ID.""" 1458 return False 1459 1460 def __getstate__(self): 1461 return self.tagName 1462 1463 def __setstate__(self, state): 1464 self.tagName = state 1465 1466def _clear_id_cache(node): 1467 if node.nodeType == Node.DOCUMENT_NODE: 1468 node._id_cache.clear() 1469 node._id_search_stack = None 1470 elif _in_document(node): 1471 node.ownerDocument._id_cache.clear() 1472 node.ownerDocument._id_search_stack= None 1473 1474class Document(Node, DocumentLS): 1475 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1476 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1477 1478 nodeType = Node.DOCUMENT_NODE 1479 nodeName = "#document" 1480 nodeValue = None 1481 attributes = None 1482 doctype = None 1483 parentNode = None 1484 previousSibling = nextSibling = None 1485 1486 implementation = DOMImplementation() 1487 1488 # Document attributes from Level 3 (WD 9 April 2002) 1489 1490 actualEncoding = None 1491 encoding = None 1492 standalone = None 1493 version = None 1494 strictErrorChecking = False 1495 errorHandler = None 1496 documentURI = None 1497 1498 _magic_id_count = 0 1499 1500 def __init__(self): 1501 self.childNodes = NodeList() 1502 # mapping of (namespaceURI, localName) -> ElementInfo 1503 # and tagName -> ElementInfo 1504 self._elem_info = {} 1505 self._id_cache = {} 1506 self._id_search_stack = None 1507 1508 def _get_elem_info(self, element): 1509 if element.namespaceURI: 1510 key = element.namespaceURI, element.localName 1511 else: 1512 key = element.tagName 1513 return self._elem_info.get(key) 1514 1515 def _get_actualEncoding(self): 1516 return self.actualEncoding 1517 1518 def _get_doctype(self): 1519 return self.doctype 1520 1521 def _get_documentURI(self): 1522 return self.documentURI 1523 1524 def _get_encoding(self): 1525 return self.encoding 1526 1527 def _get_errorHandler(self): 1528 return self.errorHandler 1529 1530 def _get_standalone(self): 1531 return self.standalone 1532 1533 def _get_strictErrorChecking(self): 1534 return self.strictErrorChecking 1535 1536 def _get_version(self): 1537 return self.version 1538 1539 def appendChild(self, node): 1540 if node.nodeType not in self._child_node_types: 1541 raise xml.dom.HierarchyRequestErr( 1542 "%s cannot be child of %s" % (repr(node), repr(self))) 1543 if node.parentNode is not None: 1544 # This needs to be done before the next test since this 1545 # may *be* the document element, in which case it should 1546 # end up re-ordered to the end. 1547 node.parentNode.removeChild(node) 1548 1549 if node.nodeType == Node.ELEMENT_NODE \ 1550 and self._get_documentElement(): 1551 raise xml.dom.HierarchyRequestErr( 1552 "two document elements disallowed") 1553 return Node.appendChild(self, node) 1554 1555 def removeChild(self, oldChild): 1556 try: 1557 self.childNodes.remove(oldChild) 1558 except ValueError: 1559 raise xml.dom.NotFoundErr() 1560 oldChild.nextSibling = oldChild.previousSibling = None 1561 oldChild.parentNode = None 1562 if self.documentElement is oldChild: 1563 self.documentElement = None 1564 1565 return oldChild 1566 1567 def _get_documentElement(self): 1568 for node in self.childNodes: 1569 if node.nodeType == Node.ELEMENT_NODE: 1570 return node 1571 1572 def unlink(self): 1573 if self.doctype is not None: 1574 self.doctype.unlink() 1575 self.doctype = None 1576 Node.unlink(self) 1577 1578 def cloneNode(self, deep): 1579 if not deep: 1580 return None 1581 clone = self.implementation.createDocument(None, None, None) 1582 clone.encoding = self.encoding 1583 clone.standalone = self.standalone 1584 clone.version = self.version 1585 for n in self.childNodes: 1586 childclone = _clone_node(n, deep, clone) 1587 assert childclone.ownerDocument.isSameNode(clone) 1588 clone.childNodes.append(childclone) 1589 if childclone.nodeType == Node.DOCUMENT_NODE: 1590 assert clone.documentElement is None 1591 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1592 assert clone.doctype is None 1593 clone.doctype = childclone 1594 childclone.parentNode = clone 1595 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1596 self, clone) 1597 return clone 1598 1599 def createDocumentFragment(self): 1600 d = DocumentFragment() 1601 d.ownerDocument = self 1602 return d 1603 1604 def createElement(self, tagName): 1605 e = Element(tagName) 1606 e.ownerDocument = self 1607 return e 1608 1609 def createTextNode(self, data): 1610 if not isinstance(data, StringTypes): 1611 raise TypeError, "node contents must be a string" 1612 t = Text() 1613 t.data = data 1614 t.ownerDocument = self 1615 return t 1616 1617 def createCDATASection(self, data): 1618 if not isinstance(data, StringTypes): 1619 raise TypeError, "node contents must be a string" 1620 c = CDATASection() 1621 c.data = data 1622 c.ownerDocument = self 1623 return c 1624 1625 def createComment(self, data): 1626 c = Comment(data) 1627 c.ownerDocument = self 1628 return c 1629 1630 def createProcessingInstruction(self, target, data): 1631 p = ProcessingInstruction(target, data) 1632 p.ownerDocument = self 1633 return p 1634 1635 def createAttribute(self, qName): 1636 a = Attr(qName) 1637 a.ownerDocument = self 1638 a.value = "" 1639 return a 1640 1641 def createElementNS(self, namespaceURI, qualifiedName): 1642 prefix, localName = _nssplit(qualifiedName) 1643 e = Element(qualifiedName, namespaceURI, prefix) 1644 e.ownerDocument = self 1645 return e 1646 1647 def createAttributeNS(self, namespaceURI, qualifiedName): 1648 prefix, localName = _nssplit(qualifiedName) 1649 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1650 a.ownerDocument = self 1651 a.value = "" 1652 return a 1653 1654 # A couple of implementation-specific helpers to create node types 1655 # not supported by the W3C DOM specs: 1656 1657 def _create_entity(self, name, publicId, systemId, notationName): 1658 e = Entity(name, publicId, systemId, notationName) 1659 e.ownerDocument = self 1660 return e 1661 1662 def _create_notation(self, name, publicId, systemId): 1663 n = Notation(name, publicId, systemId) 1664 n.ownerDocument = self 1665 return n 1666 1667 def getElementById(self, id): 1668 if id in self._id_cache: 1669 return self._id_cache[id] 1670 if not (self._elem_info or self._magic_id_count): 1671 return None 1672 1673 stack = self._id_search_stack 1674 if stack is None: 1675 # we never searched before, or the cache has been cleared 1676 stack = [self.documentElement] 1677 self._id_search_stack = stack 1678 elif not stack: 1679 # Previous search was completed and cache is still valid; 1680 # no matching node. 1681 return None 1682 1683 result = None 1684 while stack: 1685 node = stack.pop() 1686 # add child elements to stack for continued searching 1687 stack.extend([child for child in node.childNodes 1688 if child.nodeType in _nodeTypes_with_children]) 1689 # check this node 1690 info = self._get_elem_info(node) 1691 if info: 1692 # We have to process all ID attributes before 1693 # returning in order to get all the attributes set to 1694 # be IDs using Element.setIdAttribute*(). 1695 for attr in node.attributes.values(): 1696 if attr.namespaceURI: 1697 if info.isIdNS(attr.namespaceURI, attr.localName): 1698 self._id_cache[attr.value] = node 1699 if attr.value == id: 1700 result = node 1701 elif not node._magic_id_nodes: 1702 break 1703 elif info.isId(attr.name): 1704 self._id_cache[attr.value] = node 1705 if attr.value == id: 1706 result = node 1707 elif not node._magic_id_nodes: 1708 break 1709 elif attr._is_id: 1710 self._id_cache[attr.value] = node 1711 if attr.value == id: 1712 result = node 1713 elif node._magic_id_nodes == 1: 1714 break 1715 elif node._magic_id_nodes: 1716 for attr in node.attributes.values(): 1717 if attr._is_id: 1718 self._id_cache[attr.value] = node 1719 if attr.value == id: 1720 result = node 1721 if result is not None: 1722 break 1723 return result 1724 1725 def getElementsByTagName(self, name): 1726 return _get_elements_by_tagName_helper(self, name, NodeList()) 1727 1728 def getElementsByTagNameNS(self, namespaceURI, localName): 1729 return _get_elements_by_tagName_ns_helper( 1730 self, namespaceURI, localName, NodeList()) 1731 1732 def isSupported(self, feature, version): 1733 return self.implementation.hasFeature(feature, version) 1734 1735 def importNode(self, node, deep): 1736 if node.nodeType == Node.DOCUMENT_NODE: 1737 raise xml.dom.NotSupportedErr("cannot import document nodes") 1738 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1739 raise xml.dom.NotSupportedErr("cannot import document type nodes") 1740 return _clone_node(node, deep, self) 1741 1742 def writexml(self, writer, indent="", addindent="", newl="", 1743 encoding = None): 1744 if encoding is None: 1745 writer.write('<?xml version="1.0" ?>'+newl) 1746 else: 1747 writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) 1748 for node in self.childNodes: 1749 node.writexml(writer, indent, addindent, newl) 1750 1751 # DOM Level 3 (WD 9 April 2002) 1752 1753 def renameNode(self, n, namespaceURI, name): 1754 if n.ownerDocument is not self: 1755 raise xml.dom.WrongDocumentErr( 1756 "cannot rename nodes from other documents;\n" 1757 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1758 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1759 raise xml.dom.NotSupportedErr( 1760 "renameNode() only applies to element and attribute nodes") 1761 if namespaceURI != EMPTY_NAMESPACE: 1762 if ':' in name: 1763 prefix, localName = name.split(':', 1) 1764 if ( prefix == "xmlns" 1765 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1766 raise xml.dom.NamespaceErr( 1767 "illegal use of 'xmlns' prefix") 1768 else: 1769 if ( name == "xmlns" 1770 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1771 and n.nodeType == Node.ATTRIBUTE_NODE): 1772 raise xml.dom.NamespaceErr( 1773 "illegal use of the 'xmlns' attribute") 1774 prefix = None 1775 localName = name 1776 else: 1777 prefix = None 1778 localName = None 1779 if n.nodeType == Node.ATTRIBUTE_NODE: 1780 element = n.ownerElement 1781 if element is not None: 1782 is_id = n._is_id 1783 element.removeAttributeNode(n) 1784 else: 1785 element = None 1786 # avoid __setattr__ 1787 d = n.__dict__ 1788 d['prefix'] = prefix 1789 d['localName'] = localName 1790 d['namespaceURI'] = namespaceURI 1791 d['nodeName'] = name 1792 if n.nodeType == Node.ELEMENT_NODE: 1793 d['tagName'] = name 1794 else: 1795 # attribute node 1796 d['name'] = name 1797 if element is not None: 1798 element.setAttributeNode(n) 1799 if is_id: 1800 element.setIdAttributeNode(n) 1801 # It's not clear from a semantic perspective whether we should 1802 # call the user data handlers for the NODE_RENAMED event since 1803 # we're re-using the existing node. The draft spec has been 1804 # interpreted as meaning "no, don't call the handler unless a 1805 # new node is created." 1806 return n 1807 1808defproperty(Document, "documentElement", 1809 doc="Top-level element of this document.") 1810 1811 1812def _clone_node(node, deep, newOwnerDocument): 1813 """ 1814 Clone a node and give it the new owner document. 1815 Called by Node.cloneNode and Document.importNode 1816 """ 1817 if node.ownerDocument.isSameNode(newOwnerDocument): 1818 operation = xml.dom.UserDataHandler.NODE_CLONED 1819 else: 1820 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1821 if node.nodeType == Node.ELEMENT_NODE: 1822 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1823 node.nodeName) 1824 for attr in node.attributes.values(): 1825 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1826 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1827 a.specified = attr.specified 1828 1829 if deep: 1830 for child in node.childNodes: 1831 c = _clone_node(child, deep, newOwnerDocument) 1832 clone.appendChild(c) 1833 1834 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1835 clone = newOwnerDocument.createDocumentFragment() 1836 if deep: 1837 for child in node.childNodes: 1838 c = _clone_node(child, deep, newOwnerDocument) 1839 clone.appendChild(c) 1840 1841 elif node.nodeType == Node.TEXT_NODE: 1842 clone = newOwnerDocument.createTextNode(node.data) 1843 elif node.nodeType == Node.CDATA_SECTION_NODE: 1844 clone = newOwnerDocument.createCDATASection(node.data) 1845 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1846 clone = newOwnerDocument.createProcessingInstruction(node.target, 1847 node.data) 1848 elif node.nodeType == Node.COMMENT_NODE: 1849 clone = newOwnerDocument.createComment(node.data) 1850 elif node.nodeType == Node.ATTRIBUTE_NODE: 1851 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1852 node.nodeName) 1853 clone.specified = True 1854 clone.value = node.value 1855 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1856 assert node.ownerDocument is not newOwnerDocument 1857 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1858 clone = newOwnerDocument.implementation.createDocumentType( 1859 node.name, node.publicId, node.systemId) 1860 clone.ownerDocument = newOwnerDocument 1861 if deep: 1862 clone.entities._seq = [] 1863 clone.notations._seq = [] 1864 for n in node.notations._seq: 1865 notation = Notation(n.nodeName, n.publicId, n.systemId) 1866 notation.ownerDocument = newOwnerDocument 1867 clone.notations._seq.append(notation) 1868 if hasattr(n, '_call_user_data_handler'): 1869 n._call_user_data_handler(operation, n, notation) 1870 for e in node.entities._seq: 1871 entity = Entity(e.nodeName, e.publicId, e.systemId, 1872 e.notationName) 1873 entity.actualEncoding = e.actualEncoding 1874 entity.encoding = e.encoding 1875 entity.version = e.version 1876 entity.ownerDocument = newOwnerDocument 1877 clone.entities._seq.append(entity) 1878 if hasattr(e, '_call_user_data_handler'): 1879 e._call_user_data_handler(operation, n, entity) 1880 else: 1881 # Note the cloning of Document and DocumentType nodes is 1882 # implementation specific. minidom handles those cases 1883 # directly in the cloneNode() methods. 1884 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1885 1886 # Check for _call_user_data_handler() since this could conceivably 1887 # used with other DOM implementations (one of the FourThought 1888 # DOMs, perhaps?). 1889 if hasattr(node, '_call_user_data_handler'): 1890 node._call_user_data_handler(operation, node, clone) 1891 return clone 1892 1893 1894def _nssplit(qualifiedName): 1895 fields = qualifiedName.split(':', 1) 1896 if len(fields) == 2: 1897 return fields 1898 else: 1899 return (None, fields[0]) 1900 1901 1902def _get_StringIO(): 1903 # we can't use cStringIO since it doesn't support Unicode strings 1904 from StringIO import StringIO 1905 return StringIO() 1906 1907def _do_pulldom_parse(func, args, kwargs): 1908 events = func(*args, **kwargs) 1909 toktype, rootNode = events.getEvent() 1910 events.expandNode(rootNode) 1911 events.clear() 1912 return rootNode 1913 1914def parse(file, parser=None, bufsize=None): 1915 """Parse a file into a DOM by filename or file object.""" 1916 if parser is None and not bufsize: 1917 from xml.dom import expatbuilder 1918 return expatbuilder.parse(file) 1919 else: 1920 from xml.dom import pulldom 1921 return _do_pulldom_parse(pulldom.parse, (file,), 1922 {'parser': parser, 'bufsize': bufsize}) 1923 1924def parseString(string, parser=None): 1925 """Parse a file into a DOM from a string.""" 1926 if parser is None: 1927 from xml.dom import expatbuilder 1928 return expatbuilder.parseString(string) 1929 else: 1930 from xml.dom import pulldom 1931 return _do_pulldom_parse(pulldom.parseString, (string,), 1932 {'parser': parser}) 1933 1934def getDOMImplementation(features=None): 1935 if features: 1936 if isinstance(features, StringTypes): 1937 features = domreg._parse_feature_string(features) 1938 for f, v in features: 1939 if not Document.implementation.hasFeature(f, v): 1940 return None 1941 return Document.implementation 1942