1# xml.etree test. This file contains enough tests to make sure that 2# all included components work as they should. 3# Large parts are extracted from the upstream test suite. 4 5# IMPORTANT: the same doctests are run from "test_xml_etree_c" in 6# order to ensure consistency between the C implementation and the 7# Python implementation. 8# 9# For this purpose, the module-level "ET" symbol is temporarily 10# monkey-patched when running the "test_xml_etree_c" test suite. 11# Don't re-import "xml.etree.ElementTree" module in the docstring, 12# except if the test is specific to the Python implementation. 13 14import sys 15import cgi 16 17from test import test_support 18from test.test_support import findfile 19 20from xml.etree import ElementTree as ET 21 22SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 23SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 24 25SAMPLE_XML = """\ 26<body> 27 <tag class='a'>text</tag> 28 <tag class='b' /> 29 <section> 30 <tag class='b' id='inner'>subtext</tag> 31 </section> 32</body> 33""" 34 35SAMPLE_SECTION = """\ 36<section> 37 <tag class='b' id='inner'>subtext</tag> 38 <nexttag /> 39 <nextsection> 40 <tag /> 41 </nextsection> 42</section> 43""" 44 45SAMPLE_XML_NS = """ 46<body xmlns="http://effbot.org/ns"> 47 <tag>text</tag> 48 <tag /> 49 <section> 50 <tag>subtext</tag> 51 </section> 52</body> 53""" 54 55 56def sanity(): 57 """ 58 Import sanity. 
59 60 >>> from xml.etree import ElementTree 61 >>> from xml.etree import ElementInclude 62 >>> from xml.etree import ElementPath 63 """ 64 65def check_method(method): 66 if not hasattr(method, '__call__'): 67 print method, "not callable" 68 69def serialize(elem, to_string=True, **options): 70 import StringIO 71 file = StringIO.StringIO() 72 tree = ET.ElementTree(elem) 73 tree.write(file, **options) 74 if to_string: 75 return file.getvalue() 76 else: 77 file.seek(0) 78 return file 79 80def summarize(elem): 81 if elem.tag == ET.Comment: 82 return "<Comment>" 83 return elem.tag 84 85def summarize_list(seq): 86 return [summarize(elem) for elem in seq] 87 88def normalize_crlf(tree): 89 for elem in tree.iter(): 90 if elem.text: 91 elem.text = elem.text.replace("\r\n", "\n") 92 if elem.tail: 93 elem.tail = elem.tail.replace("\r\n", "\n") 94 95def check_string(string): 96 len(string) 97 for char in string: 98 if len(char) != 1: 99 print "expected one-character string, got %r" % char 100 new_string = string + "" 101 new_string = string + " " 102 string[:0] 103 104def check_mapping(mapping): 105 len(mapping) 106 keys = mapping.keys() 107 items = mapping.items() 108 for key in keys: 109 item = mapping[key] 110 mapping["key"] = "value" 111 if mapping["key"] != "value": 112 print "expected value string, got %r" % mapping["key"] 113 114def check_element(element): 115 if not ET.iselement(element): 116 print "not an element" 117 if not hasattr(element, "tag"): 118 print "no tag member" 119 if not hasattr(element, "attrib"): 120 print "no attrib member" 121 if not hasattr(element, "text"): 122 print "no text member" 123 if not hasattr(element, "tail"): 124 print "no tail member" 125 126 check_string(element.tag) 127 check_mapping(element.attrib) 128 if element.text is not None: 129 check_string(element.text) 130 if element.tail is not None: 131 check_string(element.tail) 132 for elem in element: 133 check_element(elem) 134 135# 
-------------------------------------------------------------------- 136# element tree tests 137 138def interface(): 139 r""" 140 Test element tree interface. 141 142 >>> element = ET.Element("tag") 143 >>> check_element(element) 144 >>> tree = ET.ElementTree(element) 145 >>> check_element(tree.getroot()) 146 147 >>> element = ET.Element("t\xe4g", key="value") 148 >>> tree = ET.ElementTree(element) 149 >>> repr(element) # doctest: +ELLIPSIS 150 "<Element 't\\xe4g' at 0x...>" 151 >>> element = ET.Element("tag", key="value") 152 153 Make sure all standard element methods exist. 154 155 >>> check_method(element.append) 156 >>> check_method(element.extend) 157 >>> check_method(element.insert) 158 >>> check_method(element.remove) 159 >>> check_method(element.getchildren) 160 >>> check_method(element.find) 161 >>> check_method(element.iterfind) 162 >>> check_method(element.findall) 163 >>> check_method(element.findtext) 164 >>> check_method(element.clear) 165 >>> check_method(element.get) 166 >>> check_method(element.set) 167 >>> check_method(element.keys) 168 >>> check_method(element.items) 169 >>> check_method(element.iter) 170 >>> check_method(element.itertext) 171 >>> check_method(element.getiterator) 172 173 These methods return an iterable. See bug 6472. 174 175 >>> check_method(element.iter("tag").next) 176 >>> check_method(element.iterfind("tag").next) 177 >>> check_method(element.iterfind("*").next) 178 >>> check_method(tree.iter("tag").next) 179 >>> check_method(tree.iterfind("tag").next) 180 >>> check_method(tree.iterfind("*").next) 181 182 These aliases are provided: 183 184 >>> assert ET.XML == ET.fromstring 185 >>> assert ET.PI == ET.ProcessingInstruction 186 >>> assert ET.XMLParser == ET.XMLTreeBuilder 187 """ 188 189def simpleops(): 190 """ 191 Basic method sanity checks. 
192 193 >>> elem = ET.XML("<body><tag/></body>") 194 >>> serialize(elem) 195 '<body><tag /></body>' 196 >>> e = ET.Element("tag2") 197 >>> elem.append(e) 198 >>> serialize(elem) 199 '<body><tag /><tag2 /></body>' 200 >>> elem.remove(e) 201 >>> serialize(elem) 202 '<body><tag /></body>' 203 >>> elem.insert(0, e) 204 >>> serialize(elem) 205 '<body><tag2 /><tag /></body>' 206 >>> elem.remove(e) 207 >>> elem.extend([e]) 208 >>> serialize(elem) 209 '<body><tag /><tag2 /></body>' 210 >>> elem.remove(e) 211 212 >>> element = ET.Element("tag", key="value") 213 >>> serialize(element) # 1 214 '<tag key="value" />' 215 >>> subelement = ET.Element("subtag") 216 >>> element.append(subelement) 217 >>> serialize(element) # 2 218 '<tag key="value"><subtag /></tag>' 219 >>> element.insert(0, subelement) 220 >>> serialize(element) # 3 221 '<tag key="value"><subtag /><subtag /></tag>' 222 >>> element.remove(subelement) 223 >>> serialize(element) # 4 224 '<tag key="value"><subtag /></tag>' 225 >>> element.remove(subelement) 226 >>> serialize(element) # 5 227 '<tag key="value" />' 228 >>> element.remove(subelement) 229 Traceback (most recent call last): 230 ValueError: list.remove(x): x not in list 231 >>> serialize(element) # 6 232 '<tag key="value" />' 233 >>> element[0:0] = [subelement, subelement, subelement] 234 >>> serialize(element[1]) 235 '<subtag />' 236 >>> element[1:9] == [element[1], element[2]] 237 True 238 >>> element[:9:2] == [element[0], element[2]] 239 True 240 >>> del element[1:2] 241 >>> serialize(element) 242 '<tag key="value"><subtag /><subtag /></tag>' 243 """ 244 245def cdata(): 246 """ 247 Test CDATA handling (etc). 
248 249 >>> serialize(ET.XML("<tag>hello</tag>")) 250 '<tag>hello</tag>' 251 >>> serialize(ET.XML("<tag>hello</tag>")) 252 '<tag>hello</tag>' 253 >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>")) 254 '<tag>hello</tag>' 255 """ 256 257# Only with Python implementation 258def simplefind(): 259 """ 260 Test find methods using the elementpath fallback. 261 262 >>> from xml.etree import ElementTree 263 264 >>> CurrentElementPath = ElementTree.ElementPath 265 >>> ElementTree.ElementPath = ElementTree._SimpleElementPath() 266 >>> elem = ElementTree.XML(SAMPLE_XML) 267 >>> elem.find("tag").tag 268 'tag' 269 >>> ElementTree.ElementTree(elem).find("tag").tag 270 'tag' 271 >>> elem.findtext("tag") 272 'text' 273 >>> elem.findtext("tog") 274 >>> elem.findtext("tog", "default") 275 'default' 276 >>> ElementTree.ElementTree(elem).findtext("tag") 277 'text' 278 >>> summarize_list(elem.findall("tag")) 279 ['tag', 'tag'] 280 >>> summarize_list(elem.findall(".//tag")) 281 ['tag', 'tag', 'tag'] 282 283 Path syntax doesn't work in this case. 284 285 >>> elem.find("section/tag") 286 >>> elem.findtext("section/tag") 287 >>> summarize_list(elem.findall("section/tag")) 288 [] 289 290 >>> ElementTree.ElementPath = CurrentElementPath 291 """ 292 293def find(): 294 """ 295 Test find methods (including xpath syntax). 
296 297 >>> elem = ET.XML(SAMPLE_XML) 298 >>> elem.find("tag").tag 299 'tag' 300 >>> ET.ElementTree(elem).find("tag").tag 301 'tag' 302 >>> elem.find("section/tag").tag 303 'tag' 304 >>> elem.find("./tag").tag 305 'tag' 306 >>> ET.ElementTree(elem).find("./tag").tag 307 'tag' 308 >>> ET.ElementTree(elem).find("/tag").tag 309 'tag' 310 >>> elem[2] = ET.XML(SAMPLE_SECTION) 311 >>> elem.find("section/nexttag").tag 312 'nexttag' 313 >>> ET.ElementTree(elem).find("section/tag").tag 314 'tag' 315 >>> ET.ElementTree(elem).find("tog") 316 >>> ET.ElementTree(elem).find("tog/foo") 317 >>> elem.findtext("tag") 318 'text' 319 >>> elem.findtext("section/nexttag") 320 '' 321 >>> elem.findtext("section/nexttag", "default") 322 '' 323 >>> elem.findtext("tog") 324 >>> elem.findtext("tog", "default") 325 'default' 326 >>> ET.ElementTree(elem).findtext("tag") 327 'text' 328 >>> ET.ElementTree(elem).findtext("tog/foo") 329 >>> ET.ElementTree(elem).findtext("tog/foo", "default") 330 'default' 331 >>> ET.ElementTree(elem).findtext("./tag") 332 'text' 333 >>> ET.ElementTree(elem).findtext("/tag") 334 'text' 335 >>> elem.findtext("section/tag") 336 'subtext' 337 >>> ET.ElementTree(elem).findtext("section/tag") 338 'subtext' 339 >>> summarize_list(elem.findall(".")) 340 ['body'] 341 >>> summarize_list(elem.findall("tag")) 342 ['tag', 'tag'] 343 >>> summarize_list(elem.findall("tog")) 344 [] 345 >>> summarize_list(elem.findall("tog/foo")) 346 [] 347 >>> summarize_list(elem.findall("*")) 348 ['tag', 'tag', 'section'] 349 >>> summarize_list(elem.findall(".//tag")) 350 ['tag', 'tag', 'tag', 'tag'] 351 >>> summarize_list(elem.findall("section/tag")) 352 ['tag'] 353 >>> summarize_list(elem.findall("section//tag")) 354 ['tag', 'tag'] 355 >>> summarize_list(elem.findall("section/*")) 356 ['tag', 'nexttag', 'nextsection'] 357 >>> summarize_list(elem.findall("section//*")) 358 ['tag', 'nexttag', 'nextsection', 'tag'] 359 >>> summarize_list(elem.findall("section/.//*")) 360 ['tag', 'nexttag', 
'nextsection', 'tag'] 361 >>> summarize_list(elem.findall("*/*")) 362 ['tag', 'nexttag', 'nextsection'] 363 >>> summarize_list(elem.findall("*//*")) 364 ['tag', 'nexttag', 'nextsection', 'tag'] 365 >>> summarize_list(elem.findall("*/tag")) 366 ['tag'] 367 >>> summarize_list(elem.findall("*/./tag")) 368 ['tag'] 369 >>> summarize_list(elem.findall("./tag")) 370 ['tag', 'tag'] 371 >>> summarize_list(elem.findall(".//tag")) 372 ['tag', 'tag', 'tag', 'tag'] 373 >>> summarize_list(elem.findall("././tag")) 374 ['tag', 'tag'] 375 >>> summarize_list(elem.findall(".//tag[@class]")) 376 ['tag', 'tag', 'tag'] 377 >>> summarize_list(elem.findall(".//tag[@class='a']")) 378 ['tag'] 379 >>> summarize_list(elem.findall(".//tag[@class='b']")) 380 ['tag', 'tag'] 381 >>> summarize_list(elem.findall(".//tag[@id]")) 382 ['tag'] 383 >>> summarize_list(elem.findall(".//section[tag]")) 384 ['section'] 385 >>> summarize_list(elem.findall(".//section[element]")) 386 [] 387 >>> summarize_list(elem.findall("../tag")) 388 [] 389 >>> summarize_list(elem.findall("section/../tag")) 390 ['tag', 'tag'] 391 >>> summarize_list(ET.ElementTree(elem).findall("./tag")) 392 ['tag', 'tag'] 393 394 Following example is invalid in 1.2. 395 A leading '*' is assumed in 1.3. 396 397 >>> elem.findall("section//") == elem.findall("section//*") 398 True 399 400 ET's Path module handles this case incorrectly; this gives 401 a warning in 1.3, and the behaviour will be modified in 1.4. 
402 403 >>> summarize_list(ET.ElementTree(elem).findall("/tag")) 404 ['tag', 'tag'] 405 406 >>> elem = ET.XML(SAMPLE_XML_NS) 407 >>> summarize_list(elem.findall("tag")) 408 [] 409 >>> summarize_list(elem.findall("{http://effbot.org/ns}tag")) 410 ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag'] 411 >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag")) 412 ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag'] 413 """ 414 415def file_init(): 416 """ 417 >>> import StringIO 418 419 >>> stringfile = StringIO.StringIO(SAMPLE_XML) 420 >>> tree = ET.ElementTree(file=stringfile) 421 >>> tree.find("tag").tag 422 'tag' 423 >>> tree.find("section/tag").tag 424 'tag' 425 426 >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE) 427 >>> tree.find("element").tag 428 'element' 429 >>> tree.find("element/../empty-element").tag 430 'empty-element' 431 """ 432 433def bad_find(): 434 """ 435 Check bad or unsupported path expressions. 436 437 >>> elem = ET.XML(SAMPLE_XML) 438 >>> elem.findall("/tag") 439 Traceback (most recent call last): 440 SyntaxError: cannot use absolute path on element 441 """ 442 443def path_cache(): 444 """ 445 Check that the path cache behaves sanely. 446 447 >>> elem = ET.XML(SAMPLE_XML) 448 >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 449 >>> cache_len_10 = len(ET.ElementPath._cache) 450 >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 451 >>> len(ET.ElementPath._cache) == cache_len_10 452 True 453 >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 454 >>> len(ET.ElementPath._cache) > cache_len_10 455 True 456 >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 457 >>> len(ET.ElementPath._cache) < 500 458 True 459 """ 460 461def copy(): 462 """ 463 Test copy handling (etc). 
464 465 >>> import copy 466 >>> e1 = ET.XML("<tag>hello<foo/></tag>") 467 >>> e2 = copy.copy(e1) 468 >>> e3 = copy.deepcopy(e1) 469 >>> e1.find("foo").tag = "bar" 470 >>> serialize(e1) 471 '<tag>hello<bar /></tag>' 472 >>> serialize(e2) 473 '<tag>hello<bar /></tag>' 474 >>> serialize(e3) 475 '<tag>hello<foo /></tag>' 476 477 """ 478 479def attrib(): 480 """ 481 Test attribute handling. 482 483 >>> elem = ET.Element("tag") 484 >>> elem.get("key") # 1.1 485 >>> elem.get("key", "default") # 1.2 486 'default' 487 >>> elem.set("key", "value") 488 >>> elem.get("key") # 1.3 489 'value' 490 491 >>> elem = ET.Element("tag", key="value") 492 >>> elem.get("key") # 2.1 493 'value' 494 >>> elem.attrib # 2.2 495 {'key': 'value'} 496 497 >>> attrib = {"key": "value"} 498 >>> elem = ET.Element("tag", attrib) 499 >>> attrib.clear() # check for aliasing issues 500 >>> elem.get("key") # 3.1 501 'value' 502 >>> elem.attrib # 3.2 503 {'key': 'value'} 504 505 >>> attrib = {"key": "value"} 506 >>> elem = ET.Element("tag", **attrib) 507 >>> attrib.clear() # check for aliasing issues 508 >>> elem.get("key") # 4.1 509 'value' 510 >>> elem.attrib # 4.2 511 {'key': 'value'} 512 513 >>> elem = ET.Element("tag", {"key": "other"}, key="value") 514 >>> elem.get("key") # 5.1 515 'value' 516 >>> elem.attrib # 5.2 517 {'key': 'value'} 518 519 >>> elem = ET.Element('test') 520 >>> elem.text = "aa" 521 >>> elem.set('testa', 'testval') 522 >>> elem.set('testb', 'test2') 523 >>> ET.tostring(elem) 524 '<test testa="testval" testb="test2">aa</test>' 525 >>> sorted(elem.keys()) 526 ['testa', 'testb'] 527 >>> sorted(elem.items()) 528 [('testa', 'testval'), ('testb', 'test2')] 529 >>> elem.attrib['testb'] 530 'test2' 531 >>> elem.attrib['testb'] = 'test1' 532 >>> elem.attrib['testc'] = 'test2' 533 >>> ET.tostring(elem) 534 '<test testa="testval" testb="test1" testc="test2">aa</test>' 535 """ 536 537def makeelement(): 538 """ 539 Test makeelement handling. 
540 541 >>> elem = ET.Element("tag") 542 >>> attrib = {"key": "value"} 543 >>> subelem = elem.makeelement("subtag", attrib) 544 >>> if subelem.attrib is attrib: 545 ... print "attrib aliasing" 546 >>> elem.append(subelem) 547 >>> serialize(elem) 548 '<tag><subtag key="value" /></tag>' 549 550 >>> elem.clear() 551 >>> serialize(elem) 552 '<tag />' 553 >>> elem.append(subelem) 554 >>> serialize(elem) 555 '<tag><subtag key="value" /></tag>' 556 >>> elem.extend([subelem, subelem]) 557 >>> serialize(elem) 558 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>' 559 >>> elem[:] = [subelem] 560 >>> serialize(elem) 561 '<tag><subtag key="value" /></tag>' 562 >>> elem[:] = tuple([subelem]) 563 >>> serialize(elem) 564 '<tag><subtag key="value" /></tag>' 565 566 """ 567 568def parsefile(): 569 """ 570 Test parsing from file. 571 572 >>> tree = ET.parse(SIMPLE_XMLFILE) 573 >>> normalize_crlf(tree) 574 >>> tree.write(sys.stdout) 575 <root> 576 <element key="value">text</element> 577 <element>text</element>tail 578 <empty-element /> 579 </root> 580 >>> tree = ET.parse(SIMPLE_NS_XMLFILE) 581 >>> normalize_crlf(tree) 582 >>> tree.write(sys.stdout) 583 <ns0:root xmlns:ns0="namespace"> 584 <ns0:element key="value">text</ns0:element> 585 <ns0:element>text</ns0:element>tail 586 <ns0:empty-element /> 587 </ns0:root> 588 589 >>> with open(SIMPLE_XMLFILE) as f: 590 ... data = f.read() 591 592 >>> parser = ET.XMLParser() 593 >>> parser.version # doctest: +ELLIPSIS 594 'Expat ...' 
595 >>> parser.feed(data) 596 >>> print serialize(parser.close()) 597 <root> 598 <element key="value">text</element> 599 <element>text</element>tail 600 <empty-element /> 601 </root> 602 603 >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility 604 >>> parser.feed(data) 605 >>> print serialize(parser.close()) 606 <root> 607 <element key="value">text</element> 608 <element>text</element>tail 609 <empty-element /> 610 </root> 611 612 >>> target = ET.TreeBuilder() 613 >>> parser = ET.XMLParser(target=target) 614 >>> parser.feed(data) 615 >>> print serialize(parser.close()) 616 <root> 617 <element key="value">text</element> 618 <element>text</element>tail 619 <empty-element /> 620 </root> 621 """ 622 623def parseliteral(): 624 """ 625 >>> element = ET.XML("<html><body>text</body></html>") 626 >>> ET.ElementTree(element).write(sys.stdout) 627 <html><body>text</body></html> 628 >>> element = ET.fromstring("<html><body>text</body></html>") 629 >>> ET.ElementTree(element).write(sys.stdout) 630 <html><body>text</body></html> 631 >>> sequence = ["<html><body>", "text</bo", "dy></html>"] 632 >>> element = ET.fromstringlist(sequence) 633 >>> print ET.tostring(element) 634 <html><body>text</body></html> 635 >>> print "".join(ET.tostringlist(element)) 636 <html><body>text</body></html> 637 >>> ET.tostring(element, "ascii") 638 "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>" 639 >>> _, ids = ET.XMLID("<html><body>text</body></html>") 640 >>> len(ids) 641 0 642 >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 643 >>> len(ids) 644 1 645 >>> ids["body"].tag 646 'body' 647 """ 648 649def iterparse(): 650 """ 651 Test iterparse interface. 652 653 >>> iterparse = ET.iterparse 654 655 >>> context = iterparse(SIMPLE_XMLFILE) 656 >>> action, elem = next(context) 657 >>> print action, elem.tag 658 end element 659 >>> for action, elem in context: 660 ... 
print action, elem.tag 661 end element 662 end empty-element 663 end root 664 >>> context.root.tag 665 'root' 666 667 >>> context = iterparse(SIMPLE_NS_XMLFILE) 668 >>> for action, elem in context: 669 ... print action, elem.tag 670 end {namespace}element 671 end {namespace}element 672 end {namespace}empty-element 673 end {namespace}root 674 675 >>> events = () 676 >>> context = iterparse(SIMPLE_XMLFILE, events) 677 >>> for action, elem in context: 678 ... print action, elem.tag 679 680 >>> events = () 681 >>> context = iterparse(SIMPLE_XMLFILE, events=events) 682 >>> for action, elem in context: 683 ... print action, elem.tag 684 685 >>> events = ("start", "end") 686 >>> context = iterparse(SIMPLE_XMLFILE, events) 687 >>> for action, elem in context: 688 ... print action, elem.tag 689 start root 690 start element 691 end element 692 start element 693 end element 694 start empty-element 695 end empty-element 696 end root 697 698 >>> events = ("start", "end", "start-ns", "end-ns") 699 >>> context = iterparse(SIMPLE_NS_XMLFILE, events) 700 >>> for action, elem in context: 701 ... if action in ("start", "end"): 702 ... print action, elem.tag 703 ... else: 704 ... print action, elem 705 start-ns ('', 'namespace') 706 start {namespace}root 707 start {namespace}element 708 end {namespace}element 709 start {namespace}element 710 end {namespace}element 711 start {namespace}empty-element 712 end {namespace}empty-element 713 end {namespace}root 714 end-ns None 715 716 >>> import StringIO 717 718 >>> events = ('start-ns', 'end-ns') 719 >>> context = ET.iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events) 720 >>> for action, elem in context: 721 ... print action, elem 722 start-ns ('', '') 723 end-ns None 724 725 >>> events = ("start", "end", "bogus") 726 >>> with open(SIMPLE_XMLFILE, "rb") as f: 727 ... iterparse(f, events) 728 Traceback (most recent call last): 729 ValueError: unknown event 'bogus' 730 731 >>> source = StringIO.StringIO( 732 ... 
"<?xml version='1.0' encoding='iso-8859-1'?>\\n" 733 ... "<body xmlns='http://éffbot.org/ns'\\n" 734 ... " xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n") 735 >>> events = ("start-ns",) 736 >>> context = iterparse(source, events) 737 >>> for action, elem in context: 738 ... print action, elem 739 start-ns ('', u'http://\\xe9ffbot.org/ns') 740 start-ns (u'cl\\xe9', 'http://effbot.org/ns') 741 742 >>> source = StringIO.StringIO("<document />junk") 743 >>> try: 744 ... for action, elem in iterparse(source): 745 ... print action, elem.tag 746 ... except ET.ParseError, v: 747 ... print v 748 end document 749 junk after document element: line 1, column 12 750 """ 751 752def writefile(): 753 """ 754 >>> elem = ET.Element("tag") 755 >>> elem.text = "text" 756 >>> serialize(elem) 757 '<tag>text</tag>' 758 >>> ET.SubElement(elem, "subtag").text = "subtext" 759 >>> serialize(elem) 760 '<tag>text<subtag>subtext</subtag></tag>' 761 762 Test tag suppression 763 >>> elem.tag = None 764 >>> serialize(elem) 765 'text<subtag>subtext</subtag>' 766 >>> elem.insert(0, ET.Comment("comment")) 767 >>> serialize(elem) # assumes 1.3 768 'text<!--comment--><subtag>subtext</subtag>' 769 >>> elem[0] = ET.PI("key", "value") 770 >>> serialize(elem) 771 'text<?key value?><subtag>subtext</subtag>' 772 """ 773 774def custom_builder(): 775 """ 776 Test parser w. custom builder. 777 778 >>> with open(SIMPLE_XMLFILE) as f: 779 ... data = f.read() 780 >>> class Builder: 781 ... def start(self, tag, attrib): 782 ... print "start", tag 783 ... def end(self, tag): 784 ... print "end", tag 785 ... def data(self, text): 786 ... pass 787 >>> builder = Builder() 788 >>> parser = ET.XMLParser(target=builder) 789 >>> parser.feed(data) 790 start root 791 start element 792 end element 793 start element 794 end element 795 start empty-element 796 end empty-element 797 end root 798 799 >>> with open(SIMPLE_NS_XMLFILE) as f: 800 ... data = f.read() 801 >>> class Builder: 802 ... 
def start(self, tag, attrib): 803 ... print "start", tag 804 ... def end(self, tag): 805 ... print "end", tag 806 ... def data(self, text): 807 ... pass 808 ... def pi(self, target, data): 809 ... print "pi", target, repr(data) 810 ... def comment(self, data): 811 ... print "comment", repr(data) 812 >>> builder = Builder() 813 >>> parser = ET.XMLParser(target=builder) 814 >>> parser.feed(data) 815 pi pi 'data' 816 comment ' comment ' 817 start {namespace}root 818 start {namespace}element 819 end {namespace}element 820 start {namespace}element 821 end {namespace}element 822 start {namespace}empty-element 823 end {namespace}empty-element 824 end {namespace}root 825 826 """ 827 828def getchildren(): 829 """ 830 Test Element.getchildren() 831 832 >>> with open(SIMPLE_XMLFILE, "r") as f: 833 ... tree = ET.parse(f) 834 >>> for elem in tree.getroot().iter(): 835 ... summarize_list(elem.getchildren()) 836 ['element', 'element', 'empty-element'] 837 [] 838 [] 839 [] 840 >>> for elem in tree.getiterator(): 841 ... 
summarize_list(elem.getchildren()) 842 ['element', 'element', 'empty-element'] 843 [] 844 [] 845 [] 846 847 >>> elem = ET.XML(SAMPLE_XML) 848 >>> len(elem.getchildren()) 849 3 850 >>> len(elem[2].getchildren()) 851 1 852 >>> elem[:] == elem.getchildren() 853 True 854 >>> child1 = elem[0] 855 >>> child2 = elem[2] 856 >>> del elem[1:2] 857 >>> len(elem.getchildren()) 858 2 859 >>> child1 == elem[0] 860 True 861 >>> child2 == elem[1] 862 True 863 >>> elem[0:2] = [child2, child1] 864 >>> child2 == elem[0] 865 True 866 >>> child1 == elem[1] 867 True 868 >>> child1 == elem[0] 869 False 870 >>> elem.clear() 871 >>> elem.getchildren() 872 [] 873 """ 874 875def writestring(): 876 """ 877 >>> elem = ET.XML("<html><body>text</body></html>") 878 >>> ET.tostring(elem) 879 '<html><body>text</body></html>' 880 >>> elem = ET.fromstring("<html><body>text</body></html>") 881 >>> ET.tostring(elem) 882 '<html><body>text</body></html>' 883 """ 884 885def check_encoding(encoding): 886 """ 887 >>> check_encoding("ascii") 888 >>> check_encoding("us-ascii") 889 >>> check_encoding("iso-8859-1") 890 >>> check_encoding("iso-8859-15") 891 >>> check_encoding("cp437") 892 >>> check_encoding("mac-roman") 893 >>> check_encoding("gbk") 894 Traceback (most recent call last): 895 ValueError: multi-byte encodings are not supported 896 >>> check_encoding("cp037") 897 Traceback (most recent call last): 898 ParseError: unknown encoding: line 1, column 30 899 """ 900 ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding) 901 902def encoding(): 903 r""" 904 Test encoding issues. 
905 906 >>> elem = ET.Element("tag") 907 >>> elem.text = u"abc" 908 >>> serialize(elem) 909 '<tag>abc</tag>' 910 >>> serialize(elem, encoding="utf-8") 911 '<tag>abc</tag>' 912 >>> serialize(elem, encoding="us-ascii") 913 '<tag>abc</tag>' 914 >>> serialize(elem, encoding="iso-8859-1") 915 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>" 916 917 >>> elem.text = "<&\"\'>" 918 >>> serialize(elem) 919 '<tag><&"\'></tag>' 920 >>> serialize(elem, encoding="utf-8") 921 '<tag><&"\'></tag>' 922 >>> serialize(elem, encoding="us-ascii") # cdata characters 923 '<tag><&"\'></tag>' 924 >>> serialize(elem, encoding="iso-8859-1") 925 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag><&"\'></tag>' 926 927 >>> elem.attrib["key"] = "<&\"\'>" 928 >>> elem.text = None 929 >>> serialize(elem) 930 '<tag key="<&"\'>" />' 931 >>> serialize(elem, encoding="utf-8") 932 '<tag key="<&"\'>" />' 933 >>> serialize(elem, encoding="us-ascii") 934 '<tag key="<&"\'>" />' 935 >>> serialize(elem, encoding="iso-8859-1") 936 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="<&"\'>" />' 937 938 >>> elem.text = u'\xe5\xf6\xf6<>' 939 >>> elem.attrib.clear() 940 >>> serialize(elem) 941 '<tag>åöö<></tag>' 942 >>> serialize(elem, encoding="utf-8") 943 '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>' 944 >>> serialize(elem, encoding="us-ascii") 945 '<tag>åöö<></tag>' 946 >>> serialize(elem, encoding="iso-8859-1") 947 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6<></tag>" 948 949 >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>' 950 >>> elem.text = None 951 >>> serialize(elem) 952 '<tag key="åöö<>" />' 953 >>> serialize(elem, encoding="utf-8") 954 '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />' 955 >>> serialize(elem, encoding="us-ascii") 956 '<tag key="åöö<>" />' 957 >>> serialize(elem, encoding="iso-8859-1") 958 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6<>" />' 959 """ 960 961def methods(): 962 r""" 963 Test serialization methods. 
964 965 >>> e = ET.XML("<html><link/><script>1 < 2</script></html>") 966 >>> e.tail = "\n" 967 >>> serialize(e) 968 '<html><link /><script>1 < 2</script></html>\n' 969 >>> serialize(e, method=None) 970 '<html><link /><script>1 < 2</script></html>\n' 971 >>> serialize(e, method="xml") 972 '<html><link /><script>1 < 2</script></html>\n' 973 >>> serialize(e, method="html") 974 '<html><link><script>1 < 2</script></html>\n' 975 >>> serialize(e, method="text") 976 '1 < 2\n' 977 """ 978 979def iterators(): 980 """ 981 Test iterators. 982 983 >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 984 >>> summarize_list(e.iter()) 985 ['html', 'body', 'i'] 986 >>> summarize_list(e.find("body").iter()) 987 ['body', 'i'] 988 >>> summarize(next(e.iter())) 989 'html' 990 >>> "".join(e.itertext()) 991 'this is a paragraph...' 992 >>> "".join(e.find("body").itertext()) 993 'this is a paragraph.' 994 >>> next(e.itertext()) 995 'this is a ' 996 997 Method iterparse should return an iterator. See bug 6472. 998 999 >>> sourcefile = serialize(e, to_string=False) 1000 >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS 1001 ('end', <Element 'i' at 0x...>) 1002 1003 >>> tree = ET.ElementTree(None) 1004 >>> tree.iter() 1005 Traceback (most recent call last): 1006 AttributeError: 'NoneType' object has no attribute 'iter' 1007 """ 1008 1009ENTITY_XML = """\ 1010<!DOCTYPE points [ 1011<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 1012%user-entities; 1013]> 1014<document>&entity;</document> 1015""" 1016 1017def entity(): 1018 """ 1019 Test entity handling. 
1020 1021 1) good entities 1022 1023 >>> e = ET.XML("<document title='舰'>test</document>") 1024 >>> serialize(e) 1025 '<document title="舰">test</document>' 1026 1027 2) bad entities 1028 1029 >>> ET.XML("<document>&entity;</document>") 1030 Traceback (most recent call last): 1031 ParseError: undefined entity: line 1, column 10 1032 1033 >>> ET.XML(ENTITY_XML) 1034 Traceback (most recent call last): 1035 ParseError: undefined entity &entity;: line 5, column 10 1036 1037 3) custom entity 1038 1039 >>> parser = ET.XMLParser() 1040 >>> parser.entity["entity"] = "text" 1041 >>> parser.feed(ENTITY_XML) 1042 >>> root = parser.close() 1043 >>> serialize(root) 1044 '<document>text</document>' 1045 """ 1046 1047def error(xml): 1048 """ 1049 1050 Test error handling. 1051 1052 >>> issubclass(ET.ParseError, SyntaxError) 1053 True 1054 >>> error("foo").position 1055 (1, 0) 1056 >>> error("<tag>&foo;</tag>").position 1057 (1, 5) 1058 >>> error("foobar<").position 1059 (1, 6) 1060 1061 """ 1062 try: 1063 ET.XML(xml) 1064 except ET.ParseError: 1065 return sys.exc_value 1066 1067def namespace(): 1068 """ 1069 Test namespace issues. 
1070 1071 1) xml namespace 1072 1073 >>> elem = ET.XML("<tag xml:lang='en' />") 1074 >>> serialize(elem) # 1.1 1075 '<tag xml:lang="en" />' 1076 1077 2) other "well-known" namespaces 1078 1079 >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1080 >>> serialize(elem) # 2.1 1081 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />' 1082 1083 >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1084 >>> serialize(elem) # 2.2 1085 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />' 1086 1087 >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1088 >>> serialize(elem) # 2.3 1089 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />' 1090 1091 3) unknown namespaces 1092 >>> elem = ET.XML(SAMPLE_XML_NS) 1093 >>> print serialize(elem) 1094 <ns0:body xmlns:ns0="http://effbot.org/ns"> 1095 <ns0:tag>text</ns0:tag> 1096 <ns0:tag /> 1097 <ns0:section> 1098 <ns0:tag>subtext</ns0:tag> 1099 </ns0:section> 1100 </ns0:body> 1101 """ 1102 1103def qname(): 1104 """ 1105 Test QName handling. 
1106 1107 1) decorated tags 1108 1109 >>> elem = ET.Element("{uri}tag") 1110 >>> serialize(elem) # 1.1 1111 '<ns0:tag xmlns:ns0="uri" />' 1112 >>> elem = ET.Element(ET.QName("{uri}tag")) 1113 >>> serialize(elem) # 1.2 1114 '<ns0:tag xmlns:ns0="uri" />' 1115 >>> elem = ET.Element(ET.QName("uri", "tag")) 1116 >>> serialize(elem) # 1.3 1117 '<ns0:tag xmlns:ns0="uri" />' 1118 >>> elem = ET.Element(ET.QName("uri", "tag")) 1119 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1120 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1121 >>> serialize(elem) # 1.4 1122 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>' 1123 1124 2) decorated attributes 1125 1126 >>> elem.clear() 1127 >>> elem.attrib["{uri}key"] = "value" 1128 >>> serialize(elem) # 2.1 1129 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1130 1131 >>> elem.clear() 1132 >>> elem.attrib[ET.QName("{uri}key")] = "value" 1133 >>> serialize(elem) # 2.2 1134 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1135 1136 3) decorated values are not converted by default, but the 1137 QName wrapper can be used for values 1138 1139 >>> elem.clear() 1140 >>> elem.attrib["{uri}key"] = "{uri}value" 1141 >>> serialize(elem) # 3.1 1142 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />' 1143 1144 >>> elem.clear() 1145 >>> elem.attrib["{uri}key"] = ET.QName("{uri}value") 1146 >>> serialize(elem) # 3.2 1147 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />' 1148 1149 >>> elem.clear() 1150 >>> subelem = ET.Element("tag") 1151 >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1152 >>> elem.append(subelem) 1153 >>> elem.append(subelem) 1154 >>> serialize(elem) # 3.3 1155 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>' 1156 1157 4) Direct QName tests 1158 1159 >>> str(ET.QName('ns', 'tag')) 1160 '{ns}tag' 1161 >>> str(ET.QName('{ns}tag')) 1162 '{ns}tag' 1163 >>> q1 = ET.QName('ns', 'tag') 1164 >>> q2 = ET.QName('ns', 'tag') 
1165 >>> q1 == q2 1166 True 1167 >>> q2 = ET.QName('ns', 'other-tag') 1168 >>> q1 == q2 1169 False 1170 >>> q1 == 'ns:tag' 1171 False 1172 >>> q1 == '{ns}tag' 1173 True 1174 """ 1175 1176def doctype_public(): 1177 """ 1178 Test PUBLIC doctype. 1179 1180 >>> elem = ET.XML('<!DOCTYPE html PUBLIC' 1181 ... ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 1182 ... ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 1183 ... '<html>text</html>') 1184 1185 """ 1186 1187def xpath_tokenizer(p): 1188 """ 1189 Test the XPath tokenizer. 1190 1191 >>> # tests from the xml specification 1192 >>> xpath_tokenizer("*") 1193 ['*'] 1194 >>> xpath_tokenizer("text()") 1195 ['text', '()'] 1196 >>> xpath_tokenizer("@name") 1197 ['@', 'name'] 1198 >>> xpath_tokenizer("@*") 1199 ['@', '*'] 1200 >>> xpath_tokenizer("para[1]") 1201 ['para', '[', '1', ']'] 1202 >>> xpath_tokenizer("para[last()]") 1203 ['para', '[', 'last', '()', ']'] 1204 >>> xpath_tokenizer("*/para") 1205 ['*', '/', 'para'] 1206 >>> xpath_tokenizer("/doc/chapter[5]/section[2]") 1207 ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']'] 1208 >>> xpath_tokenizer("chapter//para") 1209 ['chapter', '//', 'para'] 1210 >>> xpath_tokenizer("//para") 1211 ['//', 'para'] 1212 >>> xpath_tokenizer("//olist/item") 1213 ['//', 'olist', '/', 'item'] 1214 >>> xpath_tokenizer(".") 1215 ['.'] 1216 >>> xpath_tokenizer(".//para") 1217 ['.', '//', 'para'] 1218 >>> xpath_tokenizer("..") 1219 ['..'] 1220 >>> xpath_tokenizer("../@lang") 1221 ['..', '/', '@', 'lang'] 1222 >>> xpath_tokenizer("chapter[title]") 1223 ['chapter', '[', 'title', ']'] 1224 >>> xpath_tokenizer("employee[@secretary and @assistant]") 1225 ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'] 1226 1227 >>> # additional tests 1228 >>> xpath_tokenizer("{http://spam}egg") 1229 ['{http://spam}egg'] 1230 >>> xpath_tokenizer("./spam.egg") 1231 ['.', '/', 'spam.egg'] 1232 >>> xpath_tokenizer(".//{http://spam}egg") 1233 ['.', '//', 
'{http://spam}egg'] 1234 """ 1235 from xml.etree import ElementPath 1236 out = [] 1237 for op, tag in ElementPath.xpath_tokenizer(p): 1238 out.append(op or tag) 1239 return out 1240 1241def processinginstruction(): 1242 """ 1243 Test ProcessingInstruction directly 1244 1245 >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction')) 1246 '<?test instruction?>' 1247 >>> ET.tostring(ET.PI('test', 'instruction')) 1248 '<?test instruction?>' 1249 1250 Issue #2746 1251 1252 >>> ET.tostring(ET.PI('test', '<testing&>')) 1253 '<?test <testing&>?>' 1254 >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1') 1255 "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>" 1256 """ 1257 1258# 1259# xinclude tests (samples from appendix C of the xinclude specification) 1260 1261XINCLUDE = {} 1262 1263XINCLUDE["C1.xml"] = """\ 1264<?xml version='1.0'?> 1265<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1266 <p>120 Mz is adequate for an average home user.</p> 1267 <xi:include href="disclaimer.xml"/> 1268</document> 1269""" 1270 1271XINCLUDE["disclaimer.xml"] = """\ 1272<?xml version='1.0'?> 1273<disclaimer> 1274 <p>The opinions represented herein represent those of the individual 1275 and should not be interpreted as official policy endorsed by this 1276 organization.</p> 1277</disclaimer> 1278""" 1279 1280XINCLUDE["C2.xml"] = """\ 1281<?xml version='1.0'?> 1282<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1283 <p>This document has been accessed 1284 <xi:include href="count.txt" parse="text"/> times.</p> 1285</document> 1286""" 1287 1288XINCLUDE["count.txt"] = "324387" 1289 1290XINCLUDE["C2b.xml"] = """\ 1291<?xml version='1.0'?> 1292<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1293 <p>This document has been <em>accessed</em> 1294 <xi:include href="count.txt" parse="text"/> times.</p> 1295</document> 1296""" 1297 1298XINCLUDE["C3.xml"] = """\ 1299<?xml version='1.0'?> 1300<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1301 <p>The 
following is the source of the "data.xml" resource:</p> 1302 <example><xi:include href="data.xml" parse="text"/></example> 1303</document> 1304""" 1305 1306XINCLUDE["data.xml"] = """\ 1307<?xml version='1.0'?> 1308<data> 1309 <item><![CDATA[Brooks & Shields]]></item> 1310</data> 1311""" 1312 1313XINCLUDE["C5.xml"] = """\ 1314<?xml version='1.0'?> 1315<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1316 <xi:include href="example.txt" parse="text"> 1317 <xi:fallback> 1318 <xi:include href="fallback-example.txt" parse="text"> 1319 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback> 1320 </xi:include> 1321 </xi:fallback> 1322 </xi:include> 1323</div> 1324""" 1325 1326XINCLUDE["default.xml"] = """\ 1327<?xml version='1.0'?> 1328<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1329 <p>Example.</p> 1330 <xi:include href="{}"/> 1331</document> 1332""".format(cgi.escape(SIMPLE_XMLFILE, True)) 1333 1334def xinclude_loader(href, parse="xml", encoding=None): 1335 try: 1336 data = XINCLUDE[href] 1337 except KeyError: 1338 raise IOError("resource not found") 1339 if parse == "xml": 1340 from xml.etree.ElementTree import XML 1341 return XML(data) 1342 return data 1343 1344def xinclude(): 1345 r""" 1346 Basic inclusion example (XInclude C.1) 1347 1348 >>> from xml.etree import ElementTree as ET 1349 >>> from xml.etree import ElementInclude 1350 1351 >>> document = xinclude_loader("C1.xml") 1352 >>> ElementInclude.include(document, xinclude_loader) 1353 >>> print serialize(document) # C1 1354 <document> 1355 <p>120 Mz is adequate for an average home user.</p> 1356 <disclaimer> 1357 <p>The opinions represented herein represent those of the individual 1358 and should not be interpreted as official policy endorsed by this 1359 organization.</p> 1360 </disclaimer> 1361 </document> 1362 1363 Textual inclusion example (XInclude C.2) 1364 1365 >>> document = xinclude_loader("C2.xml") 1366 >>> ElementInclude.include(document, xinclude_loader) 1367 >>> print 
serialize(document) # C2 1368 <document> 1369 <p>This document has been accessed 1370 324387 times.</p> 1371 </document> 1372 1373 Textual inclusion after sibling element (based on modified XInclude C.2) 1374 1375 >>> document = xinclude_loader("C2b.xml") 1376 >>> ElementInclude.include(document, xinclude_loader) 1377 >>> print(serialize(document)) # C2b 1378 <document> 1379 <p>This document has been <em>accessed</em> 1380 324387 times.</p> 1381 </document> 1382 1383 Textual inclusion of XML example (XInclude C.3) 1384 1385 >>> document = xinclude_loader("C3.xml") 1386 >>> ElementInclude.include(document, xinclude_loader) 1387 >>> print serialize(document) # C3 1388 <document> 1389 <p>The following is the source of the "data.xml" resource:</p> 1390 <example><?xml version='1.0'?> 1391 <data> 1392 <item><![CDATA[Brooks & Shields]]></item> 1393 </data> 1394 </example> 1395 </document> 1396 1397 Fallback example (XInclude C.5) 1398 Note! Fallback support is not yet implemented 1399 1400 >>> document = xinclude_loader("C5.xml") 1401 >>> ElementInclude.include(document, xinclude_loader) 1402 Traceback (most recent call last): 1403 IOError: resource not found 1404 >>> # print serialize(document) # C5 1405 """ 1406 1407def xinclude_default(): 1408 """ 1409 >>> from xml.etree import ElementInclude 1410 1411 >>> document = xinclude_loader("default.xml") 1412 >>> ElementInclude.include(document) 1413 >>> print serialize(document) # default 1414 <document> 1415 <p>Example.</p> 1416 <root> 1417 <element key="value">text</element> 1418 <element>text</element>tail 1419 <empty-element /> 1420 </root> 1421 </document> 1422 """ 1423 1424# 1425# badly formatted xi:include tags 1426 1427XINCLUDE_BAD = {} 1428 1429XINCLUDE_BAD["B1.xml"] = """\ 1430<?xml version='1.0'?> 1431<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1432 <p>120 Mz is adequate for an average home user.</p> 1433 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1434</document> 1435""" 1436 
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""

# The doctest defines a loader that always returns None, so every include
# attempted below is expected to end in FatalIncludeError.
def xinclude_failures():
    r"""
    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """

# --------------------------------------------------------------------
# reported bugs

# Each case places the integer 123 where a string is expected (tag, text,
# tail, attribute key, attribute value) and expects a TypeError on write.
def bug_xmltoolkit21():
    """

    marshaller gives obscure errors for non-string values

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """

def bug_xmltoolkit25():
    """

    typo in ElementTree.findtext

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """

def bug_xmltoolkit28():
    """

    .//tag causes exceptions

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """

def bug_xmltoolkitX1():
    """

    dump() doesn't flush the output buffer

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """

# The attribute value is written as the character reference &#228; so the
# source stays ASCII apart from the explicit \xe4 escapes.
def bug_xmltoolkit39():
    """

    non-ascii element and attribute names doesn't work

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """

# The entity is declared with the hexadecimal reference &#x8230; and is
# expected back as the decimal reference &#33328; (0x8230 == 33328).
def bug_xmltoolkit54():
    """

    problems handling internally defined entities

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """

def bug_xmltoolkit55():
    """

    make sure we're reporting the first error, not the last

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """

# File-like stub whose read() always fails; xmltoolkit60 hands it to
# ET.parse() and expects the IOError to propagate.
class ExceptionFile:
    def read(self, x):
        raise IOError

def xmltoolkit60():
    """

    Handle crash in stream source.
    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """

# Sample document for xmltoolkit62.  The quotation marks are spelled as the
# named entities &lsquo; / &rsquo;, which xmltoolkit62 registers on the parser
# itself before feeding this text.
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""


def xmltoolkit62():
    """

    Don't crash when using custom entities.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    # Extend the parser's entity table with the two entities the document uses.
    ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
    parser = ET.XMLTreeBuilder()
    parser.entity.update(ENTITIES)
    parser.feed(XMLTOOLKIT62_DOC)
    t = parser.close()
    # Return the text of the first <paragraph> found anywhere in the tree.
    return t.find('.//paragraph').text

def xmltoolkit63():
    """

    Check reference leak.
    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    # Build a single <tag>text</tag> element and discard it; the doctest
    # above checks that doing so repeatedly leaves None's refcount unchanged.
    tree = ET.TreeBuilder()
    tree.start("tag", {})
    tree.data("text")
    tree.end("tag")

# --------------------------------------------------------------------


# Newlines inside an attribute value are expected to be written as the
# character reference &#10; and to come back as "\n" after re-parsing.
def bug_200708_newline():
    r"""

    Preserve newlines in attributes.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'

    """

def bug_200708_close():
    """

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.
    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """

def bug_200709_default_namespace():
    """

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """

def bug_200709_register_namespace():
    """

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """

1739def bug_200709_element_comment(): 1740 """ 1741 1742 Not sure if this can be fixed, really (since the serializer needs 1743 ET.Comment, not cET.comment). 1744 1745 >>> a = ET.Element('a') 1746 >>> a.append(ET.Comment('foo')) 1747 >>> a[0].tag == ET.Comment 1748 True 1749 1750 >>> a = ET.Element('a') 1751 >>> a.append(ET.PI('foo')) 1752 >>> a[0].tag == ET.PI 1753 True 1754 1755 """ 1756 1757def bug_200709_element_insert(): 1758 """ 1759 1760 >>> a = ET.Element('a') 1761 >>> b = ET.SubElement(a, 'b') 1762 >>> c = ET.SubElement(a, 'c') 1763 >>> d = ET.Element('d') 1764 >>> a.insert(0, d) 1765 >>> summarize_list(a) 1766 ['d', 'b', 'c'] 1767 >>> a.insert(-1, d) 1768 >>> summarize_list(a) 1769 ['d', 'b', 'd', 'c'] 1770 1771 """ 1772 1773def bug_200709_iter_comment(): 1774 """ 1775 1776 >>> a = ET.Element('a') 1777 >>> b = ET.SubElement(a, 'b') 1778 >>> comment_b = ET.Comment("TEST-b") 1779 >>> b.append(comment_b) 1780 >>> summarize_list(a.iter(ET.Comment)) 1781 ['<Comment>'] 1782 1783 """ 1784 1785def bug_18347(): 1786 """ 1787 1788 >>> e = ET.XML('<html><CamelCase>text</CamelCase></html>') 1789 >>> serialize(e) 1790 '<html><CamelCase>text</CamelCase></html>' 1791 >>> serialize(e, method="html") 1792 '<html><CamelCase>text</CamelCase></html>' 1793 """ 1794 1795# -------------------------------------------------------------------- 1796# reported on bugs.python.org 1797 1798def bug_1534630(): 1799 """ 1800 1801 >>> bob = ET.TreeBuilder() 1802 >>> e = bob.data("data") 1803 >>> e = bob.start("tag", {}) 1804 >>> e = bob.end("tag") 1805 >>> e = bob.close() 1806 >>> serialize(e) 1807 '<tag />' 1808 1809 """ 1810 1811def check_issue6233(): 1812 """ 1813 1814 >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>") 1815 >>> ET.tostring(e, 'ascii') 1816 "<?xml version='1.0' encoding='ascii'?>\\n<body>tãg</body>" 1817 >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>") 1818 >>> ET.tostring(e, 'ascii') 1819 "<?xml 
version='1.0' encoding='ascii'?>\\n<body>tãg</body>" 1820 1821 """ 1822 1823def check_issue3151(): 1824 """ 1825 1826 >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1827 >>> e.tag 1828 '{${stuff}}localname' 1829 >>> t = ET.ElementTree(e) 1830 >>> ET.tostring(e) 1831 '<ns0:localname xmlns:ns0="${stuff}" />' 1832 1833 """ 1834 1835def check_issue6565(): 1836 """ 1837 1838 >>> elem = ET.XML("<body><tag/></body>") 1839 >>> summarize_list(elem) 1840 ['tag'] 1841 >>> newelem = ET.XML(SAMPLE_XML) 1842 >>> elem[:] = newelem[:] 1843 >>> summarize_list(elem) 1844 ['tag', 'tag', 'section'] 1845 1846 """ 1847 1848def check_html_empty_elems_serialization(self): 1849 # issue 15970 1850 # from http://www.w3.org/TR/html401/index/elements.html 1851 """ 1852 1853 >>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', 1854 ... 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM'] 1855 >>> elems = ''.join('<%s />' % elem for elem in empty_elems) 1856 >>> serialize(ET.XML('<html>%s</html>' % elems), method='html') 1857 '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>' 1858 >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html') 1859 '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>' 1860 >>> elems = ''.join('<%s></%s>' % (elem, elem) for elem in empty_elems) 1861 >>> serialize(ET.XML('<html>%s</html>' % elems), method='html') 1862 '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>' 1863 >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html') 1864 '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>' 1865 1866 """ 1867 1868# -------------------------------------------------------------------- 1869 1870 1871class CleanContext(object): 1872 """Provide default namespace mapping and path cache.""" 1873 checkwarnings = None 1874 1875 def 
__init__(self, quiet=False): 1876 if sys.flags.optimize >= 2: 1877 # under -OO, doctests cannot be run and therefore not all warnings 1878 # will be emitted 1879 quiet = True 1880 deprecations = ( 1881 # Search behaviour is broken if search path starts with "/". 1882 ("This search is broken in 1.3 and earlier, and will be fixed " 1883 "in a future version. If you rely on the current behaviour, " 1884 "change it to '.+'", FutureWarning), 1885 # Element.getchildren() and Element.getiterator() are deprecated. 1886 ("This method will be removed in future versions. " 1887 "Use .+ instead.", DeprecationWarning), 1888 ("This method will be removed in future versions. " 1889 "Use .+ instead.", PendingDeprecationWarning), 1890 # XMLParser.doctype() is deprecated. 1891 ("This method of XMLParser is deprecated. Define doctype.. " 1892 "method on the TreeBuilder target.", DeprecationWarning)) 1893 self.checkwarnings = test_support.check_warnings(*deprecations, 1894 quiet=quiet) 1895 1896 def __enter__(self): 1897 from xml.etree import ElementTree 1898 self._nsmap = ElementTree._namespace_map 1899 self._path_cache = ElementTree.ElementPath._cache 1900 # Copy the default namespace mapping 1901 ElementTree._namespace_map = self._nsmap.copy() 1902 # Copy the path cache (should be empty) 1903 ElementTree.ElementPath._cache = self._path_cache.copy() 1904 self.checkwarnings.__enter__() 1905 1906 def __exit__(self, *args): 1907 from xml.etree import ElementTree 1908 # Restore mapping and path cache 1909 ElementTree._namespace_map = self._nsmap 1910 ElementTree.ElementPath._cache = self._path_cache 1911 self.checkwarnings.__exit__(*args) 1912 1913 1914def test_main(module_name='xml.etree.ElementTree'): 1915 from test import test_xml_etree 1916 1917 use_py_module = (module_name == 'xml.etree.ElementTree') 1918 1919 # The same doctests are used for both the Python and the C implementations 1920 assert test_xml_etree.ET.__name__ == module_name 1921 1922 # XXX the C module should give the 
same warnings as the Python module 1923 with CleanContext(quiet=not use_py_module): 1924 test_support.run_doctest(test_xml_etree, verbosity=True) 1925 1926 # The module should not be changed by the tests 1927 assert test_xml_etree.ET.__name__ == module_name 1928 1929if __name__ == '__main__': 1930 test_main() 1931